From 35fe0e5d9a98afa13a2db58af4a038cdc8f30dff Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 22 Oct 2019 16:43:24 +0200 Subject: [PATCH 001/122] Added first draft of AqlCallObject --- arangod/Aql/AqlCall.h | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 arangod/Aql/AqlCall.h diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h new file mode 100644 index 000000000000..d0989fdab2b8 --- /dev/null +++ b/arangod/Aql/AqlCall.h @@ -0,0 +1,47 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AQL_CALL_H +#define ARANGOD_AQL_AQL_CALL_H 1 + +#include "Aql/ExecutionBlock.h" + +#include +#include + +namespace arangodb { +namespace aql { +struct AqlCall { + class Infinity {}; + using Limit = std::variant; + + std::size_t offset{0}; + // TODO: The defaultBatchSize function could move into this file instead + Limit batchSize{ExecutionBlock::DefaultBatchSize()}; + Limit limit{Infinity{}}; + bool fullCount{false}; +}; + +} // namespace aql +} // namespace arangodb + +#endif \ No newline at end of file From ead968c786aa74f930ecf6b3856b04b4e8fafa54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Tue, 22 Oct 2019 17:15:33 +0200 Subject: [PATCH 002/122] Added AqlItemBlockInputRange --- arangod/Aql/AllRowsFetcher.h | 8 ++++ arangod/Aql/AqlItemBlockInputRange.cpp | 64 ++++++++++++++++++++++++++ arangod/Aql/AqlItemBlockInputRange.h | 55 ++++++++++++++++++++++ arangod/Aql/SingleRowFetcher.h | 8 ++++ arangod/CMakeLists.txt | 1 + 5 files changed, 136 insertions(+) create mode 100644 arangod/Aql/AqlItemBlockInputRange.cpp create mode 100644 arangod/Aql/AqlItemBlockInputRange.h diff --git a/arangod/Aql/AllRowsFetcher.h b/arangod/Aql/AllRowsFetcher.h index 73a96a9e4c9b..9ac5f2d3300e 100644 --- a/arangod/Aql/AllRowsFetcher.h +++ b/arangod/Aql/AllRowsFetcher.h @@ -92,11 +92,19 @@ class AllRowsFetcher { TEST_VIRTUAL ~AllRowsFetcher() = default; + using DataRange = std::shared_ptr; + protected: // only for testing! Does not initialize _dependencyProxy! AllRowsFetcher() = default; public: + // TODO implement and document + std::tuple execute(/* TODO: add"justDoIt"-style parameter */) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + } + /** * @brief Fetch one new AqlItemRow from upstream. 
* **Guarantee**: the pointer returned is valid only diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp new file mode 100644 index 000000000000..57bcf986d3e0 --- /dev/null +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -0,0 +1,64 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "AqlItemBlockInputRange.h" + +using namespace arangodb; +using namespace arangodb::aql; + +AqlItemBlockInputRange::AqlItemBlockInputRange(AqlItemBlockInputRange::State state, + SharedAqlItemBlockPtr const& block, std::size_t index) + : _finalState{state}, _block{block}, _rowIndex{index} {} + +AqlItemBlockInputRange::AqlItemBlockInputRange(AqlItemBlockInputRange::State state, + SharedAqlItemBlockPtr&& block, + std::size_t index) noexcept + : _finalState{state}, _block{std::move(block)}, _rowIndex{index} {} + +std::pair AqlItemBlockInputRange::peek() { + if (indexIsValid()) { + return std::make_pair(state(), InputAqlItemRow{_block, _rowIndex}); + } + return std::make_pair(state(), InputAqlItemRow{CreateInvalidInputRowHint{}}); +} + +std::pair AqlItemBlockInputRange::next() { + auto res = peek(); + ++_rowIndex; + if (!indexIsValid()) { + _block = nullptr; + _rowIndex = 0; + } + return res; +} + +bool AqlItemBlockInputRange::indexIsValid() const noexcept { + return _block != nullptr && _rowIndex < _block->size(); +} + +bool AqlItemBlockInputRange::moreRowsAfterThis() const noexcept { + return indexIsValid() && _rowIndex + 1 < _block->size(); +} + +AqlItemBlockInputRange::State AqlItemBlockInputRange::state() const noexcept { + return moreRowsAfterThis() ? State::HASMORE : _finalState; +} diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h new file mode 100644 index 000000000000..bdfc710c9f05 --- /dev/null +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -0,0 +1,55 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H +#define ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H + +#include "Aql/InputAqlItemRow.h" +#include "Aql/SharedAqlItemBlockPtr.h" + +namespace arangodb::aql { + +class AqlItemBlockInputRange { + public: + enum class State : uint8_t { HASMORE, DONE }; + + AqlItemBlockInputRange(State, arangodb::aql::SharedAqlItemBlockPtr const&, std::size_t); + AqlItemBlockInputRange(State, arangodb::aql::SharedAqlItemBlockPtr&&, std::size_t) noexcept; + + std::pair peek(); + + std::pair next(); + + private: + State state() const noexcept; + bool indexIsValid() const noexcept; + bool moreRowsAfterThis() const noexcept; + + private: + arangodb::aql::SharedAqlItemBlockPtr _block; + std::size_t _rowIndex; + State _finalState; +}; + +} + +#endif // ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index 594822b44517..09270cf8cc2e 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -55,11 +55,19 @@ class SingleRowFetcher { explicit SingleRowFetcher(DependencyProxy& executionBlock); TEST_VIRTUAL ~SingleRowFetcher() = default; + using DataRange = AqlItemBlockInputIterator; + protected: // only for testing! Does not initialize _dependencyProxy! SingleRowFetcher(); public: + // TODO implement and document + std::tuple execute(/* TODO: add"justDoIt"-style parameter */) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + } + /** * @brief Fetch one new AqlItemRow from upstream. * **Guarantee**: the row returned is valid only diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 9238ece0fd38..441e9a044260 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -216,6 +216,7 @@ set(LIB_ARANGO_AQL_SOURCES Aql/AllRowsFetcher.cpp Aql/AqlFunctionFeature.cpp Aql/AqlItemBlock.cpp + Aql/AqlItemBlockInputRange.cpp Aql/AqlItemBlockManager.cpp Aql/AqlItemBlockUtils.cpp Aql/AqlItemMatrix.cpp From 7c6bcbad8689ab4c4bdd1ce0848e522c567b034d Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 22 Oct 2019 17:51:41 +0200 Subject: [PATCH 003/122] Added Header file for AqlCallStack with comments. No implementation yet. --- arangod/Aql/AqlCallStack.h | 70 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 arangod/Aql/AqlCallStack.h diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h new file mode 100644 index 000000000000..aba835feabf9 --- /dev/null +++ b/arangod/Aql/AqlCallStack.h @@ -0,0 +1,70 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AQL_CALLSTACK_H +#define ARANGOD_AQL_AQL_CALLSTACK_H 1 + +#include + +namespace arangodb { +namespace aql { + +struct AqlCall; + +class AqlCallStack { + // Initial + AqlCallStack(AqlCall call); + // Used in subquery + AqlCallStack(AqlCallStack const& other, AqlCall call); + // Used to pass between blocks + AqlCallStack(AqlCallStack const& other); + + // Quick test is this CallStack is of local relevance, or it is sufficient to pass it through + bool isRelevant() const; + + // Get the top most Call element (this must be relevant). + // Caller is allowed to modify it, if necessary + AqlCall& myCall(); + + // fill up all missing calls within this stack s.t. we reach depth == 0 + // This needs to be called if an executor requires to be fully executed, even if skipped, + // even if the subquery it is located in is skipped. + // The default operations added here will correspond to produce all Rows, unlimitted. + // e.g. every Modification Executor needs to call this functionality, as modifictions need to be + // performed even if skipped. + void stackUpMissingCalls(); + + private: + // The list of operations, stacked by depth (e.g. bottom element is from main query) + std::stack _operations; + + // The depth of subqueries that have not issued calls into operations, + // as they have been skipped. + // In most cases this will be zero. + // However if we skip a subquery that has a nested subquery this depth will be 1 in the nested subquery. + size_t _depth; +}; + +} // namespace aql +} // namespace arangodb + +#endif \ No newline at end of file From 22d2977be5fe4f1d974bb95a4d645a127d1f16cd Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 22 Oct 2019 23:33:21 +0200 Subject: [PATCH 004/122] Added AqlCallStack first draft implementation. Made everything compile --- arangod/Aql/AqlCallStack.cpp | 73 ++++++++++++++++++++++++++ arangod/Aql/AqlCallStack.h | 10 +++- arangod/Aql/AqlItemBlockInputRange.cpp | 7 +-- arangod/Aql/SingleRowFetcher.h | 3 +- arangod/CMakeLists.txt | 1 + 5 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 arangod/Aql/AqlCallStack.cpp diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp new file mode 100644 index 000000000000..28bd80c93029 --- /dev/null +++ b/arangod/Aql/AqlCallStack.cpp @@ -0,0 +1,73 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "AqlCallStack.h" + +// TODO: This class is not yet memory efficient or optimized in any way. 
+// it might be reimplement soon to have the above features, Focus now is on +// the API we want to use. + +using namespace arangodb; +using namespace arangodb::aql; + +AqlCallStack::AqlCallStack(AqlCall call) + : _operations{{std::move(call)}}, _depth(0) {} + +AqlCallStack::AqlCallStack(AqlCallStack const& other, AqlCall call) + : _operations{other._operations}, _depth(0) { + // We can only use this constructor on relevant levels + // Alothers need to use passThrough constructor + TRI_ASSERT(other._depth == 0); + _operations.push(std::move(call)); +} + +AqlCallStack::AqlCallStack(AqlCallStack const& other) + : _operations{other._operations}, _depth(other._depth) {} + +bool AqlCallStack::isRelevant() const { return _depth == 0; } + +AqlCall& AqlCallStack::myCall() { + TRI_ASSERT(isRelevant()); + TRI_ASSERT(!_operations.empty()); + return _operations.top(); +} + +void AqlCallStack::stackUpMissingCalls() { + while (!isRelevant()) { + // For every depth, we add an additional default call. + // The default is to produce unlimited many results, + // using DefaultBatchSize each. + _operations.emplace(AqlCall{}); + _depth--; + } + TRI_ASSERT(isRelevant()); +} + +void AqlCallStack::pop() { + if (isRelevant()) { + // We have one element to pop + TRI_ASSERT(!_operations.empty()); + _operations.pop(); + // We can never pop the main query, so one element needs to stay + TRI_ASSERT(!_operations.empty()); + } +} \ No newline at end of file diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index aba835feabf9..3d966bf6a0ea 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -23,13 +23,13 @@ #ifndef ARANGOD_AQL_AQL_CALLSTACK_H #define ARANGOD_AQL_AQL_CALLSTACK_H 1 +#include "Aql/AqlCall.h" + #include namespace arangodb { namespace aql { -struct AqlCall; - class AqlCallStack { // Initial AqlCallStack(AqlCall call); @@ -53,6 +53,12 @@ class AqlCallStack { // performed even if skipped. void stackUpMissingCalls(); + // Pops one subquery level. + // if this isRelevent it pops the top-most call from the stack. + // if this is not revelent it reduces the depth by 1. + // Can be savely called on every subquery Start. + void pop(); + private: // The list of operations, stacked by depth (e.g. 
bottom element is from main query) std::stack _operations; diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 57bcf986d3e0..aa2e9a47e1fd 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -26,13 +26,14 @@ using namespace arangodb; using namespace arangodb::aql; AqlItemBlockInputRange::AqlItemBlockInputRange(AqlItemBlockInputRange::State state, - SharedAqlItemBlockPtr const& block, std::size_t index) - : _finalState{state}, _block{block}, _rowIndex{index} {} + SharedAqlItemBlockPtr const& block, + std::size_t index) + : _block{block}, _rowIndex{index}, _finalState{state} {} AqlItemBlockInputRange::AqlItemBlockInputRange(AqlItemBlockInputRange::State state, SharedAqlItemBlockPtr&& block, std::size_t index) noexcept - : _finalState{state}, _block{std::move(block)}, _rowIndex{index} {} + : _block{std::move(block)}, _rowIndex{index}, _finalState{state} {} std::pair AqlItemBlockInputRange::peek() { if (indexIsValid()) { diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index 09270cf8cc2e..a4d2788c6207 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -26,6 +26,7 @@ #ifndef ARANGOD_AQL_SINGLE_ROW_FETCHER_H #define ARANGOD_AQL_SINGLE_ROW_FETCHER_H +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" @@ -55,7 +56,7 @@ class SingleRowFetcher { explicit SingleRowFetcher(DependencyProxy& executionBlock); TEST_VIRTUAL ~SingleRowFetcher() = default; - using DataRange = AqlItemBlockInputIterator; + using DataRange = AqlItemBlockInputRange; protected: // only for testing! Does not initialize _dependencyProxy! diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 441e9a044260..f6b7beb8a0ac 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -214,6 +214,7 @@ set(LIB_ARANGO_PREGEL_SOURCES set(LIB_ARANGO_AQL_SOURCES Aql/Aggregator.cpp Aql/AllRowsFetcher.cpp + Aql/AqlCallStack.cpp Aql/AqlFunctionFeature.cpp Aql/AqlItemBlock.cpp Aql/AqlItemBlockInputRange.cpp From 3ab448831cb55e627fe014b8d64bbd3ab33d4d41 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 23 Oct 2019 00:08:05 +0200 Subject: [PATCH 005/122] Added empty dummy stubs for execute() in ExecutionBlock --- arangod/Aql/BlocksWithClients.cpp | 7 +++ arangod/Aql/BlocksWithClients.h | 2 + arangod/Aql/ExecutionBlock.h | 18 +++++- arangod/Aql/ExecutionBlockImpl.cpp | 78 ++++++++++++++----------- arangod/Aql/ExecutionBlockImpl.h | 16 ++++- arangod/Aql/IdExecutor.cpp | 31 +++++++--- arangod/Aql/IdExecutor.h | 2 + arangod/Aql/RemoteExecutor.cpp | 11 +++- arangod/Aql/RemoteExecutor.h | 12 ++-- tests/Aql/WaitingExecutionBlockMock.cpp | 7 +++ tests/Aql/WaitingExecutionBlockMock.h | 4 ++ 11 files changed, 134 insertions(+), 54 deletions(-) diff --git a/arangod/Aql/BlocksWithClients.cpp b/arangod/Aql/BlocksWithClients.cpp index 5108d1927372..7adda6f3ec54 100644 --- a/arangod/Aql/BlocksWithClients.cpp +++ b/arangod/Aql/BlocksWithClients.cpp @@ -23,6 +23,7 @@ #include "BlocksWithClients.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlock.h" #include "Aql/AqlTransaction.h" #include "Aql/AqlValue.h" @@ -137,3 +138,9 @@ std::pair BlocksWithClients::skipSome(size_t) { TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } + +std::tuple BlocksWithClients::execute(AqlCallStack stack) { + // This will not be implemented here! 
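  // Note: the AqlCallStack handed in here carries one AqlCall per (sub)query
  // level. A minimal usage sketch (illustrative only), based on the
  // constructors and methods introduced in the patches above:
  //
  //   AqlCallStack stack{AqlCall{}};          // main query level, depth 0
  //   AqlCallStack nested{stack, AqlCall{}};  // entering a subquery adds a call
  //   if (nested.isRelevant()) {
  //     AqlCall& call = nested.myCall();      // top-most call, may be adjusted
  //     // ... consume call.offset and the limits here ...
  //   }
  //   nested.stackUpMissingCalls();  // add default calls for skipped levels
  //   nested.pop();                  // leaving the subquery removes one level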
+ TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} diff --git a/arangod/Aql/BlocksWithClients.h b/arangod/Aql/BlocksWithClients.h index bdc659a1aede..ca29e4cd476f 100644 --- a/arangod/Aql/BlocksWithClients.h +++ b/arangod/Aql/BlocksWithClients.h @@ -80,6 +80,8 @@ class BlocksWithClients : public ExecutionBlock { virtual std::pair skipSomeForShard(size_t atMost, std::string const& shardId) = 0; + std::tuple execute(AqlCallStack stack) override; + protected: /// @brief getClientId: get the number (used internally) /// corresponding to diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index 8dbba6258c79..29bc09dc4230 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -39,6 +39,7 @@ class Methods; } namespace aql { +class AqlCallStack; class InputAqlItemRow; class ExecutionEngine; class ExecutionNode; @@ -96,8 +97,8 @@ class ExecutionBlock { void traceGetSomeBegin(size_t atMost); // Trace the end of a getSome call, potentially with result - std::pair traceGetSomeEnd( - ExecutionState state, SharedAqlItemBlockPtr result); + std::pair traceGetSomeEnd(ExecutionState state, + SharedAqlItemBlockPtr result); void traceSkipSomeBegin(size_t atMost); @@ -122,6 +123,19 @@ class ExecutionBlock { /// @brief add a dependency void addDependency(ExecutionBlock* ep); + /// @brief main function to produce data in this ExecutionBlock. + /// It gets the AqlCallStack defining the operations required in every + /// subquery level. It will then perform the requested amount of offset, data and fullcount. + /// The AqlCallStack is copied on purpose, so this block can modify it. + /// Will return + /// 1. state: + /// * WAITING: We have async operation going on, nothing happend, please call again + /// * HASMORE: Here is some data in the request range, there is still more, if required call again + /// * DONE: Here is some data, and there will be no further data available. + /// 2. size_t: Amount of documents skipped. + /// 3. SharedAqlItemBlockPtr: The next data block. 
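  /// For illustration only, a caller could drive this contract roughly as
  /// follows (a sketch; `upstream` stands for any ExecutionBlock and the
  /// top-most call simply requests all rows):
  ///
  ///   AqlCallStack stack{AqlCall{}};
  ///   ExecutionState state = ExecutionState::HASMORE;
  ///   while (state != ExecutionState::DONE) {
  ///     auto [nextState, skipped, block] = upstream->execute(stack);
  ///     state = nextState;
  ///     if (state == ExecutionState::WAITING) {
  ///       continue;  // async operation in flight, call again later
  ///     }
  ///     // consume `skipped` and `block` here
  ///   }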
+ virtual std::tuple execute(AqlCallStack stack) = 0; + protected: /// @brief the execution engine ExecutionEngine* _engine; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 1880a006aa83..7f9656966d35 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -26,6 +26,7 @@ #include "ExecutionBlockImpl.h" #include "Aql/AllRowsFetcher.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlock.h" #include "Aql/CalculationExecutor.h" #include "Aql/ConstFetcher.h" @@ -46,6 +47,7 @@ #include "Aql/InputAqlItemRow.h" #include "Aql/KShortestPathsExecutor.h" #include "Aql/LimitExecutor.h" +#include "Aql/MaterializeExecutor.h" #include "Aql/ModificationExecutor.h" #include "Aql/ModificationExecutorTraits.h" #include "Aql/MultiDependencySingleRowFetcher.h" @@ -63,7 +65,6 @@ #include "Aql/SubqueryExecutor.h" #include "Aql/SubqueryStartExecutor.h" #include "Aql/TraversalExecutor.h" -#include "Aql/MaterializeExecutor.h" #include @@ -79,18 +80,18 @@ using namespace arangodb::aql; * constexpr bool someClassHasSomeMethod = hasSomeMethod::value; */ -#define CREATE_HAS_MEMBER_CHECK(methodName, checkName) \ - template \ - class checkName { \ - template \ - static std::true_type test(decltype(&C::methodName)); \ - template \ +#define CREATE_HAS_MEMBER_CHECK(methodName, checkName) \ + template \ + class checkName { \ + template \ + static std::true_type test(decltype(&C::methodName)); \ + template \ static std::true_type test(decltype(&C::template methodName<>)); \ - template \ - static std::false_type test(...); \ - \ - public: \ - static constexpr bool value = decltype(test(0))::value; \ + template \ + static std::false_type test(...); \ + \ + public: \ + static constexpr bool value = decltype(test(0))::value; \ } CREATE_HAS_MEMBER_CHECK(initializeCursor, hasInitializeCursor); @@ -301,23 +302,24 @@ static SkipVariants constexpr skipType() { static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert(useExecutor == - (std::is_same::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same::value || - std::is_same::value || - std::is_same>>::value || - std::is_same::value || - std::is_same::value || - std::is_same::value), - "Unexpected executor for SkipVariants::EXECUTOR"); + static_assert( + useExecutor == + (std::is_same::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same::value || + std::is_same::value || + std::is_same>>::value || + std::is_same::value || + std::is_same::value || + std::is_same::value), + "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! static_assert( @@ -386,7 +388,6 @@ std::pair ExecutionBlockImpl::skipSomeOnceWith return {state, skipped}; } - template struct InitializeCursor {}; @@ -453,6 +454,13 @@ std::pair ExecutionBlockImpl::shutdown(int err return ExecutionBlock::shutdown(errorCode); } +template +std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { + // TODO implement! 
+ TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + // Work around GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480 // Without the namespaces it fails with // error: specialization of 'template std::pair arangodb::aql::ExecutionBlockImpl::initializeCursor(arangodb::aql::AqlItemBlock*, size_t)' in different namespace @@ -568,8 +576,8 @@ std::pair ExecutionBlockImpl>::s } template <> -std::pair -ExecutionBlockImpl>>::shutdown(int errorCode) { +std::pair ExecutionBlockImpl< + IdExecutor>>::shutdown(int errorCode) { if (this->infos().isResponsibleForInitializeCursor()) { return ExecutionBlock::shutdown(errorCode); } @@ -766,8 +774,10 @@ template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>>; -template class ::arangodb::aql::ExecutionBlockImpl>>; +template class ::arangodb::aql::ExecutionBlockImpl< + IdExecutor>>; +template class ::arangodb::aql::ExecutionBlockImpl< + IdExecutor>>; template class ::arangodb::aql::ExecutionBlockImpl; template class ::arangodb::aql::ExecutionBlockImpl; template class ::arangodb::aql::ExecutionBlockImpl>>; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 3b451241df0c..53f2738dbea3 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -98,7 +98,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { typename aql::DependencyProxy; static_assert( - Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Disable || Executor::Properties::preservesOrder, + Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Disable || + Executor::Properties::preservesOrder, "allowsBlockPassthrough must imply preservesOrder, but does not!"); public: @@ -174,6 +175,19 @@ class ExecutionBlockImpl final : public ExecutionBlock { /// central place. std::pair shutdown(int) override; + /// @brief main function to produce data in this ExecutionBlock. + /// It gets the AqlCallStack defining the operations required in every + /// subquery level. It will then perform the requested amount of offset, data and fullcount. + /// The AqlCallStack is copied on purpose, so this block can modify it. + /// Will return + /// 1. state: + /// * WAITING: We have async operation going on, nothing happend, please call again + /// * HASMORE: Here is some data in the request range, there is still more, if required call again + /// * DONE: Here is some data, and there will be no further data available. + /// 2. size_t: Amount of documents skipped. + /// 3. SharedAqlItemBlockPtr: The next data block. + std::tuple execute(AqlCallStack stack) override; + private: /** * @brief Inner getSome() part, without the tracing calls. 
diff --git a/arangod/Aql/IdExecutor.cpp b/arangod/Aql/IdExecutor.cpp index d2dd944c5c85..0b4f99b936b6 100644 --- a/arangod/Aql/IdExecutor.cpp +++ b/arangod/Aql/IdExecutor.cpp @@ -22,6 +22,7 @@ #include "IdExecutor.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlValue.h" #include "Aql/ConstFetcher.h" #include "Aql/ExecutionEngine.h" @@ -43,9 +44,8 @@ constexpr BlockPassthrough IdExecutor::Properties::allowsBloc template constexpr bool IdExecutor::Properties::inputSizeRestrictsOutputSize; -ExecutionBlockImpl>::ExecutionBlockImpl(ExecutionEngine* engine, - ExecutionNode const* node, - RegisterId outputRegister, bool doCount) +ExecutionBlockImpl>::ExecutionBlockImpl( + ExecutionEngine* engine, ExecutionNode const* node, RegisterId outputRegister, bool doCount) : ExecutionBlock(engine, node), _currentDependency(0), _outputRegister(outputRegister), @@ -56,7 +56,8 @@ ExecutionBlockImpl>::ExecutionBlockIm } } -std::pair ExecutionBlockImpl>::skipSome(size_t atMost) { +std::pair +ExecutionBlockImpl>::skipSome(size_t atMost) { traceSkipSomeBegin(atMost); if (isDone()) { return traceSkipSomeEnd(ExecutionState::DONE, 0); @@ -73,7 +74,8 @@ std::pair ExecutionBlockImpl ExecutionBlockImpl>::getSome(size_t atMost) { +std::pair +ExecutionBlockImpl>::getSome(size_t atMost) { traceGetSomeBegin(atMost); if (isDone()) { return traceGetSomeEnd(ExecutionState::DONE, nullptr); @@ -99,10 +101,18 @@ bool aql::ExecutionBlockImpl>::isDone return _currentDependency >= _dependencies.size(); } -RegisterId ExecutionBlockImpl>::getOutputRegisterId() const noexcept { +RegisterId ExecutionBlockImpl>::getOutputRegisterId() const + noexcept { return _outputRegister; } +std::tuple +ExecutionBlockImpl>::execute(AqlCallStack stack) { + // TODO Implement me + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + ExecutionBlock& ExecutionBlockImpl>::currentDependency() const { TRI_ASSERT(_currentDependency < _dependencies.size()); TRI_ASSERT(_dependencies[_currentDependency] != nullptr); @@ -209,10 +219,13 @@ template class ::arangodb::aql::IdExecutor>; template std::tuple::Stats, SharedAqlItemBlockPtr> -IdExecutor::fetchBlockForPassthrough(size_t atMost); +IdExecutor::fetchBlockForPassthrough( + size_t atMost); template std::tuple>::Stats, SharedAqlItemBlockPtr> -IdExecutor>::fetchBlockForPassthrough(size_t atMost); +IdExecutor>::fetchBlockForPassthrough< + BlockPassthrough::Enable, void>(size_t atMost); template std::tuple -IdExecutor>::skipRows(size_t atMost); +IdExecutor>::skipRows< + BlockPassthrough::Disable, void>(size_t atMost); diff --git a/arangod/Aql/IdExecutor.h b/arangod/Aql/IdExecutor.h index c0beeaa907eb..398184889699 100644 --- a/arangod/Aql/IdExecutor.h +++ b/arangod/Aql/IdExecutor.h @@ -93,6 +93,8 @@ class ExecutionBlockImpl> : public Ex RegisterId getOutputRegisterId() const noexcept; + std::tuple execute(AqlCallStack stack) override; + private: bool isDone() const noexcept; diff --git a/arangod/Aql/RemoteExecutor.cpp b/arangod/Aql/RemoteExecutor.cpp index 01dbe671baab..37ca55970be6 100644 --- a/arangod/Aql/RemoteExecutor.cpp +++ b/arangod/Aql/RemoteExecutor.cpp @@ -22,6 +22,7 @@ #include "RemoteExecutor.h" +#include "Aql/AqlCallStack.h" #include "Aql/ClusterNodes.h" #include "Aql/ExecutionEngine.h" #include "Aql/ExecutorInfos.h" @@ -142,8 +143,6 @@ std::pair ExecutionBlockImpl ExecutionBlockImpl::shutdown(i return {ExecutionState::WAITING, TRI_ERROR_NO_ERROR}; } +std::tuple ExecutionBlockImpl::execute( + AqlCallStack stack) { + TRI_ASSERT(false); + 
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + namespace { Result handleErrorResponse(network::EndpointSpec const& spec, fuerte::Error err, fuerte::Response* response) { @@ -390,7 +395,7 @@ Result handleErrorResponse(network::EndpointSpec const& spec, fuerte::Error err, .append(spec.serverId) .append("': "); } - + int res = TRI_ERROR_INTERNAL; if (err != fuerte::Error::NoError) { res = network::fuerteToArangoErrorCode(err); diff --git a/arangod/Aql/RemoteExecutor.h b/arangod/Aql/RemoteExecutor.h index 5cb40b2e9a9d..40f735f7c629 100644 --- a/arangod/Aql/RemoteExecutor.h +++ b/arangod/Aql/RemoteExecutor.h @@ -24,8 +24,8 @@ #define ARANGOD_AQL_REMOTE_EXECUTOR_H #include "Aql/ClusterNodes.h" -#include "Aql/ExecutorInfos.h" #include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutorInfos.h" #include @@ -62,6 +62,8 @@ class ExecutionBlockImpl : public ExecutionBlock { std::pair shutdown(int errorCode) override; + std::tuple execute(AqlCallStack stack) override; + #ifdef ARANGODB_ENABLE_MAINTAINER_MODE // only for asserts: public: @@ -109,16 +111,16 @@ class ExecutionBlockImpl : public ExecutionBlock { /// @brief the last remote response Result object, may contain an error. arangodb::Result _lastError; - + std::mutex _communicationMutex; - + unsigned _lastTicket; /// used to check for canceled requests - + bool _hasTriggeredShutdown; // _communicationMutex *must* be locked for this! unsigned generateNewTicket(); - + bool _didSendShutdownRequest = false; void traceGetSomeRequest(velocypack::Slice slice, size_t atMost); diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp index 4706dd4f4d48..6e938d4f3c16 100644 --- a/tests/Aql/WaitingExecutionBlockMock.cpp +++ b/tests/Aql/WaitingExecutionBlockMock.cpp @@ -22,6 +22,7 @@ #include "WaitingExecutionBlockMock.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlock.h" #include "Aql/ExecutionEngine.h" #include "Aql/ExecutionState.h" @@ -113,3 +114,9 @@ std::pair WaitingExecutionBlockMock::skip return {ExecutionState::HASMORE, skipped}; } } + +std::tuple WaitingExecutionBlockMock::execute(AqlCallStack stack) { + // TODO implement! + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} \ No newline at end of file diff --git a/tests/Aql/WaitingExecutionBlockMock.h b/tests/Aql/WaitingExecutionBlockMock.h index 598308e25da8..f644b37ccc0f 100644 --- a/tests/Aql/WaitingExecutionBlockMock.h +++ b/tests/Aql/WaitingExecutionBlockMock.h @@ -95,6 +95,10 @@ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { */ std::pair skipSome(size_t atMost) override; + // TODO: Document and implement me! + std::tuple execute( + arangodb::aql::AqlCallStack stack) override; + private: std::deque _data; arangodb::aql::ResourceMonitor _resourceMonitor; From b079d783f3b36596112cd16c87e2a119e3b55d2e Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 24 Oct 2019 06:57:16 +0200 Subject: [PATCH 006/122] Implementation of new API in FilterExecutor. 
Rough implementation in ExecutionBlockImpl (not complete, only skip path) everything compiles, but is not tested --- arangod/Aql/AqlCallStack.h | 1 + arangod/Aql/AqlItemBlockInputRange.cpp | 38 ++++++++----- arangod/Aql/AqlItemBlockInputRange.h | 24 +++++--- arangod/Aql/ExecutionBlockImpl.cpp | 39 +++++++++++++ arangod/Aql/ExecutionState.cpp | 12 ++++ arangod/Aql/ExecutionState.h | 13 +++++ arangod/Aql/FilterExecutor.cpp | 76 +++++++++++++++++++++++++- arangod/Aql/FilterExecutor.h | 14 +++++ arangod/Aql/SingleRowFetcher.h | 2 +- 9 files changed, 194 insertions(+), 25 deletions(-) diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 3d966bf6a0ea..82bbddcbed5c 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -31,6 +31,7 @@ namespace arangodb { namespace aql { class AqlCallStack { + public: // Initial AqlCallStack(AqlCall call); // Used in subquery diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index aa2e9a47e1fd..01ae52159dae 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -25,24 +25,36 @@ using namespace arangodb; using namespace arangodb::aql; -AqlItemBlockInputRange::AqlItemBlockInputRange(AqlItemBlockInputRange::State state, +AqlItemBlockInputRange::AqlItemBlockInputRange() + : _block(nullptr), _rowIndex(0), _endIndex(0), _finalState(ExecutorState::HASMORE) { + TRI_ASSERT(!hasMore()); + TRI_ASSERT(state() == ExecutorState::HASMORE); +} + +AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, SharedAqlItemBlockPtr const& block, - std::size_t index) - : _block{block}, _rowIndex{index}, _finalState{state} {} + std::size_t index, std::size_t endIndex) + : _block{block}, _rowIndex{index}, _endIndex(endIndex), _finalState{state} { + TRI_ASSERT(index < endIndex); + TRI_ASSERT(endIndex <= block->size()); +} -AqlItemBlockInputRange::AqlItemBlockInputRange(AqlItemBlockInputRange::State state, +AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, SharedAqlItemBlockPtr&& block, - std::size_t index) noexcept - : _block{std::move(block)}, _rowIndex{index}, _finalState{state} {} + std::size_t index, std::size_t endIndex) noexcept + : _block{std::move(block)}, _rowIndex{index}, _endIndex(endIndex), _finalState{state} { + TRI_ASSERT(index < endIndex); + TRI_ASSERT(endIndex <= block->size()); +} -std::pair AqlItemBlockInputRange::peek() { +std::pair AqlItemBlockInputRange::peek() { if (indexIsValid()) { return std::make_pair(state(), InputAqlItemRow{_block, _rowIndex}); } return std::make_pair(state(), InputAqlItemRow{CreateInvalidInputRowHint{}}); } -std::pair AqlItemBlockInputRange::next() { +std::pair AqlItemBlockInputRange::next() { auto res = peek(); ++_rowIndex; if (!indexIsValid()) { @@ -53,13 +65,13 @@ std::pair AqlItemBlockInputRange } bool AqlItemBlockInputRange::indexIsValid() const noexcept { - return _block != nullptr && _rowIndex < _block->size(); + return _block != nullptr && _rowIndex < _endIndex; } -bool AqlItemBlockInputRange::moreRowsAfterThis() const noexcept { - return indexIsValid() && _rowIndex + 1 < _block->size(); +bool AqlItemBlockInputRange::hasMore() const noexcept { + return indexIsValid() && _rowIndex + 1 < _endIndex; } -AqlItemBlockInputRange::State AqlItemBlockInputRange::state() const noexcept { - return moreRowsAfterThis() ? State::HASMORE : _finalState; +ExecutorState AqlItemBlockInputRange::state() const noexcept { + return hasMore() ? 
ExecutorState::HASMORE : _finalState; } diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index bdfc710c9f05..477b58c53bfe 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H #define ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H +#include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" #include "Aql/SharedAqlItemBlockPtr.h" @@ -30,26 +31,31 @@ namespace arangodb::aql { class AqlItemBlockInputRange { public: - enum class State : uint8_t { HASMORE, DONE }; + AqlItemBlockInputRange(); - AqlItemBlockInputRange(State, arangodb::aql::SharedAqlItemBlockPtr const&, std::size_t); - AqlItemBlockInputRange(State, arangodb::aql::SharedAqlItemBlockPtr&&, std::size_t) noexcept; + AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr const&, + std::size_t, std::size_t endIndex); + AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr&&, + std::size_t, std::size_t endIndex) noexcept; - std::pair peek(); + bool hasMore() const noexcept; - std::pair next(); + ExecutorState state() const noexcept; + + std::pair peek(); + + std::pair next(); private: - State state() const noexcept; bool indexIsValid() const noexcept; - bool moreRowsAfterThis() const noexcept; private: arangodb::aql::SharedAqlItemBlockPtr _block; std::size_t _rowIndex; - State _finalState; + std::size_t _endIndex; + ExecutorState _finalState; }; -} +} // namespace arangodb::aql #endif // ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 7f9656966d35..0162b0c62663 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -583,6 +583,45 @@ std::pair ExecutionBlockImpl< } return {ExecutionState::DONE, {errorCode}}; } + +// TODO this is only temporary, remove me +// Just to make sure everything compiles! +template <> +std::tuple +ExecutionBlockImpl::execute(AqlCallStack stack) { + // TODO make this a member variable + Fetcher::DataRange emptyRange{}; + // TODO: pop this from the stack instead of modify. + // TODO: Need to make this member variable for waiting? + AqlCall& myCall = stack.myCall(); + // Skipping path + while (myCall.offset > 0) { + // Execute skipSome + auto const [state, skipped, call] = _executor.skipRowsRange(myCall.offset, emptyRange); + if (state == ExecutorState::DONE) { + // We are done with this subquery + // TODO Implement me properly, we would need to fill shadowRows into the block + return {ExecutionState::DONE, skipped, nullptr}; + } + TRI_ASSERT(skipped <= myCall.offset); + myCall.offset -= skipped; + if (myCall.offset > 0) { + // Need to fetch more + // TODO: we need to push the returned call into the stack, pop our call of. + size_t skipped = 0; + std::tie(_upstreamState, skipped, emptyRange) = _rowFetcher.execute(stack); + TRI_ASSERT(skipped <= myCall.offset); + myCall.offset -= skipped; + } + } + + // TODO add GetSome path + + // TODO implement! 
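  // For orientation, a produce path would consume the input range via the
  // hasMore()/next() interface defined above (a sketch only, nothing below is
  // implemented; `range` stands for the Fetcher::DataRange at hand):
  //
  //   while (range.hasMore()) {
  //     auto [rangeState, row] = range.next();
  //     if (!row) {
  //       break;  // no more valid rows in the current block
  //     }
  //     // hand `row` to the executor resp. copy it into the output block,
  //     // and stop once the call's limit is exhausted
  //   }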
+ TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/ExecutionState.cpp b/arangod/Aql/ExecutionState.cpp index 4e788e565560..39b426fb6d46 100644 --- a/arangod/Aql/ExecutionState.cpp +++ b/arangod/Aql/ExecutionState.cpp @@ -42,5 +42,17 @@ std::ostream& operator<<(std::ostream& ostream, ExecutionState state) { return ostream; } +std::ostream& operator<<(std::ostream& ostream, ExecutorState state) { + switch (state) { + case ExecutorState::DONE: + ostream << "DONE"; + break; + case ExecutorState::HASMORE: + ostream << "HASMORE"; + break; + } + return ostream; +} + } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/ExecutionState.h b/arangod/Aql/ExecutionState.h index 0ac112b92aa8..04edf78a9e8a 100644 --- a/arangod/Aql/ExecutionState.h +++ b/arangod/Aql/ExecutionState.h @@ -42,8 +42,21 @@ enum class ExecutionState { WAITING }; +enum class ExecutorState { + // done with this block, definitely no more results + DONE, + // (potentially) more results available. this may "lie" and + // report that there are more results when in fact there are + // none (background: to accurately determine that there are + // more results we may need to execute expensive operations + // on the preceeding blocks, which we want to avoid) + HASMORE +}; + std::ostream& operator<<(std::ostream& ostream, ExecutionState state); +std::ostream& operator<<(std::ostream& ostream, ExecutorState state); + } // namespace aql } // namespace arangodb #endif diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index f6769f6fae6b..601e955125b4 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -25,6 +25,9 @@ #include "FilterExecutor.h" +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -52,9 +55,12 @@ FilterExecutorInfos::FilterExecutorInfos(RegisterId inputRegister, RegisterId nr std::move(registersToClear), std::move(registersToKeep)), _inputRegister(inputRegister) {} -RegisterId FilterExecutorInfos::getInputRegister() const noexcept { return _inputRegister; } +RegisterId FilterExecutorInfos::getInputRegister() const noexcept { + return _inputRegister; +} -FilterExecutor::FilterExecutor(Fetcher& fetcher, Infos& infos) : _infos(infos), _fetcher(fetcher) {} +FilterExecutor::FilterExecutor(Fetcher& fetcher, Infos& infos) + : _infos(infos), _fetcher(fetcher) {} FilterExecutor::~FilterExecutor() = default; @@ -99,3 +105,69 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at return _fetcher.preFetchNumberOfRows(atMost); } +std::tuple FilterExecutor::skipRowsRange( + size_t offset, AqlItemBlockInputRange& inputRange) { + ExecutorState state = ExecutorState::HASMORE; + InputAqlItemRow input{CreateInvalidInputRowHint{}}; + size_t skipped = 0; + while (inputRange.hasMore() && skipped < offset) { + std::tie(state, input) = inputRange.next(); + if (!input) { + TRI_ASSERT(!inputRange.hasMore()); + break; + } + if (input.getValue(_infos.getInputRegister()).toBoolean()) { + skipped++; + } + } + + AqlCall upstreamCall{}; + upstreamCall.batchSize = offset - skipped; + return {state, skipped, upstreamCall}; +} + +std::tuple FilterExecutor::produceRows( + size_t limit, AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + TRI_IF_FAILURE("FilterExecutor::produceRows") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + FilterStats 
stats{}; + ExecutorState state = ExecutorState::HASMORE; + InputAqlItemRow input{CreateInvalidInputRowHint{}}; + + while (inputRange.hasMore() && limit > 0) { + TRI_ASSERT(!output.isFull()); + std::tie(state, input) = inputRange.next(); + if (!input) { + TRI_ASSERT(!inputRange.hasMore()); + break; + } + if (input.getValue(_infos.getInputRegister()).toBoolean()) { + output.copyRow(input); + output.advanceRow(); + limit--; + } else { + stats.incrFiltered(); + } + } + + AqlCall upstreamCall{}; + upstreamCall.batchSize = limit; + return {state, stats, upstreamCall}; +} + +/* +skipSome(x) = > AqlCall{ + offset : x, + batchSize : 0, + limit : AqlCall::Infinity{}, + fullCount : | false +} + +getSome(x) = > { + offset: 0, + batchSize : x, + limit : AqlCall::Infinity{}, + fullCount : | false +} +*/ diff --git a/arangod/Aql/FilterExecutor.h b/arangod/Aql/FilterExecutor.h index a8e15705029e..7ebc68cfdc80 100644 --- a/arangod/Aql/FilterExecutor.h +++ b/arangod/Aql/FilterExecutor.h @@ -35,6 +35,8 @@ namespace arangodb { namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; class InputAqlItemRow; class OutputAqlItemRow; class ExecutorInfos; @@ -89,6 +91,18 @@ class FilterExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + std::tuple produceRows(size_t atMost, + AqlItemBlockInputRange& input, + OutputAqlItemRow& output); + + std::tuple skipRowsRange(size_t atMost, + AqlItemBlockInputRange& input); + std::pair expectedNumberOfRows(size_t atMost) const; private: diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index a4d2788c6207..47b8e22d4f13 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -64,7 +64,7 @@ class SingleRowFetcher { public: // TODO implement and document - std::tuple execute(/* TODO: add"justDoIt"-style parameter */) { + std::tuple execute(AqlCallStack& stack) { TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } From eb120a05af2ae2105760167cd283080dcbfcc47b Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 24 Oct 2019 13:42:46 +0200 Subject: [PATCH 007/122] Fixed off by one error in InputRange. Fixed FilterExecutor --- arangod/Aql/AqlItemBlockInputRange.cpp | 6 ++- arangod/Aql/AqlItemBlockInputRange.h | 2 + tests/Aql/FilterExecutorTest.cpp | 66 +++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 01ae52159dae..7f9b04efc841 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -68,10 +68,12 @@ bool AqlItemBlockInputRange::indexIsValid() const noexcept { return _block != nullptr && _rowIndex < _endIndex; } -bool AqlItemBlockInputRange::hasMore() const noexcept { +bool AqlItemBlockInputRange::hasMore() const noexcept { return indexIsValid(); } + +bool AqlItemBlockInputRange::hasMoreAfterThis() const noexcept { return indexIsValid() && _rowIndex + 1 < _endIndex; } ExecutorState AqlItemBlockInputRange::state() const noexcept { - return hasMore() ? ExecutorState::HASMORE : _finalState; + return hasMoreAfterThis() ? 
ExecutorState::HASMORE : _finalState; } diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 477b58c53bfe..a1195cc46108 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -49,6 +49,8 @@ class AqlItemBlockInputRange { private: bool indexIsValid() const noexcept; + bool hasMoreAfterThis() const noexcept; + private: arangodb::aql::SharedAqlItemBlockPtr _block; std::size_t _rowIndex; diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 780aa699b5b5..5a3868d7f737 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -23,9 +23,11 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// +#include "AqlItemBlockHelper.h" #include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/ExecutorInfos.h" #include "Aql/FilterExecutor.h" @@ -64,7 +66,8 @@ class FilterExecutorTest : public ::testing::Test { TEST_F(FilterExecutorTest, there_are_no_rows_upstream_the_producer_does_not_wait) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); FilterExecutor testee(fetcher, infos); FilterStats stats{}; @@ -77,7 +80,8 @@ TEST_F(FilterExecutorTest, there_are_no_rows_upstream_the_producer_does_not_wait TEST_F(FilterExecutorTest, there_are_no_rows_upstream_the_producer_waits) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), true); FilterExecutor testee(fetcher, infos); FilterStats stats{}; @@ -97,7 +101,8 @@ TEST_F(FilterExecutorTest, there_are_no_rows_upstream_the_producer_waits) { TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_the_producer_does_not_wait) { auto input = VPackParser::fromJson( "[ [true], [false], [true], [false], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); FilterExecutor testee(fetcher, infos); FilterStats stats{}; @@ -134,7 +139,8 @@ TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_the_producer_does_not_ TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { auto input = VPackParser::fromJson( "[ [true], [false], [true], [false], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); FilterExecutor testee(fetcher, infos); FilterStats stats{}; @@ -216,7 +222,8 @@ TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_and_the_last_one_has_to_be_filtered_the_producer_does_not_wait) { auto input = VPackParser::fromJson( "[ [true], [false], [true], [false], [false], [true], [false] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + 
SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); FilterExecutor testee(fetcher, infos); FilterStats stats{}; @@ -259,7 +266,8 @@ TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_and_the_last_one_has_to_be_filtered_the_producer_waits) { auto input = VPackParser::fromJson( "[ [true], [false], [true], [false], [false], [true], [false] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); FilterExecutor testee(fetcher, infos); FilterStats stats{}; @@ -342,6 +350,52 @@ TEST_F(FilterExecutorTest, ASSERT_EQ(stats.getFiltered(), 1); } +TEST_F(FilterExecutorTest, test_produce_datarange) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + FilterExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + + OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, + infos.registersToClear()); + EXPECT_EQ(output.numRowsWritten(), 0); + auto const [state, stats, call] = testee.produceRows(1000, input, output); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(stats.getFiltered(), 2); + EXPECT_EQ(output.numRowsWritten(), 3); + EXPECT_FALSE(input.hasMore()); +} + +TEST_F(FilterExecutorTest, test_skip_datarange) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
+ auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + FilterExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + + auto const [state, skipped, call] = testee.skipRowsRange(1000, input); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(skipped, 3); + EXPECT_FALSE(input.hasMore()); +} + } // namespace aql } // namespace tests } // namespace arangodb From cb2cef238039c2e5b598af25c061b8f61bd2e2fb Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 24 Oct 2019 14:03:05 +0200 Subject: [PATCH 008/122] Added some more tests if executor cannot fulfill atMost --- tests/Aql/FilterExecutorTest.cpp | 119 ++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 3 deletions(-) diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 5a3868d7f737..72272917a7bb 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -375,7 +375,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange) { EXPECT_FALSE(input.hasMore()); } -TEST_F(FilterExecutorTest, test_skip_datarange) { +TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { // This fetcher will not be called! // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); @@ -388,12 +388,125 @@ TEST_F(FilterExecutorTest, test_skip_datarange) { buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; + + OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, + infos.registersToClear()); + EXPECT_EQ(output.numRowsWritten(), 0); + auto const [state, stats, call] = testee.produceRows(1000, input, output); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(stats.getFiltered(), 2); + EXPECT_EQ(output.numRowsWritten(), 3); + EXPECT_FALSE(input.hasMore()); + // Test the Call we send to upstream + EXPECT_EQ(call.offset, 0); + EXPECT_TRUE(std::holds_alternative(call.limit)); + // Avoid overfetching. I do not have a strong requirement on this + // test, however this is what we do right now. + ASSERT_TRUE(std::holds_alternative(call.batchSize)); + EXPECT_EQ(std::get(call.batchSize), 997); + EXPECT_FALSE(call.fullCount); +} + +TEST_F(FilterExecutorTest, test_skip_datarange_need_more) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
+ auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + FilterExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); + + AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; auto const [state, skipped, call] = testee.skipRowsRange(1000, input); - EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 3); EXPECT_FALSE(input.hasMore()); + + // Test the Call we send to upstream + EXPECT_EQ(call.offset, 0); + EXPECT_TRUE(std::holds_alternative(call.limit)); + // Avoid overfetching. I do not have a strong requirement on this + // test, however this is what we do right now. + ASSERT_TRUE(std::holds_alternative(call.batchSize)); + EXPECT_EQ(std::get(call.batchSize), 997); + EXPECT_FALSE(call.fullCount); +} + +TEST_F(FilterExecutorTest, test_produce_datarange_has_more) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + FilterExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, + infos.registersToClear()); + auto const [state, stats, call] = testee.produceRows(2, input, output); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(stats.getFiltered(), 1); + EXPECT_EQ(output.numRowsWritten(), 2); + EXPECT_TRUE(input.hasMore()); + // We still have two values in block: false and true + { + // pop false + auto const [state, row] = input.next(); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_FALSE(row.getValue(0).toBoolean()); + } + { + // pop true + auto const [state, row] = input.next(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_TRUE(row.getValue(0).toBoolean()); + } + EXPECT_FALSE(input.hasMore()); +} + +TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
+ auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + FilterExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + auto const [state, skipped, call] = testee.skipRowsRange(2, input); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(skipped, 2); + EXPECT_TRUE(input.hasMore()); + // We still have two values in block: false and true + { + // pop false + auto const [state, row] = input.next(); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_FALSE(row.getValue(0).toBoolean()); + } + { + // pop true + auto const [state, row] = input.next(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_TRUE(row.getValue(0).toBoolean()); + } + EXPECT_FALSE(input.hasMore()); } } // namespace aql From a46941ed34d7284a8f2063e682ef1213e11feb82 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 25 Oct 2019 09:12:38 +0200 Subject: [PATCH 009/122] Added heplper functions to the AqlCall struct to avoid mangeling around with variants everywhere. --- arangod/Aql/AqlCall.h | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index d0989fdab2b8..1e3a4c840463 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -36,9 +36,42 @@ struct AqlCall { std::size_t offset{0}; // TODO: The defaultBatchSize function could move into this file instead - Limit batchSize{ExecutionBlock::DefaultBatchSize()}; - Limit limit{Infinity{}}; + Limit softLimit{Infinity{}}; + Limit hardLimit{Infinity{}}; bool fullCount{false}; + + std::size_t getOffset() const { return offset; } + + std::size_t getLimit() const { + // By default we use batchsize + std::size_t limit = ExecutionBlock::DefaultBatchSize(); + // We are not allowed to go above softLimit + if (std::holds_alternative(softLimit)) { + limit = (std::min)(std::get(softLimit), limit); + } + // We are not allowed to go above hardLimit + if (std::holds_alternative(hardLimit)) { + limit = (std::min)(std::get(hardLimit), limit); + } + return limit; + } + + void didProduce(std::size_t n) { + if (std::holds_alternative(softLimit)) { + TRI_ASSERT(n <= std::get(softLimit)); + softLimit = std::get(softLimit) - n; + } + if (std::holds_alternative(hardLimit)) { + TRI_ASSERT(n <= std::get(hardLimit)); + hardLimit = std::get(hardLimit) - n; + } + } + + bool hasHardLimit() const { + return !std::holds_alternative(hardLimit); + } + + bool needsFullCount() const { return fullCount; } }; } // namespace aql From e248e45709e0f62ee7ea362ff152c299f30f8588 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 25 Oct 2019 09:26:27 +0200 Subject: [PATCH 010/122] Add a member DataRange to ExecutionBlockImpl. 
Also now all Fetcher expose a DataRange, this might not be the correct one every where --- arangod/Aql/ConstFetcher.h | 2 ++ arangod/Aql/ExecutionBlockImpl.cpp | 26 +++++++++++++++---- arangod/Aql/ExecutionBlockImpl.h | 2 ++ arangod/Aql/FilterExecutor.cpp | 25 +++--------------- arangod/Aql/MultiDependencySingleRowFetcher.h | 8 +++--- arangod/Aql/SingleBlockFetcher.h | 3 +++ tests/Aql/FilterExecutorTest.cpp | 10 +++---- 7 files changed, 40 insertions(+), 36 deletions(-) diff --git a/arangod/Aql/ConstFetcher.h b/arangod/Aql/ConstFetcher.h index fc282cbb2d38..49aefb19b901 100644 --- a/arangod/Aql/ConstFetcher.h +++ b/arangod/Aql/ConstFetcher.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_CONST_FETCHER_H #define ARANGOD_AQL_CONST_FETCHER_H +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" @@ -47,6 +48,7 @@ class ConstFetcher { using DependencyProxy = aql::DependencyProxy; public: + using DataRange = AqlItemBlockInputRange; explicit ConstFetcher(DependencyProxy& executionBlock); TEST_VIRTUAL ~ConstFetcher() = default; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 0162b0c62663..dbc9c2b37e73 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -110,7 +110,8 @@ ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, _infos(std::move(infos)), _executor(_rowFetcher, _infos), _outputItemRow(), - _query(*engine->getQuery()) { + _query(*engine->getQuery()), + _lastRange{} { // already insert ourselves into the statistics results if (_profile >= PROFILE_LEVEL_BLOCKS) { _engine->_stats.nodes.emplace(node->id(), ExecutionStats::Node()); @@ -122,6 +123,14 @@ ExecutionBlockImpl::~ExecutionBlockImpl() = default; template std::pair ExecutionBlockImpl::getSome(size_t atMost) { + /* + getSome(x) = > { + offset: 0, + batchSize : x, + limit : AqlCall::Infinity{}, + fullCount : | false + } + */ traceGetSomeBegin(atMost); auto result = getSomeWithoutTrace(atMost); return traceGetSomeEnd(result.first, std::move(result.second)); @@ -340,6 +349,14 @@ static SkipVariants constexpr skipType() { template std::pair ExecutionBlockImpl::skipSome(size_t const atMost) { + /* + skipSome(x) = > AqlCall{ + offset : x, + batchSize : 0, + limit : AqlCall::Infinity{}, + fullCount : | false + } + */ traceSkipSomeBegin(atMost); auto state = ExecutionState::HASMORE; @@ -589,15 +606,13 @@ std::pair ExecutionBlockImpl< template <> std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { - // TODO make this a member variable - Fetcher::DataRange emptyRange{}; // TODO: pop this from the stack instead of modify. // TODO: Need to make this member variable for waiting? AqlCall& myCall = stack.myCall(); // Skipping path while (myCall.offset > 0) { // Execute skipSome - auto const [state, skipped, call] = _executor.skipRowsRange(myCall.offset, emptyRange); + auto const [state, skipped, call] = _executor.skipRowsRange(myCall.offset, _lastRange); if (state == ExecutorState::DONE) { // We are done with this subquery // TODO Implement me properly, we would need to fill shadowRows into the block @@ -609,7 +624,8 @@ ExecutionBlockImpl::execute(AqlCallStack stack) { // Need to fetch more // TODO: we need to push the returned call into the stack, pop our call of. 
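 // _lastRange holds the input rows most recently handed over by the fetcher;
 // the assertion below guards that it has been fully drained before it is
 // replaced by a fresh range.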
size_t skipped = 0; - std::tie(_upstreamState, skipped, emptyRange) = _rowFetcher.execute(stack); + TRI_ASSERT(!_lastRange.hasMore()); + std::tie(_upstreamState, skipped, _lastRange) = _rowFetcher.execute(stack); TRI_ASSERT(skipped <= myCall.offset); myCall.offset -= skipped; } diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 53f2738dbea3..2bfa5cab9e90 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -251,6 +251,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { Query const& _query; size_t _skipped{}; + + typename Fetcher::DataRange _lastRange; }; } // namespace aql diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 601e955125b4..445e23da3fa4 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -122,7 +122,7 @@ std::tuple FilterExecutor::skipRowsRange( } AqlCall upstreamCall{}; - upstreamCall.batchSize = offset - skipped; + upstreamCall.softLimit = offset - skipped; return {state, skipped, upstreamCall}; } @@ -138,10 +138,7 @@ std::tuple FilterExecutor::produceRows( while (inputRange.hasMore() && limit > 0) { TRI_ASSERT(!output.isFull()); std::tie(state, input) = inputRange.next(); - if (!input) { - TRI_ASSERT(!inputRange.hasMore()); - break; - } + TRI_ASSERT(input.isInitialized()); if (input.getValue(_infos.getInputRegister()).toBoolean()) { output.copyRow(input); output.advanceRow(); @@ -152,22 +149,6 @@ std::tuple FilterExecutor::produceRows( } AqlCall upstreamCall{}; - upstreamCall.batchSize = limit; + upstreamCall.softLimit = limit; return {state, stats, upstreamCall}; } - -/* -skipSome(x) = > AqlCall{ - offset : x, - batchSize : 0, - limit : AqlCall::Infinity{}, - fullCount : | false -} - -getSome(x) = > { - offset: 0, - batchSize : x, - limit : AqlCall::Infinity{}, - fullCount : | false -} -*/ diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.h b/arangod/Aql/MultiDependencySingleRowFetcher.h index de8d8a7592e8..9e8bde0168cd 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.h +++ b/arangod/Aql/MultiDependencySingleRowFetcher.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_MULTI_DEPENDENCY_SINGLE_ROW_FETCHER_H #define ARANGOD_AQL_MULTI_DEPENDENCY_SINGLE_ROW_FETCHER_H +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" @@ -84,6 +85,7 @@ class MultiDependencySingleRowFetcher { }; public: + using DataRange = AqlItemBlockInputRange; explicit MultiDependencySingleRowFetcher(DependencyProxy& executionBlock); TEST_VIRTUAL ~MultiDependencySingleRowFetcher() = default; @@ -157,9 +159,9 @@ class MultiDependencySingleRowFetcher { bool isLastRowInBlock(DependencyInfo const& info) const; /** - * @brief If it returns true, there are no more data row in the current subquery - * level. If it returns false, there may or may not be more. - */ + * @brief If it returns true, there are no more data row in the current + * subquery level. If it returns false, there may or may not be more. 
+ */ bool noMoreDataRows(DependencyInfo const& info) const; std::pair preFetchNumberOfRowsForDependency(size_t dependency, diff --git a/arangod/Aql/SingleBlockFetcher.h b/arangod/Aql/SingleBlockFetcher.h index 1ad20448cccb..9020046f2f3f 100644 --- a/arangod/Aql/SingleBlockFetcher.h +++ b/arangod/Aql/SingleBlockFetcher.h @@ -24,6 +24,7 @@ #define ARANGOD_AQL_SINGLE_BLOCK_FETCHER_H #include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlItemMatrix.h" #include "Aql/DependencyProxy.h" #include "Aql/ExecutionState.h" @@ -48,6 +49,8 @@ class DependencyProxy; template class SingleBlockFetcher { public: + using DataRange = AqlItemBlockInputRange; + explicit SingleBlockFetcher(DependencyProxy& executionBlock) : _prefetched(false), _dependencyProxy(&executionBlock), diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 72272917a7bb..5f64b03278d7 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -400,11 +400,10 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { EXPECT_FALSE(input.hasMore()); // Test the Call we send to upstream EXPECT_EQ(call.offset, 0); - EXPECT_TRUE(std::holds_alternative(call.limit)); + EXPECT_FALSE(call.hasHardLimit()); // Avoid overfetching. I do not have a strong requirement on this // test, however this is what we do right now. - ASSERT_TRUE(std::holds_alternative(call.batchSize)); - EXPECT_EQ(std::get(call.batchSize), 997); + EXPECT_EQ(call.getLimit(), 997); EXPECT_FALSE(call.fullCount); } @@ -430,11 +429,10 @@ TEST_F(FilterExecutorTest, test_skip_datarange_need_more) { // Test the Call we send to upstream EXPECT_EQ(call.offset, 0); - EXPECT_TRUE(std::holds_alternative(call.limit)); + EXPECT_FALSE(call.hasHardLimit()); // Avoid overfetching. I do not have a strong requirement on this // test, however this is what we do right now. - ASSERT_TRUE(std::holds_alternative(call.batchSize)); - EXPECT_EQ(std::get(call.batchSize), 997); + EXPECT_EQ(call.getLimit(), 997); EXPECT_FALSE(call.fullCount); } From c40cfc0634a03ce49d2350c9c59929dd4ce64884 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 25 Oct 2019 10:01:59 +0200 Subject: [PATCH 011/122] Added preliminary minimal implementation of execute to DependencyProxy and SingleRowFetcher. Both untested and yet incomplete, they will only work for MVP. --- arangod/Aql/DependencyProxy.cpp | 9 +++++++++ arangod/Aql/DependencyProxy.h | 3 +++ arangod/Aql/SingleRowFetcher.cpp | 16 ++++++++++++++++ arangod/Aql/SingleRowFetcher.h | 5 +---- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arangod/Aql/DependencyProxy.cpp b/arangod/Aql/DependencyProxy.cpp index 03a893f165c0..1a320bfe2535 100644 --- a/arangod/Aql/DependencyProxy.cpp +++ b/arangod/Aql/DependencyProxy.cpp @@ -22,6 +22,7 @@ #include "DependencyProxy.h" +#include "Aql/AqlCallStack.h" #include "Aql/BlocksWithClients.h" #include "Aql/types.h" #include "Basics/Exceptions.h" @@ -30,6 +31,14 @@ using namespace arangodb; using namespace arangodb::aql; +template +std::tuple +DependencyProxy::execute(AqlCallStack& stack) { + // TODO: Test this, especially if upstreamBlock is done etc. + // We do not modify any local state here. 
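+ // Preliminary implementation: forward the whole call stack to the current
+ // upstream block and hand its result back unchanged.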
+ return upstreamBlock().execute(stack); +} + template ExecutionState DependencyProxy::prefetchBlock(size_t atMost) { TRI_ASSERT(atMost > 0); diff --git a/arangod/Aql/DependencyProxy.h b/arangod/Aql/DependencyProxy.h index 222c68e6f5db..36d946798517 100644 --- a/arangod/Aql/DependencyProxy.h +++ b/arangod/Aql/DependencyProxy.h @@ -73,6 +73,9 @@ class DependencyProxy { TEST_VIRTUAL ~DependencyProxy() = default; + // TODO Implement and document properly! + std::tuple execute(AqlCallStack& stack); + // This is only TEST_VIRTUAL, so we ignore this lint warning: // NOLINTNEXTLINE google-default-arguments TEST_VIRTUAL std::pair fetchBlock( diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index 6ec650eeb5b7..d8fdf2ed214f 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -73,6 +73,22 @@ SingleRowFetcher::fetchBlockForPassthrough(size_t atMost) { return _dependencyProxy->fetchBlockForPassthrough(atMost); } +template +std::tuple +SingleRowFetcher::execute(AqlCallStack& stack) { + auto const [state, skipped, block] = _dependencyProxy->execute(stack); + if (state == ExecutionState::WAITING) { + // On waiting we have nothing to return + return {state, 0, AqlItemBlockInputRange{}}; + } + if (state == ExecutionState::HASMORE) { + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::HASMORE, block, 0, block->size()}}; + } + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::DONE, block, 0, block->size()}}; +} + template std::pair SingleRowFetcher::skipRows(size_t atMost) { TRI_ASSERT(!_currentRow.isInitialized() || _currentRow.isLastRowInBlock()); diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index 47b8e22d4f13..7d2c971878b9 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -64,10 +64,7 @@ class SingleRowFetcher { public: // TODO implement and document - std::tuple execute(AqlCallStack& stack) { - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); - } + std::tuple execute(AqlCallStack& stack); /** * @brief Fetch one new AqlItemRow from upstream. 
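The execute() entry points sketched in the patch above all share one shape: the caller passes an AqlCallStack down the chain and receives the upstream ExecutionState, the number of rows skipped on its behalf, and a DataRange (an AqlItemBlockInputRange) holding the rows that were produced. The following sketch shows how a caller might drain such a range; it is illustrative only and not part of the patches, and it assumes a SingleRowFetcher named fetcher plus an already prepared AqlCallStack named stack:

  auto [state, skipped, range] = fetcher.execute(stack);
  if (state != ExecutionState::WAITING) {
    while (range.hasMore()) {
      // next() hands out one InputAqlItemRow together with the state of the
      // range after that row (HASMORE or DONE).
      auto [rowState, row] = range.next();
      // ... consume row ...
    }
  }

On WAITING nothing may be consumed and the request has to be repeated later, which is why SingleRowFetcher::execute returns an empty range in that case. The AqlCall on top of the stack steers how much may be produced: getLimit() clamps the default batch size against softLimit and hardLimit, didProduce() shrinks those limits as rows are written, and getOffset() and needsFullCount() expose the remaining skip and fullCount requests.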
From 236825280bb5fd03dfe01febfc4dcf1a2d2e2c2f Mon Sep 17 00:00:00 2001 From: hkernbach Date: Mon, 28 Oct 2019 18:24:13 +0100 Subject: [PATCH 012/122] added first implementation of count collect datarange produceRows function + test --- arangod/Aql/CountCollectExecutor.cpp | 29 +++++++++++++++++++ arangod/Aql/CountCollectExecutor.h | 12 ++++++++ tests/Aql/CountCollectExecutorTest.cpp | 39 +++++++++++++++++++++++--- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/arangod/Aql/CountCollectExecutor.cpp b/arangod/Aql/CountCollectExecutor.cpp index 228599464298..0005485f8caf 100644 --- a/arangod/Aql/CountCollectExecutor.cpp +++ b/arangod/Aql/CountCollectExecutor.cpp @@ -25,6 +25,8 @@ #include "CountCollectExecutor.h" +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -93,6 +95,33 @@ std::pair CountCollectExecutor::produceRows(OutputAqlIt return {_state, NoStats{}}; } +std::tuple CountCollectExecutor::produceRows( + size_t limit, AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + TRI_IF_FAILURE("CountCollectExecutor::produceRows") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + InputAqlItemRow input{CreateInvalidInputRowHint{}}; + + while (inputRange.hasMore() && limit > 0) { + std::tie(_executorState, input) = inputRange.next(); + + limit--; + _count++; + } + + // In general, we do not have an input row. In fact, we never fetch one. + output.setAllowSourceRowUninitialized(); + + // We must produce exactly one output row. + output.cloneValueInto(_infos.getOutputRegisterId(), + InputAqlItemRow{CreateInvalidInputRowHint{}}, + AqlValue(AqlValueHintUInt(getCount()))); + + AqlCall upstreamCall{}; + upstreamCall.softLimit = limit; + return {_executorState, NoStats{}, upstreamCall}; +} + void CountCollectExecutor::incrCountBy(size_t incr) noexcept { _count += incr; } uint64_t CountCollectExecutor::getCount() noexcept { return _count; } diff --git a/arangod/Aql/CountCollectExecutor.h b/arangod/Aql/CountCollectExecutor.h index 76243a3e8104..1288efc659f7 100644 --- a/arangod/Aql/CountCollectExecutor.h +++ b/arangod/Aql/CountCollectExecutor.h @@ -36,6 +36,8 @@ namespace arangodb { namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; class InputAqlItemRow; class NoStats; class ExecutorInfos; @@ -91,6 +93,15 @@ class CountCollectExecutor { std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Row of Aql Values. 
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + std::tuple produceRows(size_t atMost, + AqlItemBlockInputRange& input, + OutputAqlItemRow& output); + void incrCountBy(size_t incr) noexcept; uint64_t getCount() noexcept;; @@ -104,6 +115,7 @@ class CountCollectExecutor { Infos const& _infos; Fetcher& _fetcher; ExecutionState _state; + ExecutorState _executorState; uint64_t _count; }; diff --git a/tests/Aql/CountCollectExecutorTest.cpp b/tests/Aql/CountCollectExecutorTest.cpp index dea9fc1f2982..45c9e276379a 100644 --- a/tests/Aql/CountCollectExecutorTest.cpp +++ b/tests/Aql/CountCollectExecutorTest.cpp @@ -20,9 +20,11 @@ /// @author Heiko Kernbach //////////////////////////////////////////////////////////////////////////////// +#include "AqlItemBlockHelper.h" #include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/CountCollectExecutor.h" #include "Aql/InputAqlItemRow.h" @@ -60,7 +62,8 @@ class CountCollectExecutorTest : public ::testing::Test { TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_wait) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; @@ -81,7 +84,8 @@ TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), true); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; @@ -106,7 +110,8 @@ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doesnt_wait) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; @@ -127,7 +132,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doe TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; OutputAqlItemRow result{std::move(block), outputRegisters, @@ -157,6 +163,31 @@ TEST_F(CountCollectExecutorTest, 
there_are_rows_in_the_upstream_the_producer_wai ASSERT_EQ(3, fetcher.totalSkipped()); } +TEST_F(CountCollectExecutorTest, test_produce_datarange) { + CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + CountCollectExecutor testee(fetcher, infos); + + SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{}}); + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + + OutputAqlItemRow output(std::move(block), outputRegisters, + infos.registersToKeep(), infos.registersToClear()); + EXPECT_EQ(output.numRowsWritten(), 0); + auto const [state, stats, call] = testee.produceRows(1000, input, output); + ASSERT_EQ(state, ExecutorState::DONE); + ASSERT_TRUE(output.produced()); + + auto block = output.stealBlock(); + AqlValue x = block->getValue(0, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.toInt64(), 0); + + ASSERT_EQ(0, fetcher.totalSkipped()); +} + } // namespace aql } // namespace tests } // namespace arangodb From acb414f68c38bccb2c89a8433e47cc86323d92d0 Mon Sep 17 00:00:00 2001 From: hkernbach Date: Mon, 28 Oct 2019 18:25:48 +0100 Subject: [PATCH 013/122] Revert "added first implementation of count collect datarange produceRows function + test" This reverts commit 236825280bb5fd03dfe01febfc4dcf1a2d2e2c2f. --- arangod/Aql/CountCollectExecutor.cpp | 29 ------------------- arangod/Aql/CountCollectExecutor.h | 12 -------- tests/Aql/CountCollectExecutorTest.cpp | 39 +++----------------------- 3 files changed, 4 insertions(+), 76 deletions(-) diff --git a/arangod/Aql/CountCollectExecutor.cpp b/arangod/Aql/CountCollectExecutor.cpp index 0005485f8caf..228599464298 100644 --- a/arangod/Aql/CountCollectExecutor.cpp +++ b/arangod/Aql/CountCollectExecutor.cpp @@ -25,8 +25,6 @@ #include "CountCollectExecutor.h" -#include "Aql/AqlCall.h" -#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -95,33 +93,6 @@ std::pair CountCollectExecutor::produceRows(OutputAqlIt return {_state, NoStats{}}; } -std::tuple CountCollectExecutor::produceRows( - size_t limit, AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { - TRI_IF_FAILURE("CountCollectExecutor::produceRows") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - InputAqlItemRow input{CreateInvalidInputRowHint{}}; - - while (inputRange.hasMore() && limit > 0) { - std::tie(_executorState, input) = inputRange.next(); - - limit--; - _count++; - } - - // In general, we do not have an input row. In fact, we never fetch one. - output.setAllowSourceRowUninitialized(); - - // We must produce exactly one output row. 
- output.cloneValueInto(_infos.getOutputRegisterId(), - InputAqlItemRow{CreateInvalidInputRowHint{}}, - AqlValue(AqlValueHintUInt(getCount()))); - - AqlCall upstreamCall{}; - upstreamCall.softLimit = limit; - return {_executorState, NoStats{}, upstreamCall}; -} - void CountCollectExecutor::incrCountBy(size_t incr) noexcept { _count += incr; } uint64_t CountCollectExecutor::getCount() noexcept { return _count; } diff --git a/arangod/Aql/CountCollectExecutor.h b/arangod/Aql/CountCollectExecutor.h index 1288efc659f7..76243a3e8104 100644 --- a/arangod/Aql/CountCollectExecutor.h +++ b/arangod/Aql/CountCollectExecutor.h @@ -36,8 +36,6 @@ namespace arangodb { namespace aql { -struct AqlCall; -class AqlItemBlockInputRange; class InputAqlItemRow; class NoStats; class ExecutorInfos; @@ -93,15 +91,6 @@ class CountCollectExecutor { std::pair produceRows(OutputAqlItemRow& output); - /** - * @brief produce the next Row of Aql Values. - * - * @return ExecutorState, the stats, and a new Call that needs to be send to upstream - */ - std::tuple produceRows(size_t atMost, - AqlItemBlockInputRange& input, - OutputAqlItemRow& output); - void incrCountBy(size_t incr) noexcept; uint64_t getCount() noexcept;; @@ -115,7 +104,6 @@ class CountCollectExecutor { Infos const& _infos; Fetcher& _fetcher; ExecutionState _state; - ExecutorState _executorState; uint64_t _count; }; diff --git a/tests/Aql/CountCollectExecutorTest.cpp b/tests/Aql/CountCollectExecutorTest.cpp index 45c9e276379a..dea9fc1f2982 100644 --- a/tests/Aql/CountCollectExecutorTest.cpp +++ b/tests/Aql/CountCollectExecutorTest.cpp @@ -20,11 +20,9 @@ /// @author Heiko Kernbach //////////////////////////////////////////////////////////////////////////////// -#include "AqlItemBlockHelper.h" #include "RowFetcherHelper.h" #include "gtest/gtest.h" -#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/CountCollectExecutor.h" #include "Aql/InputAqlItemRow.h" @@ -62,8 +60,7 @@ class CountCollectExecutorTest : public ::testing::Test { TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_wait) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; @@ -84,8 +81,7 @@ TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; @@ -110,8 +106,7 @@ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doesnt_wait) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), 
false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; @@ -132,8 +127,7 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doe TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); CountCollectExecutor testee(fetcher, infos); NoStats stats{}; OutputAqlItemRow result{std::move(block), outputRegisters, @@ -163,31 +157,6 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_wai ASSERT_EQ(3, fetcher.totalSkipped()); } -TEST_F(CountCollectExecutorTest, test_produce_datarange) { - CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); - auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - CountCollectExecutor testee(fetcher, infos); - - SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; - - OutputAqlItemRow output(std::move(block), outputRegisters, - infos.registersToKeep(), infos.registersToClear()); - EXPECT_EQ(output.numRowsWritten(), 0); - auto const [state, stats, call] = testee.produceRows(1000, input, output); - ASSERT_EQ(state, ExecutorState::DONE); - ASSERT_TRUE(output.produced()); - - auto block = output.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 0); - - ASSERT_EQ(0, fetcher.totalSkipped()); -} - } // namespace aql } // namespace tests } // namespace arangodb From e0205b5e813e579942f11a2f33d544862f1d0542 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 30 Oct 2019 12:46:10 +0100 Subject: [PATCH 014/122] First draft of execute implementation, not yet handling ShadowRows though. 
This state is broken in the sense that a server does not boot with it and catch tests are broken --- arangod/Aql/AllRowsFetcher.cpp | 2 +- arangod/Aql/AllRowsFetcher.h | 6 +- arangod/Aql/AqlCall.h | 35 +++++ arangod/Aql/AqlCallStack.cpp | 12 +- arangod/Aql/AqlCallStack.h | 7 +- arangod/Aql/AqlItemBlockInputRange.cpp | 5 +- arangod/Aql/AqlItemBlockInputRange.h | 2 +- arangod/Aql/ExecutionBlockImpl.cpp | 203 ++++++++++++++++++++----- arangod/Aql/ExecutionBlockImpl.h | 7 +- arangod/Aql/FilterExecutor.cpp | 6 +- arangod/Aql/SingleRowFetcher.cpp | 6 +- 11 files changed, 235 insertions(+), 56 deletions(-) diff --git a/arangod/Aql/AllRowsFetcher.cpp b/arangod/Aql/AllRowsFetcher.cpp index 1a48f56cc5d2..d9985d987b01 100644 --- a/arangod/Aql/AllRowsFetcher.cpp +++ b/arangod/Aql/AllRowsFetcher.cpp @@ -83,7 +83,7 @@ std::pair AllRowsFetcher::fetchRow(size_t atMos _nextReturn = 0; _dataFetchedState = DATA_FETCH_ONGOING; } - [[fallthrough]]; + [[fallthrough]]; case DATA_FETCH_ONGOING: { TRI_ASSERT(_nextReturn < _rowIndexes.size()); TRI_ASSERT(_aqlItemMatrix != nullptr); diff --git a/arangod/Aql/AllRowsFetcher.h b/arangod/Aql/AllRowsFetcher.h index 9ac5f2d3300e..e271ac3b1c6d 100644 --- a/arangod/Aql/AllRowsFetcher.h +++ b/arangod/Aql/AllRowsFetcher.h @@ -33,6 +33,9 @@ #include #include +// TODO REMOVE ME TEMPORARY +#include "Aql/AqlItemBlockInputRange.h" + namespace arangodb { namespace aql { @@ -92,7 +95,8 @@ class AllRowsFetcher { TEST_VIRTUAL ~AllRowsFetcher() = default; - using DataRange = std::shared_ptr; + // TODO FIXME, this Range does not work here. + using DataRange = AqlItemBlockInputRange; protected: // only for testing! Does not initialize _dependencyProxy! diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 1e3a4c840463..8f6592e3ee07 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -34,6 +34,36 @@ struct AqlCall { class Infinity {}; using Limit = std::variant; + // TODO Remove me, this will not be necessary later + static AqlCall SimulateSkipSome(std::size_t toSkip) { + AqlCall call; + call.offset = toSkip; + call.softLimit = 0; + call.hardLimit = AqlCall::Infinity{}; + call.fullCount = false; + return call; + } + + // TODO Remove me, this will not be necessary later + static AqlCall SimulateGetSome(std::size_t atMost) { + AqlCall call; + call.offset = 0; + call.softLimit = atMost; + call.hardLimit = AqlCall::Infinity{}; + call.fullCount = false; + return call; + } + + // TODO Remove me, this will not be necessary later + static bool IsSkipSomeCall(AqlCall const& call) { + return !call.hasHardLimit() && call.getLimit() == 0 && call.getOffset() > 0; + } + + // TODO Remove me, this will not be necessary later + static bool IsGetSomeCall(AqlCall const& call) { + return !call.hasHardLimit() && call.getLimit() > 0 && call.getOffset() == 0; + } + std::size_t offset{0}; // TODO: The defaultBatchSize function could move into this file instead Limit softLimit{Infinity{}}; @@ -56,6 +86,11 @@ struct AqlCall { return limit; } + void didSkip(std::size_t n) { + TRI_ASSERT(n <= offset); + offset -= n; + } + void didProduce(std::size_t n) { if (std::holds_alternative(softLimit)) { TRI_ASSERT(n <= std::get(softLimit)); diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 28bd80c93029..7ce044200658 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -45,10 +45,18 @@ AqlCallStack::AqlCallStack(AqlCallStack const& other) bool AqlCallStack::isRelevant() const { return _depth == 0; } -AqlCall& AqlCallStack::myCall() { +AqlCall&& 
AqlCallStack::popCall() { TRI_ASSERT(isRelevant()); TRI_ASSERT(!_operations.empty()); - return _operations.top(); + auto call = _operations.top(); + _operations.pop(); + return std::move(call); +} + +void AqlCallStack::pushCall(AqlCall&& call) { + // TODO is this correct on subqueries? + TRI_ASSERT(isRelevant()); + _operations.push(call); } void AqlCallStack::stackUpMissingCalls() { diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 82bbddcbed5c..f34e0f3d0d71 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -43,8 +43,11 @@ class AqlCallStack { bool isRelevant() const; // Get the top most Call element (this must be relevant). - // Caller is allowed to modify it, if necessary - AqlCall& myCall(); + // This is popped of the stack and caller can take responsibility for it + AqlCall&& popCall(); + + // Put another call on top of the stack. + void pushCall(AqlCall&& call); // fill up all missing calls within this stack s.t. we reach depth == 0 // This needs to be called if an executor requires to be fully executed, even if skipped, diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 7f9b04efc841..f1b656c3cc71 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -25,10 +25,9 @@ using namespace arangodb; using namespace arangodb::aql; -AqlItemBlockInputRange::AqlItemBlockInputRange() - : _block(nullptr), _rowIndex(0), _endIndex(0), _finalState(ExecutorState::HASMORE) { +AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state) + : _block(nullptr), _rowIndex(0), _endIndex(0), _finalState(state) { TRI_ASSERT(!hasMore()); - TRI_ASSERT(state() == ExecutorState::HASMORE); } AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index a1195cc46108..01eef1512402 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -31,7 +31,7 @@ namespace arangodb::aql { class AqlItemBlockInputRange { public: - AqlItemBlockInputRange(); + explicit AqlItemBlockInputRange(ExecutorState state); AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr const&, std::size_t, std::size_t endIndex); diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index dbc9c2b37e73..b776c6f44351 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -111,7 +111,7 @@ ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, _executor(_rowFetcher, _infos), _outputItemRow(), _query(*engine->getQuery()), - _lastRange{} { + _lastRange{ExecutorState::HASMORE} { // already insert ourselves into the statistics results if (_profile >= PROFILE_LEVEL_BLOCKS) { _engine->_stats.nodes.emplace(node->id(), ExecutionStats::Node()); @@ -473,8 +473,27 @@ std::pair ExecutionBlockImpl::shutdown(int err template std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { - // TODO implement! 
- TRI_ASSERT(false); + // TODO remove this IF + if (std::is_same::value) { + // Only this executor is fully implemented + return executeWithoutTrace(stack); + } + + // Fall back to getSome/skipSome + auto myCall = stack.popCall(); + TRI_ASSERT(AqlCall::IsSkipSomeCall(myCall) || AqlCall::IsGetSomeCall(myCall)); + if (AqlCall::IsSkipSomeCall(myCall)) { + auto const [state, skipped] = skipSome(myCall.getOffset()); + if (state != ExecutionState::WAITING) { + myCall.didSkip(skipped); + } + return {state, skipped, nullptr}; + } else if (AqlCall::IsGetSomeCall(myCall)) { + auto const [state, block] = getSome(myCall.getLimit()); + // We do not need to count as softLimit will be overwritten, and hard cannot be set. + return {state, 0, block}; + } + // Should never get here! THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } @@ -601,43 +620,6 @@ std::pair ExecutionBlockImpl< return {ExecutionState::DONE, {errorCode}}; } -// TODO this is only temporary, remove me -// Just to make sure everything compiles! -template <> -std::tuple -ExecutionBlockImpl::execute(AqlCallStack stack) { - // TODO: pop this from the stack instead of modify. - // TODO: Need to make this member variable for waiting? - AqlCall& myCall = stack.myCall(); - // Skipping path - while (myCall.offset > 0) { - // Execute skipSome - auto const [state, skipped, call] = _executor.skipRowsRange(myCall.offset, _lastRange); - if (state == ExecutorState::DONE) { - // We are done with this subquery - // TODO Implement me properly, we would need to fill shadowRows into the block - return {ExecutionState::DONE, skipped, nullptr}; - } - TRI_ASSERT(skipped <= myCall.offset); - myCall.offset -= skipped; - if (myCall.offset > 0) { - // Need to fetch more - // TODO: we need to push the returned call into the stack, pop our call of. - size_t skipped = 0; - TRI_ASSERT(!_lastRange.hasMore()); - std::tie(_upstreamState, skipped, _lastRange) = _rowFetcher.execute(stack); - TRI_ASSERT(skipped <= myCall.offset); - myCall.offset -= skipped; - } - } - - // TODO add GetSome path - - // TODO implement! - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); -} - } // namespace aql } // namespace arangodb @@ -810,6 +792,147 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, return _engine->itemBlockManager().requestBlock(nrItems, nrRegs); } +// TODO: Remove this special implementations +template <> +std::pair ExecutionBlockImpl::getSome(size_t atMost) { + AqlCallStack stack{AqlCall::SimulateGetSome(atMost)}; + auto const [state, skipped, block] = execute(stack); + return {state, block}; +} + +template <> +std::pair ExecutionBlockImpl::skipSome(size_t const toSkip) { + AqlCallStack stack{AqlCall::SimulateSkipSome(toSkip)}; + auto const [state, skipped, block] = execute(stack); + return {state, skipped}; +} + +template +std::tuple +ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { + // TODO implement! + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +// TODO move me up +enum ExecState { SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE }; + +namespace { +// This cannot return upstream call or shadowrows. +ExecState NextState(AqlCall const& call) { + if (call.getOffset() > 0) { + // First skip + return ExecState::SKIP; + } + if (call.getLimit() > 0) { + // Then produce + return ExecState::PRODUCE; + } + if (call.needsFullCount()) { + // then fullcount + return ExecState::FULLCOUNT; + } + // now we are done. 
+ return ExecState::DONE; +} +} // namespace + +template <> +std::tuple +ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { + if (!_outputItemRow) { + // TODO: FIXME Hard coded size + SharedAqlItemBlockPtr newBlock = + _engine->itemBlockManager().requestBlock(1000, _infos.numberOfOutputRegisters()); + TRI_ASSERT(newBlock != nullptr); + TRI_ASSERT(newBlock->size() == 1000); + _outputItemRow = createOutputRow(newBlock); + } + size_t skipped = 0; + + // TODO: Need to make this member variable for waiting? + AqlCall myCall = stack.popCall(); + ExecState execState = ::NextState(myCall); + AqlCall executorRequest; + + while (execState != ExecState::DONE) { + switch (execState) { + case ExecState::SKIP: { + auto [state, skippedLocal, call] = + _executor.skipRowsRange(myCall.getOffset(), _lastRange); + myCall.didSkip(skippedLocal); + skipped += skippedLocal; + + if (state == ExecutorState::DONE) { + execState = ExecState::SHADOWROWS; + } else if (myCall.getOffset() > 0) { + // We need to request more + executorRequest = call; + execState = ExecState::UPSTREAM; + } else { + // We are done with skipping. Skip is not allowed to request more + execState = ::NextState(myCall); + } + break; + } + case ExecState::PRODUCE: { + auto linesBefore = _outputItemRow->numRowsWritten(); + TRI_ASSERT(myCall.getLimit() > 0); + // Execute getSome + auto const [state, stats, call] = + _executor.produceRows(myCall.getLimit(), _lastRange, *_outputItemRow); + auto written = _outputItemRow->numRowsWritten() - linesBefore; + myCall.didProduce(written); + if (state == ExecutorState::DONE) { + execState = ExecState::SHADOWROWS; + } else if (myCall.getLimit() > 0) { + // We need to request more + executorRequest = call; + execState = ExecState::UPSTREAM; + } else { + // We are done with skipping. Skip is not allowed to request more + execState = ::NextState(myCall); + } + break; + } + case ExecState::FULLCOUNT: { + TRI_ASSERT(false); + } + case ExecState::UPSTREAM: { + // If this triggers the executors produceRows function has returned + // HASMORE even if it new that upstream has no further rows. + TRI_ASSERT(_upstreamState != ExecutionState::DONE); + TRI_ASSERT(!_lastRange.hasMore()); + size_t skippedLocal = 0; + stack.pushCall(std::move(executorRequest)); + std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); + // Do we need to call it? + // myCall.didSkip(skippedLocal); + skipped += skippedLocal; + execState = ::NextState(myCall); + break; + } + case ExecState::SHADOWROWS: { + // Not implemented yet + // TRI_ASSERT(false); + // execState = ::NextState(myCall); + execState = ExecState::DONE; + break; + } + default: + // unreachable + TRI_ASSERT(false); + } + } + + auto outputBlock = _outputItemRow->stealBlock(); + // This is not strictly necessary here, as we shouldn't be called again + // after DONE. + _outputItemRow.reset(); + return {_upstreamState, skipped, std::move(outputBlock)}; +} + template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 2bfa5cab9e90..bbc6579b7e34 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -189,13 +189,18 @@ class ExecutionBlockImpl final : public ExecutionBlock { std::tuple execute(AqlCallStack stack) override; private: + /** + * @brief Inner execute() part, without the tracing calls. 
+ */ + std::tuple executeWithoutTrace(AqlCallStack stack); + /** * @brief Inner getSome() part, without the tracing calls. */ std::pair getSomeWithoutTrace(size_t atMost); /** - * @brief Inner getSome() part, without the tracing calls. + * @brief Inner skipSome() part, without the tracing calls. */ std::pair skipSomeOnceWithoutTrace(size_t atMost); diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 445e23da3fa4..cbc856397906 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -132,12 +132,10 @@ std::tuple FilterExecutor::produceRows( THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } FilterStats stats{}; - ExecutorState state = ExecutorState::HASMORE; - InputAqlItemRow input{CreateInvalidInputRowHint{}}; while (inputRange.hasMore() && limit > 0) { TRI_ASSERT(!output.isFull()); - std::tie(state, input) = inputRange.next(); + auto const& [state, input] = inputRange.next(); TRI_ASSERT(input.isInitialized()); if (input.getValue(_infos.getInputRegister()).toBoolean()) { output.copyRow(input); @@ -150,5 +148,5 @@ std::tuple FilterExecutor::produceRows( AqlCall upstreamCall{}; upstreamCall.softLimit = limit; - return {state, stats, upstreamCall}; + return {inputRange.peek().first, stats, upstreamCall}; } diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index d8fdf2ed214f..34219bde206e 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -79,12 +79,16 @@ SingleRowFetcher::execute(AqlCallStack& stack) { auto const [state, skipped, block] = _dependencyProxy->execute(stack); if (state == ExecutionState::WAITING) { // On waiting we have nothing to return - return {state, 0, AqlItemBlockInputRange{}}; + return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; } if (state == ExecutionState::HASMORE) { + TRI_ASSERT(block != nullptr); return {state, skipped, AqlItemBlockInputRange{ExecutorState::HASMORE, block, 0, block->size()}}; } + if (block == nullptr) { + return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE}}; + } return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, block, 0, block->size()}}; } From 96c0fda0cb1192a2b0f979a48c2cd1e31d519c80 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 31 Oct 2019 15:41:18 +0100 Subject: [PATCH 015/122] First working draft of execute() call in ExecutionBlockImpl. Tests are locally green, however there is at least one query still red (on ShadowRows) --- arangod/Aql/AqlCallStack.cpp | 6 ++ arangod/Aql/AqlCallStack.h | 4 ++ arangod/Aql/ExecutionBlockImpl.cpp | 108 ++++++++++++++++++++++++++++- arangod/Aql/ExecutionBlockImpl.h | 10 ++- 4 files changed, 126 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 7ce044200658..0c5f44b2647d 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -53,6 +53,12 @@ AqlCall&& AqlCallStack::popCall() { return std::move(call); } +AqlCall const& AqlCallStack::peek() const { + TRI_ASSERT(isRelevant()); + TRI_ASSERT(!_operations.empty()); + return _operations.top(); +} + void AqlCallStack::pushCall(AqlCall&& call) { // TODO is this correct on subqueries? 
TRI_ASSERT(isRelevant()); diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index f34e0f3d0d71..03a9a91d34c5 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -46,6 +46,10 @@ class AqlCallStack { // This is popped of the stack and caller can take responsibility for it AqlCall&& popCall(); + // Peek at the top most Call element (this must be relevant). + // The responsibility will stay at the stack + AqlCall const& peek() const; + // Put another call on top of the stack. void pushCall(AqlCall&& call); diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index b776c6f44351..15bebd23b963 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -65,12 +65,40 @@ #include "Aql/SubqueryExecutor.h" #include "Aql/SubqueryStartExecutor.h" #include "Aql/TraversalExecutor.h" +#include "Basics/system-functions.h" +#include "Transaction/Context.h" + +#include +#include #include using namespace arangodb; using namespace arangodb::aql; +namespace { + +std::string const doneString = "DONE"; +std::string const hasMoreString = "HASMORE"; +std::string const waitingString = "WAITING"; +std::string const unknownString = "UNKNOWN"; + +std::string const& stateToString(aql::ExecutionState state) { + switch (state) { + case aql::ExecutionState::DONE: + return doneString; + case aql::ExecutionState::HASMORE: + return hasMoreString; + case aql::ExecutionState::WAITING: + return waitingString; + default: + // just to suppress a warning .. + return unknownString; + } +} + +} // namespace + /* * Creates a metafunction `checkName` that tests whether a class has a method * named `methodName`, used like this: @@ -433,6 +461,7 @@ template std::pair ExecutionBlockImpl::initializeCursor(InputAqlItemRow const& input) { // reinitialize the DependencyProxy _dependencyProxy.reset(); + _lastRange = DataRange(ExecutorState::HASMORE); // destroy and re-create the Fetcher _rowFetcher.~Fetcher(); @@ -476,7 +505,10 @@ std::tuple ExecutionBlockImpl::value) { // Only this executor is fully implemented - return executeWithoutTrace(stack); + traceExecuteBegin(stack); + auto res = executeWithoutTrace(stack); + traceExecuteEnd(res); + return res; } // Fall back to getSome/skipSome @@ -497,6 +529,79 @@ std::tuple ExecutionBlockImpl +void ExecutionBlockImpl::traceExecuteBegin(AqlCallStack const& stack) { + if (_profile >= PROFILE_LEVEL_BLOCKS) { + if (_getSomeBegin <= 0.0) { + _getSomeBegin = TRI_microtime(); + } + if (_profile >= PROFILE_LEVEL_TRACE_1) { + auto const node = getPlanNode(); + auto const queryId = this->_engine->getQuery()->id(); + // TODO make sure this works also if stack is non relevant, e.g. passed through by outer subquery. + auto const& call = stack.peek(); + LOG_TOPIC("1e717", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute type=" << node->getTypeString() + << " offset=" << call.getOffset() << " limit= " << call.getLimit() + << " this=" << (uintptr_t)this << " id=" << node->id(); + } + } +} + +template +void ExecutionBlockImpl::traceExecuteEnd( + std::tuple const& result) { + if (_profile >= PROFILE_LEVEL_BLOCKS) { + auto const& [state, skipped, block] = result; + auto const items = block != nullptr ? 
block->size() : 0; + ExecutionNode const* en = getPlanNode(); + ExecutionStats::Node stats; + stats.calls = 1; + stats.items = skipped + items; + if (state != ExecutionState::WAITING) { + stats.runtime = TRI_microtime() - _getSomeBegin; + _getSomeBegin = 0.0; + } + + auto it = _engine->_stats.nodes.find(en->id()); + if (it != _engine->_stats.nodes.end()) { + it->second += stats; + } else { + _engine->_stats.nodes.emplace(en->id(), stats); + } + + if (_profile >= PROFILE_LEVEL_TRACE_1) { + ExecutionNode const* node = getPlanNode(); + auto const queryId = this->_engine->getQuery()->id(); + LOG_TOPIC("60bbc", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute done type=" << node->getTypeString() << " this=" << (uintptr_t)this + << " id=" << node->id() << " state=" << stateToString(state) + << " skipped=" << skipped << " produced=" << items; + + if (_profile >= PROFILE_LEVEL_TRACE_2) { + if (block == nullptr) { + LOG_TOPIC("9b3f4", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute type=" << node->getTypeString() << " result: nullptr"; + } else { + VPackBuilder builder; + { + VPackObjectBuilder guard(&builder); + block->toVelocyPack(transaction(), builder); + } + auto options = transaction()->transactionContextPtr()->getVPackOptions(); + LOG_TOPIC("f12f9", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute type=" << node->getTypeString() + << " result: " << VPackDumper::toString(builder.slice(), options); + } + } + } + } +} + // Work around GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480 // Without the namespaces it fails with // error: specialization of 'template std::pair arangodb::aql::ExecutionBlockImpl::initializeCursor(arangodb::aql::AqlItemBlock*, size_t)' in different namespace @@ -883,6 +988,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { auto const [state, stats, call] = _executor.produceRows(myCall.getLimit(), _lastRange, *_outputItemRow); auto written = _outputItemRow->numRowsWritten() - linesBefore; + _engine->_stats += stats; myCall.didProduce(written); if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index bbc6579b7e34..920d825e6dd0 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -94,6 +94,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { using Fetcher = typename Executor::Fetcher; using ExecutorStats = typename Executor::Stats; using Infos = typename Executor::Infos; + using DataRange = typename Executor::Fetcher::DataRange; + using DependencyProxy = typename aql::DependencyProxy; @@ -229,6 +231,12 @@ class ExecutionBlockImpl final : public ExecutionBlock { /// @brief request an AqlItemBlock from the memory manager SharedAqlItemBlockPtr requestBlock(size_t nrItems, RegisterCount nrRegs); + // Trace the start of a getSome call + void traceExecuteBegin(AqlCallStack const& stack); + + // Trace the end of a getSome call, potentially with result + void traceExecuteEnd(std::tuple const& result); + private: /** * @brief Used to allow the row Fetcher to access selected methods of this @@ -257,7 +265,7 @@ class ExecutionBlockImpl final : public ExecutionBlock { size_t _skipped{}; - typename Fetcher::DataRange _lastRange; + DataRange _lastRange; }; } // namespace aql From 4d019a1d51f2f0932b893bf20de3863a04bda64d Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 31 Oct 2019 17:01:19 +0100 Subject: [PATCH 016/122] Removed non finished 
implementation from this Branch. It moved to seperate branch --- arangod/Aql/ExecutionBlockImpl.cpp | 159 ----------------------------- arangod/Aql/SingleRowFetcher.cpp | 17 +-- 2 files changed, 2 insertions(+), 174 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 15bebd23b963..c1f90547643b 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -151,14 +151,6 @@ ExecutionBlockImpl::~ExecutionBlockImpl() = default; template std::pair ExecutionBlockImpl::getSome(size_t atMost) { - /* - getSome(x) = > { - offset: 0, - batchSize : x, - limit : AqlCall::Infinity{}, - fullCount : | false - } - */ traceGetSomeBegin(atMost); auto result = getSomeWithoutTrace(atMost); return traceGetSomeEnd(result.first, std::move(result.second)); @@ -377,14 +369,6 @@ static SkipVariants constexpr skipType() { template std::pair ExecutionBlockImpl::skipSome(size_t const atMost) { - /* - skipSome(x) = > AqlCall{ - offset : x, - batchSize : 0, - limit : AqlCall::Infinity{}, - fullCount : | false - } - */ traceSkipSomeBegin(atMost); auto state = ExecutionState::HASMORE; @@ -502,15 +486,6 @@ std::pair ExecutionBlockImpl::shutdown(int err template std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { - // TODO remove this IF - if (std::is_same::value) { - // Only this executor is fully implemented - traceExecuteBegin(stack); - auto res = executeWithoutTrace(stack); - traceExecuteEnd(res); - return res; - } - // Fall back to getSome/skipSome auto myCall = stack.popCall(); TRI_ASSERT(AqlCall::IsSkipSomeCall(myCall) || AqlCall::IsGetSomeCall(myCall)); @@ -897,21 +872,6 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, return _engine->itemBlockManager().requestBlock(nrItems, nrRegs); } -// TODO: Remove this special implementations -template <> -std::pair ExecutionBlockImpl::getSome(size_t atMost) { - AqlCallStack stack{AqlCall::SimulateGetSome(atMost)}; - auto const [state, skipped, block] = execute(stack); - return {state, block}; -} - -template <> -std::pair ExecutionBlockImpl::skipSome(size_t const toSkip) { - AqlCallStack stack{AqlCall::SimulateSkipSome(toSkip)}; - auto const [state, skipped, block] = execute(stack); - return {state, skipped}; -} - template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { @@ -920,125 +880,6 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } -// TODO move me up -enum ExecState { SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE }; - -namespace { -// This cannot return upstream call or shadowrows. -ExecState NextState(AqlCall const& call) { - if (call.getOffset() > 0) { - // First skip - return ExecState::SKIP; - } - if (call.getLimit() > 0) { - // Then produce - return ExecState::PRODUCE; - } - if (call.needsFullCount()) { - // then fullcount - return ExecState::FULLCOUNT; - } - // now we are done. - return ExecState::DONE; -} -} // namespace - -template <> -std::tuple -ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { - if (!_outputItemRow) { - // TODO: FIXME Hard coded size - SharedAqlItemBlockPtr newBlock = - _engine->itemBlockManager().requestBlock(1000, _infos.numberOfOutputRegisters()); - TRI_ASSERT(newBlock != nullptr); - TRI_ASSERT(newBlock->size() == 1000); - _outputItemRow = createOutputRow(newBlock); - } - size_t skipped = 0; - - // TODO: Need to make this member variable for waiting? 
- AqlCall myCall = stack.popCall(); - ExecState execState = ::NextState(myCall); - AqlCall executorRequest; - - while (execState != ExecState::DONE) { - switch (execState) { - case ExecState::SKIP: { - auto [state, skippedLocal, call] = - _executor.skipRowsRange(myCall.getOffset(), _lastRange); - myCall.didSkip(skippedLocal); - skipped += skippedLocal; - - if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; - } else if (myCall.getOffset() > 0) { - // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; - } else { - // We are done with skipping. Skip is not allowed to request more - execState = ::NextState(myCall); - } - break; - } - case ExecState::PRODUCE: { - auto linesBefore = _outputItemRow->numRowsWritten(); - TRI_ASSERT(myCall.getLimit() > 0); - // Execute getSome - auto const [state, stats, call] = - _executor.produceRows(myCall.getLimit(), _lastRange, *_outputItemRow); - auto written = _outputItemRow->numRowsWritten() - linesBefore; - _engine->_stats += stats; - myCall.didProduce(written); - if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; - } else if (myCall.getLimit() > 0) { - // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; - } else { - // We are done with skipping. Skip is not allowed to request more - execState = ::NextState(myCall); - } - break; - } - case ExecState::FULLCOUNT: { - TRI_ASSERT(false); - } - case ExecState::UPSTREAM: { - // If this triggers the executors produceRows function has returned - // HASMORE even if it new that upstream has no further rows. - TRI_ASSERT(_upstreamState != ExecutionState::DONE); - TRI_ASSERT(!_lastRange.hasMore()); - size_t skippedLocal = 0; - stack.pushCall(std::move(executorRequest)); - std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); - // Do we need to call it? - // myCall.didSkip(skippedLocal); - skipped += skippedLocal; - execState = ::NextState(myCall); - break; - } - case ExecState::SHADOWROWS: { - // Not implemented yet - // TRI_ASSERT(false); - // execState = ::NextState(myCall); - execState = ExecState::DONE; - break; - } - default: - // unreachable - TRI_ASSERT(false); - } - } - - auto outputBlock = _outputItemRow->stealBlock(); - // This is not strictly necessary here, as we shouldn't be called again - // after DONE. 
- _outputItemRow.reset(); - return {_upstreamState, skipped, std::move(outputBlock)}; -} - template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index 34219bde206e..8ac67aa4070a 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -76,21 +76,8 @@ SingleRowFetcher::fetchBlockForPassthrough(size_t atMost) { template std::tuple SingleRowFetcher::execute(AqlCallStack& stack) { - auto const [state, skipped, block] = _dependencyProxy->execute(stack); - if (state == ExecutionState::WAITING) { - // On waiting we have nothing to return - return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; - } - if (state == ExecutionState::HASMORE) { - TRI_ASSERT(block != nullptr); - return {state, skipped, - AqlItemBlockInputRange{ExecutorState::HASMORE, block, 0, block->size()}}; - } - if (block == nullptr) { - return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE}}; - } - return {state, skipped, - AqlItemBlockInputRange{ExecutorState::DONE, block, 0, block->size()}}; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } template From aa275b48ffc2c8fc9a3c364e2241321ec9c9293f Mon Sep 17 00:00:00 2001 From: hkernbach Date: Fri, 8 Nov 2019 19:23:59 +0100 Subject: [PATCH 017/122] working - not done yet --- arangod/Aql/TraversalExecutor.cpp | 110 ++++++++++++++++++++++++++++ arangod/Aql/TraversalExecutor.h | 12 +++ tests/Aql/TraversalExecutorTest.cpp | 108 ++++++++++++++++++++++++--- 3 files changed, 220 insertions(+), 10 deletions(-) diff --git a/arangod/Aql/TraversalExecutor.cpp b/arangod/Aql/TraversalExecutor.cpp index 963c8bd4845f..6dd62bab66b6 100644 --- a/arangod/Aql/TraversalExecutor.cpp +++ b/arangod/Aql/TraversalExecutor.cpp @@ -21,7 +21,10 @@ //////////////////////////////////////////////////////////////////////////////// #include "TraversalExecutor.h" +#include +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionNode.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/PruneExpressionEvaluator.h" @@ -248,6 +251,57 @@ std::pair TraversalExecutor::produceRows(OutputA return {ExecutionState::DONE, s}; } +std::tuple TraversalExecutor::produceRows( + size_t limit, AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + TraversalStats s; + + while (inputRange.hasMore() && limit > 0) { + auto const& [state, input] = inputRange.next(); + LOG_DEVEL << "ExecutorState: " << state << " - remove me after review"; + + if (!resetTraverser(input)) { + // Could not start here, (invalid) + // Go to next + continue; + } + + if (!_traverser.hasMore() || !_traverser.next()) { + // Nothing more to read, reset input to refetch + continue; + } else { + // traverser now has next v, e, p values + if (_infos.useVertexOutput()) { + AqlValue vertex = _traverser.lastVertexToAqlValue(); + AqlValueGuard guard{vertex, true}; + output.moveValueInto(_infos.vertexRegister(), input, guard); + } + if (_infos.useEdgeOutput()) { + AqlValue edge = _traverser.lastEdgeToAqlValue(); + AqlValueGuard guard{edge, true}; + output.moveValueInto(_infos.edgeRegister(), input, guard); + } + if (_infos.usePathOutput()) { + transaction::BuilderLeaser tmp(_traverser.trx()); + tmp->clear(); + AqlValue path = _traverser.pathToAqlValue(*tmp.builder()); + AqlValueGuard guard{path, true}; + output.moveValueInto(_infos.pathRegister(), input, guard); + } + 
output.advanceRow(); + limit--; + } + } + + // we are done + s.addFiltered(_traverser.getAndResetFilteredPaths()); + s.addScannedIndex(_traverser.getAndResetReadDocuments()); + s.addHttpRequests(_traverser.getAndResetHttpRequests()); + + AqlCall upstreamCall{}; + upstreamCall.softLimit = limit; + return {inputRange.peek().first, s, upstreamCall}; +} + ExecutionState TraversalExecutor::computeState() const { if (_rowState == ExecutionState::DONE && !_traverser.hasMore()) { return ExecutionState::DONE; @@ -310,3 +364,59 @@ bool TraversalExecutor::resetTraverser() { } } } + +bool TraversalExecutor::resetTraverser(InputAqlItemRow const& input) { + _traverser.traverserCache()->clear(); + + // Initialize the Expressions within the options. + // We need to find the variable and read its value here. Everything is + // computed right now. + auto opts = _traverser.options(); + opts->clearVariableValues(); + for (auto const& pair : _infos.filterConditionVariables()) { + opts->setVariableValue(pair.first, input.getValue(pair.second)); + } + if (opts->usesPrune()) { + auto* evaluator = opts->getPruneEvaluator(); + // Replace by inputRow + evaluator->prepareContext(input); + } + // Now reset the traverser + if (_infos.usesFixedSource()) { + auto pos = _infos.getFixedSource().find('/'); + if (pos == std::string::npos) { + _traverser.options()->query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for traversal: " + "Only id strings or objects with " + "_id are allowed"); + return false; + } else { + // Use constant value + _traverser.setStartVertex(_infos.getFixedSource()); + return true; + } + } else { + AqlValue const& in = input.getValue(_infos.getInputRegister()); + if (in.isObject()) { + try { + _traverser.setStartVertex(_traverser.options()->trx()->extractIdString(in.slice())); + return true; + } catch (...) { + // on purpose ignore this error. + return false; + } + // _id or _key not present we cannot start here, register warning take next + } else if (in.isString()) { + _traverser.setStartVertex(in.slice().copyString()); + return true; + } else { + _traverser.options()->query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for traversal: Only " + "id strings or objects with _id are " + "allowed"); + return false; + } + } +} diff --git a/arangod/Aql/TraversalExecutor.h b/arangod/Aql/TraversalExecutor.h index f749e85d8301..02b4460ad0b3 100644 --- a/arangod/Aql/TraversalExecutor.h +++ b/arangod/Aql/TraversalExecutor.h @@ -38,6 +38,8 @@ class Traverser; namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; class Query; class OutputAqlItemRow; class ExecutorInfos; @@ -138,6 +140,15 @@ class TraversalExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Row of Aql Values. 
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + std::tuple produceRows(size_t limit, + AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output); + private: /** * @brief compute the return state @@ -146,6 +157,7 @@ class TraversalExecutor { ExecutionState computeState() const; bool resetTraverser(); + bool resetTraverser(InputAqlItemRow const& input); private: Infos& _infos; diff --git a/tests/Aql/TraversalExecutorTest.cpp b/tests/Aql/TraversalExecutorTest.cpp index 4d7bdc3f10a4..2fabd2ff183c 100644 --- a/tests/Aql/TraversalExecutorTest.cpp +++ b/tests/Aql/TraversalExecutorTest.cpp @@ -20,9 +20,11 @@ /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// +#include "AqlItemBlockHelper.h" #include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/ExecutionNode.h" #include "Aql/InputAqlItemRow.h" @@ -295,7 +297,8 @@ class TraversalExecutorTestInputStartVertex : public ::testing::Test { TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_producer_doesnt_wait) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -308,7 +311,8 @@ TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_produce TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_producer_waits) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -330,7 +334,8 @@ TEST_F(TraversalExecutorTestInputStartVertex, there_are_rows_upstream_producer_d myGraph.addVertex("2"); myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -362,7 +367,8 @@ TEST_F(TraversalExecutorTestInputStartVertex, myGraph.addVertex("2"); myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -400,7 +406,8 @@ TEST_F(TraversalExecutorTestInputStartVertex, myGraph.addVertex("2"); myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -501,7 +508,8 @@ class 
TraversalExecutorTestConstantStartVertex : public ::testing::Test { TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_doesnt_wait) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -514,7 +522,8 @@ TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_doesn TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_waits) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -537,7 +546,8 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_doesnt_w myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -569,7 +579,8 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_no myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), @@ -606,7 +617,8 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_ed myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; myGraph.addEdge("1", "2", "1->2"); @@ -656,6 +668,82 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_ed } } +TEST_F(TraversalExecutorTestInputStartVertex, test_produce_datarange_no_edges_are_connected) { + myGraph.addVertex("1"); + myGraph.addVertex("2"); + myGraph.addVertex("3"); + + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
+ auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + TraversalExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, {{R"("v/1")"}, {R"("v/2")"}, {R"("v/3")"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + EXPECT_EQ(output.numRowsWritten(), 0); + auto const [state, stats, call] = testee.produceRows(1000, input, output); + EXPECT_EQ(state, ExecutorState::DONE); + + ASSERT_EQ(stats.getFiltered(), 0); + ASSERT_FALSE(output.produced()); + + ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); + ASSERT_EQ(traverser->startVertexUsedAt(1), "v/2"); + ASSERT_EQ(traverser->startVertexUsedAt(2), "v/3"); +} + +TEST_F(TraversalExecutorTestConstantStartVertex, test_produce_datarange_edges_are_connected) { + myGraph.addVertex("1"); + myGraph.addVertex("2"); + myGraph.addVertex("3"); + + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + TraversalExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, {{R"("v/1")"}, {R"("v/2")"}, {R"("v/3")"}}); + + myGraph.addEdge("1", "2", "1->2"); + myGraph.addEdge("2", "3", "2->3"); + myGraph.addEdge("3", "1", "3->1"); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + + EXPECT_EQ(output.numRowsWritten(), 0); + auto const [state, stats, call] = testee.produceRows(1000, input, output); + EXPECT_EQ(state, ExecutorState::DONE); + + ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); + ASSERT_EQ(traverser->startVertexUsedAt(1), "v/2"); + ASSERT_EQ(traverser->startVertexUsedAt(2), "v/3"); + + std::vector expectedResult{"v/2", "v/3", "v/1"}; + auto block = output.stealBlock(); + for (std::size_t index = 0; index < 3; index++) { + AqlValue value = block->getValue(index, outReg); + ASSERT_TRUE(value.isObject()); + ASSERT_TRUE(arangodb::basics::VelocyPackHelper::compare( + value.slice(), + myGraph.getVertexData( + arangodb::velocypack::StringRef(expectedResult.at(index))), + false) == 0); + } +} + } // namespace aql } // namespace tests } // namespace arangodb From fd33b7f4204693702a33fc3bdb63d549eb7215b3 Mon Sep 17 00:00:00 2001 From: hkernbach Date: Fri, 8 Nov 2019 19:25:36 +0100 Subject: [PATCH 018/122] Revert "working - not done yet" This reverts commit aa275b48ffc2c8fc9a3c364e2241321ec9c9293f. 
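[Editorial note; the sketch below is illustrative and not part of any patch in this series.]
The reverted patch 017 above introduces the range-based executor interface: instead of fetching one row at a time through the fetcher, the executor drains an AqlItemBlockInputRange handed to it and returns, next to its stats, an AqlCall telling upstream how much more it wants. A minimal sketch of that consumption loop, assuming only the types and members visible in the diffs (the helper name drainRange and the elided register writes are invented for illustration, everything else mirrors the produceRows(limit, inputRange, output) signature from patch 017):

#include "Aql/AqlCall.h"
#include "Aql/AqlItemBlockInputRange.h"
#include "Aql/OutputAqlItemRow.h"

using namespace arangodb::aql;

// Illustrative helper (not from the patches): drain up to `limit` data rows from
// the input range into `output`, then ask upstream for the rest via a soft limit.
static AqlCall drainRange(size_t limit, AqlItemBlockInputRange& inputRange,
                          OutputAqlItemRow& output) {
  while (inputRange.hasMore() && limit > 0) {
    // next() yields the row together with the state seen after it (HASMORE/DONE).
    auto const [rowState, row] = inputRange.next();
    if (!row.isInitialized()) {
      break;  // defensive: the range delivered no row
    }
    // ... compute and write the output registers for `row` here ...
    output.advanceRow();
    --limit;
  }
  // The unserved remainder of the limit becomes the soft limit of the upstream call,
  // exactly as TraversalExecutor::produceRows does in patch 017.
  AqlCall upstreamCall{};
  upstreamCall.softLimit = limit;
  return upstreamCall;
}
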
--- arangod/Aql/TraversalExecutor.cpp | 110 ---------------------------- arangod/Aql/TraversalExecutor.h | 12 --- tests/Aql/TraversalExecutorTest.cpp | 108 +++------------------------ 3 files changed, 10 insertions(+), 220 deletions(-) diff --git a/arangod/Aql/TraversalExecutor.cpp b/arangod/Aql/TraversalExecutor.cpp index 6dd62bab66b6..963c8bd4845f 100644 --- a/arangod/Aql/TraversalExecutor.cpp +++ b/arangod/Aql/TraversalExecutor.cpp @@ -21,10 +21,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "TraversalExecutor.h" -#include -#include "Aql/AqlCall.h" -#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionNode.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/PruneExpressionEvaluator.h" @@ -251,57 +248,6 @@ std::pair TraversalExecutor::produceRows(OutputA return {ExecutionState::DONE, s}; } -std::tuple TraversalExecutor::produceRows( - size_t limit, AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { - TraversalStats s; - - while (inputRange.hasMore() && limit > 0) { - auto const& [state, input] = inputRange.next(); - LOG_DEVEL << "ExecutorState: " << state << " - remove me after review"; - - if (!resetTraverser(input)) { - // Could not start here, (invalid) - // Go to next - continue; - } - - if (!_traverser.hasMore() || !_traverser.next()) { - // Nothing more to read, reset input to refetch - continue; - } else { - // traverser now has next v, e, p values - if (_infos.useVertexOutput()) { - AqlValue vertex = _traverser.lastVertexToAqlValue(); - AqlValueGuard guard{vertex, true}; - output.moveValueInto(_infos.vertexRegister(), input, guard); - } - if (_infos.useEdgeOutput()) { - AqlValue edge = _traverser.lastEdgeToAqlValue(); - AqlValueGuard guard{edge, true}; - output.moveValueInto(_infos.edgeRegister(), input, guard); - } - if (_infos.usePathOutput()) { - transaction::BuilderLeaser tmp(_traverser.trx()); - tmp->clear(); - AqlValue path = _traverser.pathToAqlValue(*tmp.builder()); - AqlValueGuard guard{path, true}; - output.moveValueInto(_infos.pathRegister(), input, guard); - } - output.advanceRow(); - limit--; - } - } - - // we are done - s.addFiltered(_traverser.getAndResetFilteredPaths()); - s.addScannedIndex(_traverser.getAndResetReadDocuments()); - s.addHttpRequests(_traverser.getAndResetHttpRequests()); - - AqlCall upstreamCall{}; - upstreamCall.softLimit = limit; - return {inputRange.peek().first, s, upstreamCall}; -} - ExecutionState TraversalExecutor::computeState() const { if (_rowState == ExecutionState::DONE && !_traverser.hasMore()) { return ExecutionState::DONE; @@ -364,59 +310,3 @@ bool TraversalExecutor::resetTraverser() { } } } - -bool TraversalExecutor::resetTraverser(InputAqlItemRow const& input) { - _traverser.traverserCache()->clear(); - - // Initialize the Expressions within the options. - // We need to find the variable and read its value here. Everything is - // computed right now. 
- auto opts = _traverser.options(); - opts->clearVariableValues(); - for (auto const& pair : _infos.filterConditionVariables()) { - opts->setVariableValue(pair.first, input.getValue(pair.second)); - } - if (opts->usesPrune()) { - auto* evaluator = opts->getPruneEvaluator(); - // Replace by inputRow - evaluator->prepareContext(input); - } - // Now reset the traverser - if (_infos.usesFixedSource()) { - auto pos = _infos.getFixedSource().find('/'); - if (pos == std::string::npos) { - _traverser.options()->query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for traversal: " - "Only id strings or objects with " - "_id are allowed"); - return false; - } else { - // Use constant value - _traverser.setStartVertex(_infos.getFixedSource()); - return true; - } - } else { - AqlValue const& in = input.getValue(_infos.getInputRegister()); - if (in.isObject()) { - try { - _traverser.setStartVertex(_traverser.options()->trx()->extractIdString(in.slice())); - return true; - } catch (...) { - // on purpose ignore this error. - return false; - } - // _id or _key not present we cannot start here, register warning take next - } else if (in.isString()) { - _traverser.setStartVertex(in.slice().copyString()); - return true; - } else { - _traverser.options()->query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for traversal: Only " - "id strings or objects with _id are " - "allowed"); - return false; - } - } -} diff --git a/arangod/Aql/TraversalExecutor.h b/arangod/Aql/TraversalExecutor.h index 02b4460ad0b3..f749e85d8301 100644 --- a/arangod/Aql/TraversalExecutor.h +++ b/arangod/Aql/TraversalExecutor.h @@ -38,8 +38,6 @@ class Traverser; namespace aql { -struct AqlCall; -class AqlItemBlockInputRange; class Query; class OutputAqlItemRow; class ExecutorInfos; @@ -140,15 +138,6 @@ class TraversalExecutor { */ std::pair produceRows(OutputAqlItemRow& output); - /** - * @brief produce the next Row of Aql Values. 
- * - * @return ExecutorState, the stats, and a new Call that needs to be send to upstream - */ - std::tuple produceRows(size_t limit, - AqlItemBlockInputRange& inputRange, - OutputAqlItemRow& output); - private: /** * @brief compute the return state @@ -157,7 +146,6 @@ class TraversalExecutor { ExecutionState computeState() const; bool resetTraverser(); - bool resetTraverser(InputAqlItemRow const& input); private: Infos& _infos; diff --git a/tests/Aql/TraversalExecutorTest.cpp b/tests/Aql/TraversalExecutorTest.cpp index 2fabd2ff183c..4d7bdc3f10a4 100644 --- a/tests/Aql/TraversalExecutorTest.cpp +++ b/tests/Aql/TraversalExecutorTest.cpp @@ -20,11 +20,9 @@ /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// -#include "AqlItemBlockHelper.h" #include "RowFetcherHelper.h" #include "gtest/gtest.h" -#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/ExecutionNode.h" #include "Aql/InputAqlItemRow.h" @@ -297,8 +295,7 @@ class TraversalExecutorTestInputStartVertex : public ::testing::Test { TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_producer_doesnt_wait) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -311,8 +308,7 @@ TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_produce TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_producer_waits) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -334,8 +330,7 @@ TEST_F(TraversalExecutorTestInputStartVertex, there_are_rows_upstream_producer_d myGraph.addVertex("2"); myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -367,8 +362,7 @@ TEST_F(TraversalExecutorTestInputStartVertex, myGraph.addVertex("2"); myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -406,8 +400,7 @@ TEST_F(TraversalExecutorTestInputStartVertex, myGraph.addVertex("2"); myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -508,8 +501,7 @@ class 
TraversalExecutorTestConstantStartVertex : public ::testing::Test { TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_doesnt_wait) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -522,8 +514,7 @@ TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_doesn TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_waits) { VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -546,8 +537,7 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_doesnt_w myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; @@ -579,8 +569,7 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_no myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), @@ -617,8 +606,7 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_ed myGraph.addVertex("3"); auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; myGraph.addEdge("1", "2", "1->2"); @@ -668,82 +656,6 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_ed } } -TEST_F(TraversalExecutorTestInputStartVertex, test_produce_datarange_no_edges_are_connected) { - myGraph.addVertex("1"); - myGraph.addVertex("2"); - myGraph.addVertex("3"); - - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
- auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - TraversalExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, {{R"("v/1")"}, {R"("v/2")"}, {R"("v/3")"}}); - - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; - OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - EXPECT_EQ(output.numRowsWritten(), 0); - auto const [state, stats, call] = testee.produceRows(1000, input, output); - EXPECT_EQ(state, ExecutorState::DONE); - - ASSERT_EQ(stats.getFiltered(), 0); - ASSERT_FALSE(output.produced()); - - ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); - ASSERT_EQ(traverser->startVertexUsedAt(1), "v/2"); - ASSERT_EQ(traverser->startVertexUsedAt(2), "v/3"); -} - -TEST_F(TraversalExecutorTestConstantStartVertex, test_produce_datarange_edges_are_connected) { - myGraph.addVertex("1"); - myGraph.addVertex("2"); - myGraph.addVertex("3"); - - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! - auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - TraversalExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, {{R"("v/1")"}, {R"("v/2")"}, {R"("v/3")"}}); - - myGraph.addEdge("1", "2", "1->2"); - myGraph.addEdge("2", "3", "2->3"); - myGraph.addEdge("3", "1", "3->1"); - - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; - OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - EXPECT_EQ(output.numRowsWritten(), 0); - auto const [state, stats, call] = testee.produceRows(1000, input, output); - EXPECT_EQ(state, ExecutorState::DONE); - - ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); - ASSERT_EQ(traverser->startVertexUsedAt(1), "v/2"); - ASSERT_EQ(traverser->startVertexUsedAt(2), "v/3"); - - std::vector expectedResult{"v/2", "v/3", "v/1"}; - auto block = output.stealBlock(); - for (std::size_t index = 0; index < 3; index++) { - AqlValue value = block->getValue(index, outReg); - ASSERT_TRUE(value.isObject()); - ASSERT_TRUE(arangodb::basics::VelocyPackHelper::compare( - value.slice(), - myGraph.getVertexData( - arangodb::velocypack::StringRef(expectedResult.at(index))), - false) == 0); - } -} - } // namespace aql } // namespace tests } // namespace arangodb From c703b11b722be97630dd90eaa7b4a0dd9246e0c9 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 18 Nov 2019 17:14:24 +0100 Subject: [PATCH 019/122] first implementation of a ShadowRow fetching interface on AqlItemBlockInputRange --- arangod/Aql/AqlItemBlockInputRange.cpp | 56 ++++++++++++++++++++++++-- arangod/Aql/AqlItemBlockInputRange.h | 8 ++++ 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index f1b656c3cc71..a00674965d65 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -21,6 +21,7 @@ //////////////////////////////////////////////////////////////////////////////// 
#include "AqlItemBlockInputRange.h" +#include "Aql/ShadowAqlItemRow.h" using namespace arangodb; using namespace arangodb::aql; @@ -55,10 +56,9 @@ std::pair AqlItemBlockInputRange::peek() { std::pair AqlItemBlockInputRange::next() { auto res = peek(); - ++_rowIndex; - if (!indexIsValid()) { - _block = nullptr; - _rowIndex = 0; + if (indexIsValid()) { + TRI_ASSERT(res.second); + ++_rowIndex; } return res; } @@ -76,3 +76,51 @@ bool AqlItemBlockInputRange::hasMoreAfterThis() const noexcept { ExecutorState AqlItemBlockInputRange::state() const noexcept { return hasMoreAfterThis() ? ExecutorState::HASMORE : _finalState; } + +bool AqlItemBlockInputRange::hasShadowRow() const noexcept { + if (_block == nullptr) { + // No block => no ShadowRow + return false; + } + + if (hasMore()) { + // As long as hasMore() is true, we still have DataRows and are not on a ShadowRow now. + return false; + } + + if (_rowIndex < _block->size()) { + // We still have more rows here, get next ShadowRow + TRI_ASSERT(_block->isShadowRow(_rowIndex)); + return true; + } + return false; +} + +std::pair AqlItemBlockInputRange::peekShadowRow() { + if (hasShadowRow()) { + return std::make_pair(state(), ShadowAqlItemRow{_block, _rowIndex}); + } + return std::make_pair(state(), ShadowAqlItemRow{CreateInvalidShadowRowHint{}}); +} + +std::pair AqlItemBlockInputRange::nextShadowRow() { + auto res = peekShadowRow(); + if (hasShadowRow()) { + auto const& shadowRowIndexes = _block->getShadowRowIndexes(); + auto it = std::find(shadowRowIndexes.begin(), shadowRowIndexes.end(), _rowIndex); + // We have a shadow row in this index, so we cannot be at the end now. + TRI_ASSERT(it != shadowRowIndexes.end()); + // Go to next ShadowRow. + it++; + if (it == shadowRowIndexes.end()) { + // No more shadow row here. + _endIndex = _block->size(); + } else { + // Set endIndex to the next ShadowRowIndex. + _endIndex = *it; + } + // Advance the current row. 
+ _rowIndex++; + } + return res; +} diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 01eef1512402..c35a9b6baa3b 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -29,6 +29,8 @@ namespace arangodb::aql { +class ShadowAqlItemRow; + class AqlItemBlockInputRange { public: explicit AqlItemBlockInputRange(ExecutorState state); @@ -46,6 +48,12 @@ class AqlItemBlockInputRange { std::pair next(); + bool hasShadowRow() const noexcept; + + std::pair peekShadowRow(); + + std::pair nextShadowRow(); + private: bool indexIsValid() const noexcept; From 79117675feabd88b7a6030194ffb01cca60bfa11 Mon Sep 17 00:00:00 2001 From: hkernbach Date: Wed, 20 Nov 2019 11:17:24 +0100 Subject: [PATCH 020/122] single row fetcher, execute + tests --- arangod/Aql/AqlItemBlock.cpp | 19 ++++++- arangod/Aql/AqlItemBlock.h | 8 +++ arangod/Aql/AqlItemBlockInputRange.h | 7 ++- arangod/Aql/DependencyProxy.h | 2 +- arangod/Aql/SingleRowFetcher.cpp | 16 +++--- tests/Aql/DependencyProxyMock.cpp | 11 ++++ tests/Aql/DependencyProxyMock.h | 4 ++ tests/Aql/SingleRowFetcherTest.cpp | 79 ++++++++++++++++++++++++++++ 8 files changed, 135 insertions(+), 11 deletions(-) diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index d3bd739c5c22..7192a4a70ffd 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -69,7 +69,7 @@ inline void CopyValueOver(std::unordered_set& cache, AqlValue const& a /// @brief create the block AqlItemBlock::AqlItemBlock(AqlItemBlockManager& manager, size_t nrItems, RegisterId nrRegs) - : _nrItems(nrItems), _nrRegs(nrRegs), _manager(manager), _refCount(0) { + : _nrItems(nrItems), _nrRegs(nrRegs), _manager(manager), _refCount(0), _rowIndex(0) { TRI_ASSERT(nrItems > 0); // empty AqlItemBlocks are not allowed! // check that the nrRegs value is somewhat sensible // this compare value is arbitrary, but having so many registers in a single @@ -855,6 +855,23 @@ RegisterId AqlItemBlock::getNrRegs() const noexcept { return _nrRegs; } size_t AqlItemBlock::size() const noexcept { return _nrItems; } +std::tuple AqlItemBlock::getRelevantRange() { + size_t startIndex = _rowIndex; + size_t endIndex = 0; + + for (; _rowIndex < this->size(); _rowIndex++) { + if (isShadowRow(_rowIndex)) { + endIndex = _rowIndex - 1; + break; + } + if (_rowIndex - 1 != this->size()) { + endIndex = _rowIndex; + } + } + + return std::make_pair(startIndex, endIndex); +} + size_t AqlItemBlock::numEntries() const { return internalNrRegs() * _nrItems; } size_t AqlItemBlock::capacity() const noexcept { return _data.capacity(); } diff --git a/arangod/Aql/AqlItemBlock.h b/arangod/Aql/AqlItemBlock.h index d8d3288f8808..df3b7c03ce93 100644 --- a/arangod/Aql/AqlItemBlock.h +++ b/arangod/Aql/AqlItemBlock.h @@ -166,6 +166,9 @@ class AqlItemBlock { /// @brief getter for _nrItems size_t size() const noexcept; + /// @brief get the relevant consumable range of the block + std::tuple getRelevantRange(); + /// @brief Number of entries in the matrix. If this changes, the memory usage /// must be / in- or decreased appropriately as well. /// All entries _data[i] for numEntries() <= i < _data.size() always have to @@ -287,6 +290,11 @@ class AqlItemBlock { /// @brief A list of indexes with all shadowRows within /// this ItemBlock. Used to easier split data based on them. std::set _shadowRowIndexes; + + /// @brief current row index we want to read from. 
This will be increased after + /// getRelevantRange function will be called, which will return a tuple of the + /// old _rowIndex and the newly calculated _rowIndex - 1 + size_t _rowIndex; }; } // namespace aql diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 01eef1512402..bfb264835dff 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -34,9 +34,9 @@ class AqlItemBlockInputRange { explicit AqlItemBlockInputRange(ExecutorState state); AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr const&, - std::size_t, std::size_t endIndex); + std::size_t startIndex, std::size_t endIndex); AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr&&, - std::size_t, std::size_t endIndex) noexcept; + std::size_t startIndex, std::size_t endIndex) noexcept; bool hasMore() const noexcept; @@ -46,6 +46,9 @@ class AqlItemBlockInputRange { std::pair next(); + std::size_t getRowIndex() noexcept { return _rowIndex; }; + std::size_t getEndIndex() noexcept { return _endIndex; }; + private: bool indexIsValid() const noexcept; diff --git a/arangod/Aql/DependencyProxy.h b/arangod/Aql/DependencyProxy.h index 36d946798517..bbee1df2ae43 100644 --- a/arangod/Aql/DependencyProxy.h +++ b/arangod/Aql/DependencyProxy.h @@ -74,7 +74,7 @@ class DependencyProxy { TEST_VIRTUAL ~DependencyProxy() = default; // TODO Implement and document properly! - std::tuple execute(AqlCallStack& stack); + TEST_VIRTUAL std::tuple execute(AqlCallStack& stack); // This is only TEST_VIRTUAL, so we ignore this lint warning: // NOLINTNEXTLINE google-default-arguments diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index 34219bde206e..cdf502e2ef55 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -24,6 +24,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "SingleRowFetcher.h" +#include #include "Aql/AqlItemBlock.h" #include "Aql/DependencyProxy.h" @@ -76,21 +77,22 @@ SingleRowFetcher::fetchBlockForPassthrough(size_t atMost) { template std::tuple SingleRowFetcher::execute(AqlCallStack& stack) { - auto const [state, skipped, block] = _dependencyProxy->execute(stack); + auto [state, skipped, block] = _dependencyProxy->execute(stack); if (state == ExecutionState::WAITING) { // On waiting we have nothing to return return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; } + if (block == nullptr) { + return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE}}; + } + + auto [start, end] = block->getRelevantRange(); if (state == ExecutionState::HASMORE) { TRI_ASSERT(block != nullptr); return {state, skipped, - AqlItemBlockInputRange{ExecutorState::HASMORE, block, 0, block->size()}}; - } - if (block == nullptr) { - return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE}}; + AqlItemBlockInputRange{ExecutorState::HASMORE, block, start, end}}; } - return {state, skipped, - AqlItemBlockInputRange{ExecutorState::DONE, block, 0, block->size()}}; + return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, block, start, end}}; } template diff --git a/tests/Aql/DependencyProxyMock.cpp b/tests/Aql/DependencyProxyMock.cpp index f17bcd9098be..ecec7d319ca9 100644 --- a/tests/Aql/DependencyProxyMock.cpp +++ b/tests/Aql/DependencyProxyMock.cpp @@ -21,6 +21,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "DependencyProxyMock.h" +#include #include 
"gtest/gtest.h" @@ -104,6 +105,9 @@ DependencyProxyMock& DependencyProxyMock:: for (RegisterId i = 0; i < this->getNrInputRegisters(); i++) { inputRegisters->emplace(i); } + // keep the block address + _block = block; + return andThenReturn({state, block}); } @@ -125,6 +129,13 @@ DependencyProxyMock& DependencyProxyMock:: return *this; } +template +std::tuple +DependencyProxyMock::execute(AqlCallStack& stack) { + TRI_ASSERT(_block != nullptr); + return {arangodb::aql::ExecutionState::DONE, 0, _block}; +} + template bool DependencyProxyMock::allBlocksFetched() const { return _itemsToReturn.empty(); diff --git a/tests/Aql/DependencyProxyMock.h b/tests/Aql/DependencyProxyMock.h index 2692dd344e35..9fd2f8ea2d4b 100644 --- a/tests/Aql/DependencyProxyMock.h +++ b/tests/Aql/DependencyProxyMock.h @@ -51,6 +51,9 @@ class DependencyProxyMock : public ::arangodb::aql::DependencyProxy skipSome(size_t atMost) override; + std::tuple execute( + arangodb::aql::AqlCallStack& stack) override; + private: using FetchBlockReturnItem = std::pair; @@ -76,6 +79,7 @@ class DependencyProxyMock : public ::arangodb::aql::DependencyProxy diff --git a/tests/Aql/SingleRowFetcherTest.cpp b/tests/Aql/SingleRowFetcherTest.cpp index 07a5ab6ab152..4dcb098f349a 100644 --- a/tests/Aql/SingleRowFetcherTest.cpp +++ b/tests/Aql/SingleRowFetcherTest.cpp @@ -28,6 +28,7 @@ #include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlock.h" #include "Aql/DependencyProxy.h" #include "Aql/ExecutionBlock.h" @@ -1147,6 +1148,84 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_consecutive_shadowrows) { ASSERT_EQ(dependencyProxyMock.numFetchBlockCalls(), 1); } +TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_oneAndDone) { + DependencyProxyMock dependencyProxyMock{monitor, 1}; + InputAqlItemRow row{CreateInvalidInputRowHint{}}; + ShadowAqlItemRow shadow{CreateInvalidShadowRowHint{}}; + + { + SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 7, 1)}; + block->emplaceValue(0, 0, "a"); + block->emplaceValue(1, 0, "b"); + block->emplaceValue(2, 0, "c"); + block->emplaceValue(3, 0, "d"); + block->emplaceValue(4, 0, "e"); // first shadowrow + block->setShadowRowDepth(4, AqlValue(AqlValueHintUInt(1ull))); + block->emplaceValue(5, 0, "f"); + block->setShadowRowDepth(5, AqlValue(AqlValueHintUInt(0ull))); + block->emplaceValue(6, 0, "g"); + block->setShadowRowDepth(6, AqlValue(AqlValueHintUInt(0ull))); + dependencyProxyMock.shouldReturn(ExecutionState::DONE, std::move(block)); + } + + { + SingleRowFetcher testee(dependencyProxyMock); + AqlCall call; + AqlCallStack stack = {call}; + + // First no data row + auto [state, skipped, input] = testee.execute(stack); + EXPECT_EQ(input.getRowIndex(), 0); + EXPECT_EQ(input.getEndIndex(), 3); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(state, ExecutionState::DONE); + } // testee is destroyed here +} + +TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_twoAndHasMore) { + DependencyProxyMock dependencyProxyMock{monitor, 1}; + InputAqlItemRow row{CreateInvalidInputRowHint{}}; + ShadowAqlItemRow shadow{CreateInvalidShadowRowHint{}}; + + { + SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 9, 1)}; + block->emplaceValue(0, 0, "a"); + block->emplaceValue(1, 0, "b"); + block->emplaceValue(2, 0, "c"); + block->emplaceValue(3, 0, "d"); + block->emplaceValue(4, 0, "e"); // first shadowrow + block->setShadowRowDepth(4, AqlValue(AqlValueHintUInt(1ull))); + block->emplaceValue(5, 0, "f"); + 
block->setShadowRowDepth(5, AqlValue(AqlValueHintUInt(0ull))); + block->emplaceValue(6, 0, "g"); + block->setShadowRowDepth(6, AqlValue(AqlValueHintUInt(0ull))); + block->emplaceValue(7, 0, "h"); + block->emplaceValue(8, 0, "i"); + dependencyProxyMock.shouldReturn(ExecutionState::DONE, std::move(block)); + } + + { + SingleRowFetcher testee(dependencyProxyMock); + AqlCall call; + AqlCallStack stack = {call}; + + { + auto [state, skipped, input] = testee.execute(stack); + EXPECT_EQ(input.getRowIndex(), 0); + EXPECT_EQ(input.getEndIndex(), 3); + EXPECT_EQ(state, ExecutionState::HASMORE); + // EXPECT_EQ(skipped, 0); + } + + { + auto [state, skipped, input] = testee.execute(stack); + EXPECT_EQ(input.getRowIndex(), 7); + EXPECT_EQ(input.getEndIndex(), 8); + EXPECT_EQ(state, ExecutionState::DONE); + } + } // testee is destroyed here +} + class SingleRowFetcherWrapper : public fetcherHelper::PatternTestWrapper> { public: From b91be5f8e29ac5dac93e972ea415fccc08924ca0 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 27 Nov 2019 11:57:31 +0100 Subject: [PATCH 021/122] Fixed SingleRowFetcherTest. Also an AqlItemBlockInputRange can now handle ShadowRows properly --- arangod/Aql/AqlItemBlock.cpp | 22 +++---- arangod/Aql/AqlItemBlockInputRange.cpp | 21 +++++- arangod/Aql/AqlItemBlockInputRange.h | 2 + arangod/Aql/ExecutionBlockImpl.cpp | 1 - tests/Aql/SingleRowFetcherTest.cpp | 90 ++++++++++++++++++++------ 5 files changed, 103 insertions(+), 33 deletions(-) diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index a2ee97ed97af..4d9846ceb8af 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -699,7 +699,8 @@ void AqlItemBlock::toVelocyPack(transaction::Methods* trx, VPackBuilder& result) result.add("raw", raw.slice()); } -void AqlItemBlock::rowToSimpleVPack(size_t const row, transaction::Methods* trx, arangodb::velocypack::Builder& builder) const { +void AqlItemBlock::rowToSimpleVPack(size_t const row, transaction::Methods* trx, + arangodb::velocypack::Builder& builder) const { VPackArrayBuilder rowBuilder{&builder}; if (isShadowRow(row)) { @@ -712,7 +713,8 @@ void AqlItemBlock::rowToSimpleVPack(size_t const row, transaction::Methods* trx, } } -void AqlItemBlock::toSimpleVPack(transaction::Methods* trx, arangodb::velocypack::Builder& builder) const { +void AqlItemBlock::toSimpleVPack(transaction::Methods* trx, + arangodb::velocypack::Builder& builder) const { VPackObjectBuilder block{&builder}; block->add("nrItems", VPackValue(size())); block->add("nrRegs", VPackValue(getNrRegs())); @@ -882,18 +884,14 @@ size_t AqlItemBlock::size() const noexcept { return _nrItems; } std::tuple AqlItemBlock::getRelevantRange() { size_t startIndex = _rowIndex; - size_t endIndex = 0; + ++_rowIndex; - for (; _rowIndex < this->size(); _rowIndex++) { - if (isShadowRow(_rowIndex)) { - endIndex = _rowIndex - 1; - break; - } - if (_rowIndex - 1 != this->size()) { - endIndex = _rowIndex; - } + for (; _rowIndex < this->size() && !isShadowRow(_rowIndex); _rowIndex++) { + // Move on as long as we are not at the end or at a shadow row } - + size_t endIndex = _rowIndex; + TRI_ASSERT(startIndex < endIndex); + TRI_ASSERT(endIndex <= this->size()); return std::make_pair(startIndex, endIndex); } diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index a00674965d65..91823b014460 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -77,6 +77,23 @@ ExecutorState AqlItemBlockInputRange::state() 
const noexcept { return hasMoreAfterThis() ? ExecutorState::HASMORE : _finalState; } +ExecutorState AqlItemBlockInputRange::shadowState() const noexcept { + if (_block == nullptr) { + return _finalState; + } + // We Return HASMORE, if the next shadow row is NOT relevant. + // So we can directly fetch the next shadow row without informing + // the executor about an empty subquery. + size_t nextRowIndex = _rowIndex + 1; + if (_block != nullptr && nextRowIndex < _block->size() && _block->isShadowRow(nextRowIndex)) { + ShadowAqlItemRow nextRow{_block, nextRowIndex}; + if (!nextRow.isRelevant()) { + return ExecutorState::HASMORE; + } + } + return ExecutorState::DONE; +} + bool AqlItemBlockInputRange::hasShadowRow() const noexcept { if (_block == nullptr) { // No block => no ShadowRow @@ -98,9 +115,9 @@ bool AqlItemBlockInputRange::hasShadowRow() const noexcept { std::pair AqlItemBlockInputRange::peekShadowRow() { if (hasShadowRow()) { - return std::make_pair(state(), ShadowAqlItemRow{_block, _rowIndex}); + return std::make_pair(shadowState(), ShadowAqlItemRow{_block, _rowIndex}); } - return std::make_pair(state(), ShadowAqlItemRow{CreateInvalidShadowRowHint{}}); + return std::make_pair(shadowState(), ShadowAqlItemRow{CreateInvalidShadowRowHint{}}); } std::pair AqlItemBlockInputRange::nextShadowRow() { diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 8da8678d14bc..fd68ace50fff 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -44,6 +44,8 @@ class AqlItemBlockInputRange { ExecutorState state() const noexcept; + ExecutorState shadowState() const noexcept; + std::pair peek(); std::pair next(); diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 3851020b89a4..7b0fb0779161 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1026,7 +1026,6 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { AqlCall executorRequest; while (execState != ExecState::DONE) { - LOG_DEVEL << "State: " << execState; switch (execState) { case ExecState::SKIP: { auto [state, skippedLocal, call] = diff --git a/tests/Aql/SingleRowFetcherTest.cpp b/tests/Aql/SingleRowFetcherTest.cpp index 4dcb098f349a..54818943f3e0 100644 --- a/tests/Aql/SingleRowFetcherTest.cpp +++ b/tests/Aql/SingleRowFetcherTest.cpp @@ -37,6 +37,7 @@ #include "Aql/InputAqlItemRow.h" #include "Aql/ResourceUsage.h" #include "Aql/SingleRowFetcher.h" +#include "Basics/StringUtils.h" #include "FetcherTestHelper.h" @@ -65,6 +66,58 @@ class SingleRowFetcherTestPassBlocks : public ::testing::Test { ::arangodb::aql::BlockPassthrough::Enable; SingleRowFetcherTestPassBlocks() : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS) {} + + void validateInputRange(AqlItemBlockInputRange& input, + std::vector const& result) { + EXPECT_EQ(result.size(), input.getEndIndex() - input.getRowIndex()); + for (auto const& value : result) { + SCOPED_TRACE("Checking for value: " + value); + // We need more rows + ASSERT_TRUE(input.hasMore()); + EXPECT_FALSE(input.hasShadowRow()); + + auto [state, row] = input.next(); + + if (value == result.back()) { + EXPECT_EQ(state, ExecutorState::DONE); + } else { + EXPECT_EQ(state, ExecutorState::HASMORE); + } + ASSERT_TRUE(row.isInitialized()); + auto const& inputVal = row.getValue(0); + ASSERT_TRUE(inputVal.isString()); + EXPECT_TRUE(inputVal.slice().isEqualString(value)) + << inputVal.slice().toJson() << " should be equal to \"" << value << "\""; + } + // 
We always fetch to the end + EXPECT_FALSE(input.hasMore()); + } + + void validateShadowRange(AqlItemBlockInputRange& input, + std::vector> const& result) { + for (auto const& [depth, value] : result) { + SCOPED_TRACE("Checking for depth " + basics::StringUtils::itoa(depth) + + " with value: " + value); + // We need more rows + ASSERT_TRUE(input.hasShadowRow()); + EXPECT_FALSE(input.hasMore()); + + auto [state, row] = input.nextShadowRow(); + + if (depth == result.back().first && value == result.back().second) { + EXPECT_EQ(state, ExecutorState::DONE); + } else { + EXPECT_EQ(state, ExecutorState::HASMORE); + } + ASSERT_TRUE(row.isInitialized()); + auto const& inputVal = row.getValue(0); + + ASSERT_TRUE(inputVal.isString()); + EXPECT_TRUE(inputVal.slice().isEqualString(value)) + << inputVal.slice().toJson() << " should be equal to \"" << value << "\""; + EXPECT_EQ(row.getDepth(), depth); + } + } }; class SingleRowFetcherTestDoNotPassBlocks : public ::testing::Test { @@ -1160,9 +1213,9 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_oneAndDone block->emplaceValue(2, 0, "c"); block->emplaceValue(3, 0, "d"); block->emplaceValue(4, 0, "e"); // first shadowrow - block->setShadowRowDepth(4, AqlValue(AqlValueHintUInt(1ull))); + block->setShadowRowDepth(4, AqlValue(AqlValueHintUInt(0ull))); block->emplaceValue(5, 0, "f"); - block->setShadowRowDepth(5, AqlValue(AqlValueHintUInt(0ull))); + block->setShadowRowDepth(5, AqlValue(AqlValueHintUInt(1ull))); block->emplaceValue(6, 0, "g"); block->setShadowRowDepth(6, AqlValue(AqlValueHintUInt(0ull))); dependencyProxyMock.shouldReturn(ExecutionState::DONE, std::move(block)); @@ -1176,7 +1229,7 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_oneAndDone // First no data row auto [state, skipped, input] = testee.execute(stack); EXPECT_EQ(input.getRowIndex(), 0); - EXPECT_EQ(input.getEndIndex(), 3); + EXPECT_EQ(input.getEndIndex(), 4); EXPECT_EQ(skipped, 0); EXPECT_EQ(state, ExecutionState::DONE); } // testee is destroyed here @@ -1194,9 +1247,9 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_twoAndHasM block->emplaceValue(2, 0, "c"); block->emplaceValue(3, 0, "d"); block->emplaceValue(4, 0, "e"); // first shadowrow - block->setShadowRowDepth(4, AqlValue(AqlValueHintUInt(1ull))); + block->setShadowRowDepth(4, AqlValue(AqlValueHintUInt(0ull))); block->emplaceValue(5, 0, "f"); - block->setShadowRowDepth(5, AqlValue(AqlValueHintUInt(0ull))); + block->setShadowRowDepth(5, AqlValue(AqlValueHintUInt(1ull))); block->emplaceValue(6, 0, "g"); block->setShadowRowDepth(6, AqlValue(AqlValueHintUInt(0ull))); block->emplaceValue(7, 0, "h"); @@ -1209,20 +1262,21 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_twoAndHasM AqlCall call; AqlCallStack stack = {call}; - { - auto [state, skipped, input] = testee.execute(stack); - EXPECT_EQ(input.getRowIndex(), 0); - EXPECT_EQ(input.getEndIndex(), 3); - EXPECT_EQ(state, ExecutionState::HASMORE); - // EXPECT_EQ(skipped, 0); - } + auto [state, skipped, input] = testee.execute(stack); + // We only have one block, no more calls to execute necessary + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(input.getRowIndex(), 0); + EXPECT_EQ(input.getEndIndex(), 4); + + // Now validate the input range + validateInputRange(input, std::vector{"a", "b", "c", "d"}); + validateShadowRange(input, std::vector>{ + {0ull, "e"}, {1ull, "f"}}); + validateShadowRange(input, std::vector>{ + {0ull, "g"}}); + validateInputRange(input, 
std::vector{"h", "i"}); - { - auto [state, skipped, input] = testee.execute(stack); - EXPECT_EQ(input.getRowIndex(), 7); - EXPECT_EQ(input.getEndIndex(), 8); - EXPECT_EQ(state, ExecutionState::DONE); - } } // testee is destroyed here } From 8ba440c8a105bd8c56301665d18c7544afda9b5b Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 27 Nov 2019 15:36:47 +0100 Subject: [PATCH 022/122] Another fix on handling of shadow rows in InputRanges --- arangod/Aql/AqlItemBlock.cpp | 24 ++++++++++++++---------- arangod/Aql/AqlItemBlockInputRange.cpp | 4 ++-- arangod/Aql/ExecutionBlockImpl.cpp | 15 ++++++++------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index 4d9846ceb8af..687844c58830 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -37,6 +37,8 @@ #include #include +#include "Logger/LogMacros.h" + using namespace arangodb; using namespace arangodb::aql; @@ -883,16 +885,18 @@ RegisterId AqlItemBlock::getNrRegs() const noexcept { return _nrRegs; } size_t AqlItemBlock::size() const noexcept { return _nrItems; } std::tuple AqlItemBlock::getRelevantRange() { - size_t startIndex = _rowIndex; - ++_rowIndex; - - for (; _rowIndex < this->size() && !isShadowRow(_rowIndex); _rowIndex++) { - // Move on as long as we are not at the end or at a shadow row - } - size_t endIndex = _rowIndex; - TRI_ASSERT(startIndex < endIndex); - TRI_ASSERT(endIndex <= this->size()); - return std::make_pair(startIndex, endIndex); + // NOTE: + // Right now we can only support a range of datarows, that ends + // In a range of ShadowRows. + // After a shadow row, we do NOT know how to continue with + // The next Executor. + // So we can hardcode to return 0 -> firstShadowRow || endOfBlock + if (hasShadowRows()) { + auto const& shadows = getShadowRowIndexes(); + TRI_ASSERT(!shadows.empty()); + return {0, *shadows.begin()}; + } + return {0, size()}; } size_t AqlItemBlock::numEntries() const { return internalNrRegs() * _nrItems; } diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 91823b014460..178997eb6ec3 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -35,7 +35,7 @@ AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, SharedAqlItemBlockPtr const& block, std::size_t index, std::size_t endIndex) : _block{block}, _rowIndex{index}, _endIndex(endIndex), _finalState{state} { - TRI_ASSERT(index < endIndex); + TRI_ASSERT(index <= endIndex); TRI_ASSERT(endIndex <= block->size()); } @@ -43,7 +43,7 @@ AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, SharedAqlItemBlockPtr&& block, std::size_t index, std::size_t endIndex) noexcept : _block{std::move(block)}, _rowIndex{index}, _endIndex(endIndex), _finalState{state} { - TRI_ASSERT(index < endIndex); + TRI_ASSERT(index <= endIndex); TRI_ASSERT(endIndex <= block->size()); } diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 7b0fb0779161..a54efe1db958 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1101,13 +1101,11 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); - if (_lastRange.hasShadowRow()) { - auto const& [state, shadowRow] = _lastRange.peekShadowRow(); - TRI_ASSERT(shadowRow.isInitialized()); - if (shadowRow.isRelevant()) { - // We need to call The Executor with this input 
again. - execState = ExecState::DONE; - } + if (state == ExecutorState::DONE) { + // Right now we cannot support to have more than one set of + // ShadowRows inside of a Range. + // We do not know how to continue with the above executor after a shadowrow. + execState = ExecState::DONE; } } else { execState = ExecState::DONE; @@ -1127,6 +1125,9 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // This is not strictly necessary here, as we shouldn't be called again // after DONE. _outputItemRow.reset(); + if (_lastRange.hasMore()) { + return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; + } return {_upstreamState, skipped, std::move(outputBlock)}; } From b5dacbd9cf0aea9a9cd03e09edbd740e18606153 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 27 Nov 2019 15:37:15 +0100 Subject: [PATCH 023/122] Improved human readable output of SortLimit test, while validating the result --- tests/Aql/SortLimit-test.cpp | 58 +++++++++++++++++------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/tests/Aql/SortLimit-test.cpp b/tests/Aql/SortLimit-test.cpp index 8e9146feaf1f..2bf1f312bab6 100644 --- a/tests/Aql/SortLimit-test.cpp +++ b/tests/Aql/SortLimit-test.cpp @@ -79,14 +79,13 @@ class SortLimitTest arangodb::ClusterEngine::Mocking = true; arangodb::RandomGenerator::initialize(arangodb::RandomGenerator::RandomType::MERSENNE); - vocbase = std::make_unique(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, testDBInfo(server.server())); + vocbase = std::make_unique(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, + testDBInfo(server.server())); CreateCollection(); } - ~SortLimitTest() { - vocbase.reset(); - } + ~SortLimitTest() { vocbase.reset(); } std::string sorterType(TRI_vocbase_t& vocbase, std::string const& queryString, std::string rules = "") { @@ -113,7 +112,7 @@ class SortLimitTest return strategy; } - bool verifyExpectedResults(TRI_vocbase_t& vocbase, std::string const& queryString, + void verifyExpectedResults(TRI_vocbase_t& vocbase, std::string const& queryString, std::vector const& expected, std::string rules = "") { auto options = arangodb::velocypack::Parser::fromJson( @@ -135,21 +134,18 @@ class SortLimitTest EXPECT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); - - if (slice.length() != expected.size()) { - return false; - } + ASSERT_EQ(slice.length(), expected.size()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); - if (0 != arangodb::basics::VelocyPackHelper::compare( - insertedDocs[expected[i++]].slice(), resolved, true)) { - return false; - } - } - return true; + EXPECT_EQ(0, arangodb::basics::VelocyPackHelper::compare( + insertedDocs[expected[i]].slice(), resolved, true)) + << insertedDocs[expected[i]].slice().toJson() << " vs. 
" + << resolved.toJson(); + i++; + } } // create collection0, insertedDocs[0, 999] @@ -170,9 +166,9 @@ class SortLimitTest arangodb::OperationOptions options; options.returnNew = true; - arangodb::SingleCollectionTransaction trx(arangodb::transaction::StandaloneContext::Create(*vocbase), - *collection, - arangodb::AccessMode::Type::WRITE); + arangodb::SingleCollectionTransaction trx( + arangodb::transaction::StandaloneContext::Create(*vocbase), *collection, + arangodb::AccessMode::Type::WRITE); EXPECT_TRUE(trx.begin().ok()); for (auto& entry : docs) { @@ -191,7 +187,7 @@ TEST_F(SortLimitTest, CheckSimpleLimitSortedAscInInsertionOrder) { "FOR d IN testCollection0 SORT d.valAsc LIMIT 0, 10 RETURN d"; std::vector expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckLimitWithOffsetSortedAscInInsertionOrder) { @@ -199,7 +195,7 @@ TEST_F(SortLimitTest, CheckLimitWithOffsetSortedAscInInsertionOrder) { "FOR d IN testCollection0 SORT d.valAsc LIMIT 10, 10 RETURN d"; std::vector expected = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckSimpleLimitSortedAscInReverseInsertionOrder) { @@ -208,7 +204,7 @@ TEST_F(SortLimitTest, CheckSimpleLimitSortedAscInReverseInsertionOrder) { std::vector expected = {999, 998, 997, 996, 995, 994, 993, 992, 991, 990}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckLimitWithOffsetSortedAscInReverseInsertionOrder) { @@ -217,7 +213,7 @@ TEST_F(SortLimitTest, CheckLimitWithOffsetSortedAscInReverseInsertionOrder) { std::vector expected = {989, 988, 987, 986, 985, 984, 983, 982, 981, 980}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckSimpleLimitSortedDscInInsertionOrder) { @@ -226,7 +222,7 @@ TEST_F(SortLimitTest, CheckSimpleLimitSortedDscInInsertionOrder) { std::vector expected = {999, 998, 997, 996, 995, 994, 993, 992, 991, 990}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckLimitWithOffsetSortedDscInInsertionOrder) { @@ -235,7 +231,7 @@ TEST_F(SortLimitTest, CheckLimitWithOffsetSortedDscInInsertionOrder) { std::vector expected = {989, 988, 987, 986, 985, 984, 983, 982, 981, 980}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckSimpleLimitSortedDscInReverseInsertionOrder) { @@ -243,7 +239,7 @@ TEST_F(SortLimitTest, CheckSimpleLimitSortedDscInReverseInsertionOrder) { "FOR d IN testCollection0 SORT d.valDsc DESC LIMIT 0, 10 RETURN d"; std::vector expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, 
expected); } TEST_F(SortLimitTest, CheckLimitWithOffsetSortedDscInReverseInsertionOrder) { @@ -251,7 +247,7 @@ TEST_F(SortLimitTest, CheckLimitWithOffsetSortedDscInReverseInsertionOrder) { "FOR d IN testCollection0 SORT d.valDsc DESC LIMIT 10, 10 RETURN d"; std::vector expected = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckLimitWithOffsetCompoundSort) { @@ -259,7 +255,7 @@ TEST_F(SortLimitTest, CheckLimitWithOffsetCompoundSort) { "FOR d IN testCollection0 SORT d.mod, d.valAsc LIMIT 2, 5 RETURN d"; std::vector expected = {200, 300, 400, 500, 600}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckLimitWithOffsetCompoundSortAgain) { @@ -268,7 +264,7 @@ TEST_F(SortLimitTest, CheckLimitWithOffsetCompoundSortAgain) { std::vector expected = {1, 101, 201, 301, 401, 501, 601, 701, 801, 901}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckInterloperFilterMovedUp) { @@ -278,7 +274,7 @@ TEST_F(SortLimitTest, CheckInterloperFilterMovedUp) { std::vector expected = {0, 100, 200, 300, 400, 500, 600, 700, 800, 900}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } TEST_F(SortLimitTest, CheckInterloperFilterNotMoved) { @@ -289,7 +285,7 @@ TEST_F(SortLimitTest, CheckInterloperFilterNotMoved) { std::vector expected = {0, 100, 200, 300, 400, 500, 600, 700, 800, 900}; EXPECT_EQ(sorterType(*vocbase, query, rules), "standard"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected, rules)); + verifyExpectedResults(*vocbase, query, expected, rules); } TEST_F(SortLimitTest, CheckInterloperEnumerateList) { @@ -298,5 +294,5 @@ TEST_F(SortLimitTest, CheckInterloperEnumerateList) { "LIMIT 0, 10 RETURN d"; std::vector expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; EXPECT_EQ(sorterType(*vocbase, query), "standard"); - EXPECT_TRUE(verifyExpectedResults(*vocbase, query, expected)); + verifyExpectedResults(*vocbase, query, expected); } From a6281daf9d96cc5dfb12aef3e241d0a1af20565e Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 27 Nov 2019 15:37:32 +0100 Subject: [PATCH 024/122] Removed debug profile execution of a query --- tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp b/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp index 83b692021999..ba8b1d56f146 100644 --- a/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp +++ b/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp @@ -263,7 +263,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_plan) { verifySubquerySplicing(query, 1); auto expected = arangodb::velocypack::Parser::fromJson(R"([[1],[2]])"); - verifyQueryResult(query, expected->slice(), R"({"profile": 5})"); + verifyQueryResult(query, expected->slice()); } TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_in_subquery_plan) { From 7a8a1dc617b8f4ab72e10d0bc58212bdcf5cd8b9 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 27 Nov 2019 
15:56:22 +0100 Subject: [PATCH 025/122] Use simpleVPack printing for Trace of Execute, better to read by humans --- arangod/Aql/ExecutionBlockImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index a54efe1db958..ab96aa34e9b0 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -647,7 +647,7 @@ void ExecutionBlockImpl::traceExecuteEnd( VPackBuilder builder; { VPackObjectBuilder guard(&builder); - block->toVelocyPack(transaction(), builder); + block->toSimpleVPack(transaction(), builder); } auto options = transaction()->transactionContextPtr()->getVPackOptions(); LOG_TOPIC("f12f9", INFO, Logger::QUERIES) From 8307d15ed6e715a131e121723a142774806a43ca Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 29 Nov 2019 14:03:58 +0100 Subject: [PATCH 026/122] Added tests for AqlItemBlockInputRange and adjusted implementation accordingly. --- arangod/Aql/AqlItemBlockInputRange.cpp | 110 ++++++--- arangod/Aql/AqlItemBlockInputRange.h | 11 +- tests/Aql/AqlItemBlockInputRangeTest.cpp | 292 +++++++++++++++++++++++ tests/CMakeLists.txt | 1 + 4 files changed, 374 insertions(+), 40 deletions(-) create mode 100644 tests/Aql/AqlItemBlockInputRangeTest.cpp diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 178997eb6ec3..2ec4ed2e1aea 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -23,6 +23,12 @@ #include "AqlItemBlockInputRange.h" #include "Aql/ShadowAqlItemRow.h" +#include "Logger/LogMacros.h" + +#include +#include +#include + using namespace arangodb; using namespace arangodb::aql; @@ -49,15 +55,16 @@ AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, std::pair AqlItemBlockInputRange::peek() { if (indexIsValid()) { - return std::make_pair(state(), InputAqlItemRow{_block, _rowIndex}); + return std::make_pair(nextState(), + InputAqlItemRow{_block, _rowIndex}); } - return std::make_pair(state(), InputAqlItemRow{CreateInvalidInputRowHint{}}); + return std::make_pair(nextState(), + InputAqlItemRow{CreateInvalidInputRowHint{}}); } std::pair AqlItemBlockInputRange::next() { auto res = peek(); - if (indexIsValid()) { - TRI_ASSERT(res.second); + if (res.second) { ++_rowIndex; } return res; @@ -74,50 +81,29 @@ bool AqlItemBlockInputRange::hasMoreAfterThis() const noexcept { } ExecutorState AqlItemBlockInputRange::state() const noexcept { - return hasMoreAfterThis() ? ExecutorState::HASMORE : _finalState; -} - -ExecutorState AqlItemBlockInputRange::shadowState() const noexcept { - if (_block == nullptr) { - return _finalState; - } - // We Return HASMORE, if the next shadow row is NOT relevant. - // So we can directly fetch the next shadow row without informing - // the executor about an empty subquery. - size_t nextRowIndex = _rowIndex + 1; - if (_block != nullptr && nextRowIndex < _block->size() && _block->isShadowRow(nextRowIndex)) { - ShadowAqlItemRow nextRow{_block, nextRowIndex}; - if (!nextRow.isRelevant()) { - return ExecutorState::HASMORE; - } - } - return ExecutorState::DONE; + return nextState(); } bool AqlItemBlockInputRange::hasShadowRow() const noexcept { - if (_block == nullptr) { - // No block => no ShadowRow - return false; - } + return isIndexValid(_rowIndex) && isShadowRowAtIndex(_rowIndex); +} - if (hasMore()) { - // As long as hasMore() is true, we still have DataRows and are not on a ShadowRow now. 
- return false; - } +bool AqlItemBlockInputRange::isIndexValid(std::size_t index) const noexcept { + return _block != nullptr && index < _block->size(); +} - if (_rowIndex < _block->size()) { - // We still have more rows here, get next ShadowRow - TRI_ASSERT(_block->isShadowRow(_rowIndex)); - return true; - } - return false; +bool AqlItemBlockInputRange::isShadowRowAtIndex(std::size_t index) const noexcept { + TRI_ASSERT(isIndexValid(index)); + return _block->isShadowRow(index); } std::pair AqlItemBlockInputRange::peekShadowRow() { if (hasShadowRow()) { - return std::make_pair(shadowState(), ShadowAqlItemRow{_block, _rowIndex}); + return std::make_pair(nextState(), + ShadowAqlItemRow{_block, _rowIndex}); } - return std::make_pair(shadowState(), ShadowAqlItemRow{CreateInvalidShadowRowHint{}}); + return std::make_pair(nextState(), + ShadowAqlItemRow{CreateInvalidShadowRowHint{}}); } std::pair AqlItemBlockInputRange::nextShadowRow() { @@ -138,6 +124,54 @@ std::pair AqlItemBlockInputRange::nextShadowRow } // Advance the current row. _rowIndex++; + /* + { + std::string out = "Depths: ["; + for (auto const& it : shadowRowIndexes) { + ShadowAqlItemRow sr(_block, it); + out += std::to_string(sr.getDepth()) + ", "; + } + LOG_DEVEL << out << "]"; + + VPackBuilder hund; + _block->toSimpleVPack(nullptr, hund); + LOG_DEVEL << hund.toJson(); + } + */ } return res; } + +template +ExecutorState AqlItemBlockInputRange::nextState() const noexcept { + size_t testRowIndex = _rowIndex; + if constexpr (LookAhead::NEXT == doPeek) { + // Look ahead one + testRowIndex++; + } + if (!isIndexValid(testRowIndex)) { + return _finalState; + } + + bool isShadowRow = isShadowRowAtIndex(testRowIndex); + + if constexpr (RowType::DATA == type) { + // We Return HASMORE, if the next row is a data row + if (!isShadowRow) { + return ExecutorState::HASMORE; + } + return ExecutorState::DONE; + } else { + TRI_ASSERT(RowType::SHADOW == type); + // We Return HASMORE, if the next shadow row is NOT relevant. + // So we can directly fetch the next shadow row without informing + // the executor about an empty subquery. 
+ if (isShadowRow) { + ShadowAqlItemRow nextRow{_block, testRowIndex}; + if (!nextRow.isRelevant()) { + return ExecutorState::HASMORE; + } + } + return ExecutorState::DONE; + } +} diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index fd68ace50fff..85b1cafed5a2 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -44,8 +44,6 @@ class AqlItemBlockInputRange { ExecutorState state() const noexcept; - ExecutorState shadowState() const noexcept; - std::pair peek(); std::pair next(); @@ -61,10 +59,19 @@ class AqlItemBlockInputRange { std::pair nextShadowRow(); private: + bool isIndexValid(std::size_t index) const noexcept; + bool indexIsValid() const noexcept; bool hasMoreAfterThis() const noexcept; + bool isShadowRowAtIndex(std::size_t index) const noexcept; + + enum LookAhead { NOW, NEXT }; + enum RowType { DATA, SHADOW }; + template + ExecutorState nextState() const noexcept; + private: arangodb::aql::SharedAqlItemBlockPtr _block; std::size_t _rowIndex; diff --git a/tests/Aql/AqlItemBlockInputRangeTest.cpp b/tests/Aql/AqlItemBlockInputRangeTest.cpp new file mode 100644 index 000000000000..614c70ea7f64 --- /dev/null +++ b/tests/Aql/AqlItemBlockInputRangeTest.cpp @@ -0,0 +1,292 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "Aql/AqlItemBlockInputRange.h" +#include "gtest/gtest.h" + +#include "Aql/AqlItemBlockManager.h" +#include "Aql/InputAqlItemRow.h" +#include "Aql/ShadowAqlItemRow.h" +#include "AqlItemBlockHelper.h" +#include "Basics/VelocyPackHelper.h" + +#include +#include +#include + +using namespace arangodb; +using namespace arangodb::aql; +using namespace arangodb::basics; + +namespace arangodb { +namespace tests { +namespace aql { + +// Test empty constructor +class InputRangeTest : public ::testing::TestWithParam { + protected: + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; + + AqlItemBlockInputRange createEmpty() { + return AqlItemBlockInputRange{GetParam()}; + } + + AqlItemBlockInputRange createFromBlock(arangodb::aql::SharedAqlItemBlockPtr& block) { + auto const [start, end] = block->getRelevantRange(); + return AqlItemBlockInputRange(GetParam(), block, start, end); + } + + void validateEndReached(AqlItemBlockInputRange& testee) { + EXPECT_EQ(GetParam(), testee.state()); + // Test Data rows + EXPECT_FALSE(testee.hasMore()); + { + auto const [state, row] = testee.peek(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); + } + { + auto const [state, row] = testee.next(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); + } + // Test Shadow Rows + EXPECT_FALSE(testee.hasShadowRow()); + { + auto const [state, row] = testee.peekShadowRow(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); + } + { + auto const [state, row] = testee.nextShadowRow(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); + } + } + + void validateNextIsDataRow(AqlItemBlockInputRange& testee, + ExecutorState expectedState, int64_t value) { + EXPECT_TRUE(testee.hasMore()); + EXPECT_FALSE(testee.hasShadowRow()); + // We have the next row + EXPECT_EQ(testee.state(), ExecutorState::HASMORE); + auto rowIndexBefore = testee.getRowIndex(); + // Validate that shadowRowAPI does not move on + { + auto [state, row] = testee.peekShadowRow(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_FALSE(row.isInitialized()); + ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) + << "Skipped a non processed row."; + } + { + auto [state, row] = testee.nextShadowRow(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_FALSE(row.isInitialized()); + ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) + << "Skipped a non processed row."; + } + // Validate Data Row API + { + auto [state, row] = testee.peek(); + EXPECT_EQ(state, expectedState); + EXPECT_TRUE(row.isInitialized()); + auto val = row.getValue(0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(val.toInt64(), value); + ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) + << "Skipped a non processed row."; + } + + { + auto [state, row] = testee.next(); + EXPECT_EQ(state, expectedState); + EXPECT_TRUE(row.isInitialized()); + auto val = row.getValue(0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(val.toInt64(), value); + ASSERT_NE(rowIndexBefore, testee.getRowIndex()) + << "Did not go to next row."; + } + } + + void validateNextIsShadowRow(AqlItemBlockInputRange& testee, ExecutorState expectedState, + int64_t value, uint64_t depth) { + EXPECT_TRUE(testee.hasShadowRow()); + // The next is a ShadowRow, the state shall be done + EXPECT_EQ(testee.state(), ExecutorState::DONE); + + auto 
rowIndexBefore = testee.getRowIndex(); + // Validate that inputRowAPI does not move on + { + auto [state, row] = testee.peek(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_FALSE(row.isInitialized()); + ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) + << "Skipped a non processed row."; + } + { + auto [state, row] = testee.next(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_FALSE(row.isInitialized()); + ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) + << "Skipped a non processed row."; + } + // Validate ShadowRow API + { + auto [state, row] = testee.peekShadowRow(); + EXPECT_EQ(state, expectedState); + EXPECT_TRUE(row.isInitialized()); + auto val = row.getValue(0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(val.toInt64(), value); + EXPECT_EQ(row.getDepth(), depth); + ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) + << "Skipped a non processed row."; + } + { + auto [state, row] = testee.nextShadowRow(); + EXPECT_EQ(state, expectedState); + EXPECT_TRUE(row.isInitialized()); + auto val = row.getValue(0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(val.toInt64(), value); + EXPECT_EQ(row.getDepth(), depth); + ASSERT_NE(rowIndexBefore, testee.getRowIndex()) + << "Did not go to next row."; + } + } +}; + +TEST_P(InputRangeTest, empty_returns_given_state) { + auto testee = createEmpty(); + EXPECT_EQ(GetParam(), testee.state()); +} + +TEST_P(InputRangeTest, empty_does_not_have_more) { + auto testee = createEmpty(); + EXPECT_FALSE(testee.hasMore()); +} + +TEST_P(InputRangeTest, empty_peek_is_empty) { + auto testee = createEmpty(); + auto const [state, row] = testee.peek(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); +} + +TEST_P(InputRangeTest, empty_next_is_empty) { + auto testee = createEmpty(); + auto const [state, row] = testee.next(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); +} + +TEST_P(InputRangeTest, empty_does_not_have_more_shadow_rows) { + auto testee = createEmpty(); + EXPECT_FALSE(testee.hasShadowRow()); +} + +TEST_P(InputRangeTest, empty_peek_shadow_is_empty) { + auto testee = createEmpty(); + auto const [state, row] = testee.peekShadowRow(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); +} + +TEST_P(InputRangeTest, empty_next_shadow_is_empty) { + auto testee = createEmpty(); + auto const [state, row] = testee.nextShadowRow(); + EXPECT_EQ(GetParam(), state); + EXPECT_FALSE(row.isInitialized()); +} + +TEST_P(InputRangeTest, no_shadow_rows_in_block) { + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{{1}}, {{2}}, {{3}}, {{4}}}, {}); + auto testee = createFromBlock(inputBlock); + + // We have 3 internal rows + validateNextIsDataRow(testee, ExecutorState::HASMORE, 1); + validateNextIsDataRow(testee, ExecutorState::HASMORE, 2); + validateNextIsDataRow(testee, ExecutorState::HASMORE, 3); + // Last Row needs to return upstream State + validateNextIsDataRow(testee, GetParam(), 4); + validateEndReached(testee); +} + +TEST_P(InputRangeTest, level_0_shadow_rows_in_block) { + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{{1}}, {{2}}, {{3}}, {{4}}}, {{1, 0}, {3, 0}}); + auto testee = createFromBlock(inputBlock); + + validateNextIsDataRow(testee, ExecutorState::DONE, 1); + validateNextIsShadowRow(testee, ExecutorState::DONE, 2, 0); + validateNextIsDataRow(testee, ExecutorState::DONE, 3); + // Last Row needs to return upstream State + validateNextIsShadowRow(testee, GetParam(), 4, 0); + validateEndReached(testee); +} + +TEST_P(InputRangeTest, 
multi_level_shadow_rows_in_block) { + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{{1}}, {{2}}, {{3}}, {{4}}, {{5}}, {{6}}, {{7}}}, + {{3, 0}, {4, 1}, {5, 2}}); + auto testee = createFromBlock(inputBlock); + + validateNextIsDataRow(testee, ExecutorState::HASMORE, 1); + validateNextIsDataRow(testee, ExecutorState::HASMORE, 2); + validateNextIsDataRow(testee, ExecutorState::DONE, 3); + validateNextIsShadowRow(testee, ExecutorState::HASMORE, 4, 0); + validateNextIsShadowRow(testee, ExecutorState::HASMORE, 5, 1); + validateNextIsShadowRow(testee, ExecutorState::DONE, 6, 2); + + // Last Row needs to return upstream State + validateNextIsDataRow(testee, GetParam(), 7); + validateEndReached(testee); +} + +TEST_P(InputRangeTest, multi_shadow_rows_batches_in_block) { + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{{1}}, {{2}}, {{3}}, {{4}}, {{5}}, {{6}}, {{7}}}, + {{3, 0}, {4, 1}, {5, 0}, {6, 1}}); + auto testee = createFromBlock(inputBlock); + + validateNextIsDataRow(testee, ExecutorState::HASMORE, 1); + validateNextIsDataRow(testee, ExecutorState::HASMORE, 2); + validateNextIsDataRow(testee, ExecutorState::DONE, 3); + validateNextIsShadowRow(testee, ExecutorState::HASMORE, 4, 0); + validateNextIsShadowRow(testee, ExecutorState::DONE, 5, 1); + validateNextIsShadowRow(testee, ExecutorState::HASMORE, 6, 0); + + // Last Row needs to return upstream State + validateNextIsShadowRow(testee, GetParam(), 7, 1); + validateEndReached(testee); +} + +INSTANTIATE_TEST_CASE_P(AqlItemBlockInputRangeTest, InputRangeTest, + ::testing::Values(ExecutorState::DONE, ExecutorState::HASMORE)); + +} // namespace aql +} // namespace tests +} // namespace arangodb \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d49562a28008..145503288127 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -25,6 +25,7 @@ set(ARANGODB_TESTS_SOURCES Agency/SupervisionTest.cpp Aql/AllRowsFetcherTest.cpp Aql/AqlHelper.cpp + Aql/AqlItemBlockInputRangeTest.cpp Aql/AqlItemBlockTest.cpp Aql/AqlItemRowPrinter.cpp Aql/AqlItemRowTest.cpp From c97314907864af2b95ff3d03c81d3b20f84d2666 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 2 Dec 2019 10:31:24 +0100 Subject: [PATCH 027/122] Fixed issue where ExecutionBlockImpl returns DONE, although there are still rows in the DataRange --- arangod/Aql/AqlItemBlockInputRange.cpp | 16 +--------------- arangod/Aql/ExecutionBlockImpl.cpp | 25 +++++++++++++++++-------- arangod/Aql/SingleRowFetcher.h | 8 ++++---- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 2ec4ed2e1aea..974f4c0ee320 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -124,20 +124,6 @@ std::pair AqlItemBlockInputRange::nextShadowRow } // Advance the current row. 
_rowIndex++; - /* - { - std::string out = "Depths: ["; - for (auto const& it : shadowRowIndexes) { - ShadowAqlItemRow sr(_block, it); - out += std::to_string(sr.getDepth()) + ", "; - } - LOG_DEVEL << out << "]"; - - VPackBuilder hund; - _block->toSimpleVPack(nullptr, hund); - LOG_DEVEL << hund.toJson(); - } - */ } return res; } @@ -174,4 +160,4 @@ ExecutorState AqlItemBlockInputRange::nextState() const noexcept { } return ExecutorState::DONE; } -} +} \ No newline at end of file diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 94c89833b791..b3bde87744c6 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1009,6 +1009,12 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // TODO: Need to make this member variable for waiting? AqlCall myCall = stack.popCall(); ExecState execState = ::NextState(myCall); + if (_lastRange.hasShadowRow()) { + // We have not been able to move all shadowRows into the output last time. + // Continue from there. + // TODO test if this works with COUNT COLLECT + execState = ExecState::SHADOWROWS; + } AqlCall executorRequest; while (execState != ExecState::DONE) { @@ -1088,17 +1094,20 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); if (state == ExecutorState::DONE) { - // Right now we cannot support to have more than one set of - // ShadowRows inside of a Range. - // We do not know how to continue with the above executor after a shadowrow. - execState = ExecState::DONE; + if (_lastRange.hasMore()) { + // TODO this state is invalid, and can just show up now if we exclude SKIP + execState = ExecState::PRODUCE; + } else { + // Right now we cannot support to have more than one set of + // ShadowRows inside of a Range. + // We do not know how to continue with the above executor after a shadowrow. + TRI_ASSERT(!_lastRange.hasMore()); + execState = ExecState::DONE; + } } } else { execState = ExecState::DONE; } - // TRI_ASSERT(false); - // execState = ::NextState(myCall); - break; } default: @@ -1111,7 +1120,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // This is not strictly necessary here, as we shouldn't be called again // after DONE. _outputItemRow.reset(); - if (_lastRange.hasMore()) { + if (_lastRange.hasMore() || _lastRange.hasShadowRow()) { return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; } return {_upstreamState, skipped, std::move(outputBlock)}; diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index b4b5521e2caf..2afcaf465eaf 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -101,10 +101,10 @@ class SingleRowFetcher { template > [[nodiscard]] #else - [[nodiscard]] - TEST_VIRTUAL + [[nodiscard]] TEST_VIRTUAL #endif - std::pair fetchBlockForPassthrough(size_t atMost); + std::pair + fetchBlockForPassthrough(size_t atMost); [[nodiscard]] std::pair preFetchNumberOfRows(size_t atMost); @@ -172,6 +172,6 @@ class SingleRowFetcher { [[nodiscard]] ExecutionState returnState(bool isShadowRow) const; }; -} // namespace arangodb +} // namespace arangodb::aql #endif // ARANGOD_AQL_SINGLE_ROW_FETCHER_H From d83e3acfc9eb6db15ac53a25f88626eef4715976 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 2 Dec 2019 17:02:26 +0100 Subject: [PATCH 028/122] Final modification of ShadowRows in FilterExecutor. All but profiling tests pass. 
This needs some discussion about overfetching of data, and propagation of hardLimit --- arangod/Aql/AqlItemBlockInputRange.cpp | 41 ++++++------------------ arangod/Aql/AqlItemBlockInputRange.h | 4 --- arangod/Aql/ExecutionBlockImpl.cpp | 7 ++-- arangod/Aql/FilterExecutor.cpp | 8 ++++- tests/Aql/AqlItemBlockInputRangeTest.cpp | 1 + 5 files changed, 23 insertions(+), 38 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 974f4c0ee320..3acf0bd1911b 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -39,22 +39,20 @@ AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state) AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, SharedAqlItemBlockPtr const& block, - std::size_t index, std::size_t endIndex) - : _block{block}, _rowIndex{index}, _endIndex(endIndex), _finalState{state} { - TRI_ASSERT(index <= endIndex); - TRI_ASSERT(endIndex <= block->size()); + std::size_t index, std::size_t) + : _block{block}, _rowIndex{index}, _endIndex(_block->size()), _finalState{state} { + TRI_ASSERT(index <= _block->size()); } AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, SharedAqlItemBlockPtr&& block, - std::size_t index, std::size_t endIndex) noexcept - : _block{std::move(block)}, _rowIndex{index}, _endIndex(endIndex), _finalState{state} { - TRI_ASSERT(index <= endIndex); - TRI_ASSERT(endIndex <= block->size()); + std::size_t index, std::size_t) noexcept + : _block{std::move(block)}, _rowIndex{index}, _endIndex(_block->size()), _finalState{state} { + TRI_ASSERT(index <= _block->size()); } std::pair AqlItemBlockInputRange::peek() { - if (indexIsValid()) { + if (hasMore()) { return std::make_pair(nextState(), InputAqlItemRow{_block, _rowIndex}); } @@ -70,14 +68,8 @@ std::pair AqlItemBlockInputRange::next() { return res; } -bool AqlItemBlockInputRange::indexIsValid() const noexcept { - return _block != nullptr && _rowIndex < _endIndex; -} - -bool AqlItemBlockInputRange::hasMore() const noexcept { return indexIsValid(); } - -bool AqlItemBlockInputRange::hasMoreAfterThis() const noexcept { - return indexIsValid() && _rowIndex + 1 < _endIndex; +bool AqlItemBlockInputRange::hasMore() const noexcept { + return isIndexValid(_rowIndex) && !isShadowRowAtIndex(_rowIndex); } ExecutorState AqlItemBlockInputRange::state() const noexcept { @@ -108,20 +100,7 @@ std::pair AqlItemBlockInputRange::peekShadowRow std::pair AqlItemBlockInputRange::nextShadowRow() { auto res = peekShadowRow(); - if (hasShadowRow()) { - auto const& shadowRowIndexes = _block->getShadowRowIndexes(); - auto it = std::find(shadowRowIndexes.begin(), shadowRowIndexes.end(), _rowIndex); - // We have a shadow row in this index, so we cannot be at the end now. - TRI_ASSERT(it != shadowRowIndexes.end()); - // Go to next ShadowRow. - it++; - if (it == shadowRowIndexes.end()) { - // No more shadow row here. - _endIndex = _block->size(); - } else { - // Set endIndex to the next ShadowRowIndex. - _endIndex = *it; - } + if (res.second.isInitialized()) { // Advance the current row. 
_rowIndex++; } diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 85b1cafed5a2..e0a6a5827c35 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -61,10 +61,6 @@ class AqlItemBlockInputRange { private: bool isIndexValid(std::size_t index) const noexcept; - bool indexIsValid() const noexcept; - - bool hasMoreAfterThis() const noexcept; - bool isShadowRowAtIndex(std::size_t index) const noexcept; enum LookAhead { NOW, NEXT }; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index b3bde87744c6..50eb4f63ffe3 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1017,7 +1017,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } AqlCall executorRequest; - while (execState != ExecState::DONE) { + while (execState != ExecState::DONE && !_outputItemRow->isFull()) { switch (execState) { case ExecState::SKIP: { auto [state, skippedLocal, call] = @@ -1028,6 +1028,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; } else if (myCall.getOffset() > 0) { + TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more executorRequest = call; execState = ExecState::UPSTREAM; @@ -1040,15 +1041,17 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { case ExecState::PRODUCE: { auto linesBefore = _outputItemRow->numRowsWritten(); TRI_ASSERT(myCall.getLimit() > 0); + auto limit = (std::min)(myCall.getLimit(), _outputItemRow->numRowsLeft()); // Execute getSome auto const [state, stats, call] = - _executor.produceRows(myCall.getLimit(), _lastRange, *_outputItemRow); + _executor.produceRows(limit, _lastRange, *_outputItemRow); auto written = _outputItemRow->numRowsWritten() - linesBefore; _engine->_stats += stats; myCall.didProduce(written); if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; } else if (myCall.getLimit() > 0) { + TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more executorRequest = call; execState = ExecState::UPSTREAM; diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index e96a83e0383e..dd40309be60a 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -35,6 +35,8 @@ #include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" +#include "Logger/LogMacros.h" + #include using namespace arangodb; @@ -143,6 +145,10 @@ std::tuple FilterExecutor::produceRows( } AqlCall upstreamCall{}; - upstreamCall.softLimit = limit; + /* We can use this value as a heuristic on overfetching. 
+ * by default we do not skip, and do not set any soft or hardLimit + * on upstream + * upstreamCall.softLimit = limit; + */ return {inputRange.state(), stats, upstreamCall}; } diff --git a/tests/Aql/AqlItemBlockInputRangeTest.cpp b/tests/Aql/AqlItemBlockInputRangeTest.cpp index 614c70ea7f64..c5180e23ec1c 100644 --- a/tests/Aql/AqlItemBlockInputRangeTest.cpp +++ b/tests/Aql/AqlItemBlockInputRangeTest.cpp @@ -128,6 +128,7 @@ class InputRangeTest : public ::testing::TestWithParam { ASSERT_NE(rowIndexBefore, testee.getRowIndex()) << "Did not go to next row."; } + EXPECT_EQ(expectedState, testee.state()); } void validateNextIsShadowRow(AqlItemBlockInputRange& testee, ExecutorState expectedState, From 9c8ae9dbbf9a604a7ea73e7c2cda0281b8266bfe Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 2 Dec 2019 17:08:40 +0100 Subject: [PATCH 029/122] Removed debug log includes --- arangod/Aql/AqlItemBlock.cpp | 2 -- arangod/Aql/AqlItemBlockInputRange.cpp | 2 -- arangod/Aql/FilterExecutor.cpp | 2 -- arangod/Aql/SingleRowFetcher.cpp | 1 - 4 files changed, 7 deletions(-) diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index 21ce81042122..c4527d2a6839 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -39,8 +39,6 @@ #include #include -#include "Logger/LogMacros.h" - using namespace arangodb; using namespace arangodb::aql; diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 3acf0bd1911b..32d2155ce8fb 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -23,8 +23,6 @@ #include "AqlItemBlockInputRange.h" #include "Aql/ShadowAqlItemRow.h" -#include "Logger/LogMacros.h" - #include #include #include diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index dd40309be60a..54b76c8b05ab 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -35,8 +35,6 @@ #include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" -#include "Logger/LogMacros.h" - #include using namespace arangodb; diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index a8f8a867b61e..017c1821973f 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -24,7 +24,6 @@ //////////////////////////////////////////////////////////////////////////////// #include "SingleRowFetcher.h" -#include #include "Aql/AqlItemBlock.h" #include "Aql/DependencyProxy.h" From 292bd2ae5158792a3e60bf6eadf44e42bb98c8f8 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 3 Dec 2019 17:43:50 +0100 Subject: [PATCH 030/122] Moved responsibility for the client call into the output row. This can precisely count the elements it has written, and forms a single source of truth. The output row now works as the requested block by client that needs to be filled. Now the FilterExecutorTests are red. 
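
A rough sketch of the intended accounting, using the names introduced in
this patch (passesFilter() is only a stand-in for the executor-specific
check and does not exist in the code; the exact loop may still change):

    // The client's AqlCall now lives inside the OutputAqlItemRow, which is
    // the single place that counts produced and skipped rows.
    OutputAqlItemRow output(block, outputRegisters, registersToKeep,
                            registersToClear, std::move(clientCall));

    while (inputRange.hasMore() && !output.isFull()) {
      auto const& [state, input] = inputRange.next();
      if (passesFilter(input)) {
        output.copyRow(input);
        output.advanceRow();  // bumps the stored AqlCall via didProduce(1)
      }
    }
    // Skips are reported the same way: output.didSkip(n) forwards to the
    // stored AqlCall, and softLimit()/hardLimit() expose its limits.

The executor loop then only asks the output row whether it may still write
(isFull()) instead of tracking a separate atMost counter of its own.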
--- arangod/Aql/ExecutionBlockImpl.cpp | 21 ++++++---------- arangod/Aql/ExecutionBlockImpl.h | 7 +++++- arangod/Aql/FilterExecutor.cpp | 7 +++--- arangod/Aql/FilterExecutor.h | 2 +- arangod/Aql/OutputAqlItemRow.cpp | 22 ++++++++++++++++- arangod/Aql/OutputAqlItemRow.h | 39 ++++++++++++++++++++++++------ tests/Aql/FilterExecutorTest.cpp | 6 ++--- 7 files changed, 75 insertions(+), 29 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 50eb4f63ffe3..84b8057174e1 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -301,13 +301,12 @@ std::pair ExecutionBlockImpl::g } template -std::unique_ptr ExecutionBlockImpl::createOutputRow( - SharedAqlItemBlockPtr& newBlock) const { +std::unique_ptr ExecutionBlockImpl::createOutputRow(SharedAqlItemBlockPtr& newBlock) { if /* constexpr */ (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) { - return std::make_unique(newBlock, infos().getOutputRegisters(), - infos().registersToKeep(), - infos().registersToClear(), - OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows); + return std::make_unique( + newBlock, infos().getOutputRegisters(), infos().registersToKeep(), + infos().registersToClear(), std::move(AqlCall{}), + OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows); } else { return std::make_unique(newBlock, infos().getOutputRegisters(), infos().registersToKeep(), @@ -996,6 +995,7 @@ ExecState NextState(AqlCall const& call) { template <> std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { + AqlCall myCall = stack.popCall(); if (!_outputItemRow) { // TODO: FIXME Hard coded size SharedAqlItemBlockPtr newBlock = @@ -1007,7 +1007,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { size_t skipped = 0; // TODO: Need to make this member variable for waiting? - AqlCall myCall = stack.popCall(); + ExecState execState = ::NextState(myCall); if (_lastRange.hasShadowRow()) { // We have not been able to move all shadowRows into the output last time. 
@@ -1039,15 +1039,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::PRODUCE: { - auto linesBefore = _outputItemRow->numRowsWritten(); TRI_ASSERT(myCall.getLimit() > 0); - auto limit = (std::min)(myCall.getLimit(), _outputItemRow->numRowsLeft()); // Execute getSome - auto const [state, stats, call] = - _executor.produceRows(limit, _lastRange, *_outputItemRow); - auto written = _outputItemRow->numRowsWritten() - linesBefore; + auto const [state, stats, call] = _executor.produceRows(_lastRange, *_outputItemRow); _engine->_stats += stats; - myCall.didProduce(written); if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; } else if (myCall.getLimit() > 0) { diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index e90fb11f5ce7..88f410033edf 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -36,6 +36,7 @@ namespace arangodb::aql { +struct AqlCall; class AqlItemBlock; class ExecutionEngine; class ExecutionNode; @@ -225,7 +226,7 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] std::pair requestWrappedBlock( size_t nrItems, RegisterId nrRegs); - [[nodiscard]] std::unique_ptr createOutputRow(SharedAqlItemBlockPtr& newBlock) const; + [[nodiscard]] std::unique_ptr createOutputRow(SharedAqlItemBlockPtr& newBlock); [[nodiscard]] Query const& getQuery() const; @@ -244,6 +245,10 @@ class ExecutionBlockImpl final : public ExecutionBlock { // Trace the end of a getSome call, potentially with result void traceExecuteEnd(std::tuple const& result); + // Ensure that we have an output block of the desired dimenstions + // Will as a side effect modify _outputItemRow + void ensureOutputBlock(AqlCall&& call); + private: /** * @brief Used to allow the row Fetcher to access selected methods of this diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 54b76c8b05ab..fe7f81f6394e 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -123,26 +123,27 @@ std::tuple FilterExecutor::skipRowsRange( } std::tuple FilterExecutor::produceRows( - size_t limit, AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { TRI_IF_FAILURE("FilterExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } FilterStats stats{}; - while (inputRange.hasMore() && limit > 0) { + while (inputRange.hasMore()) { TRI_ASSERT(!output.isFull()); auto const& [state, input] = inputRange.next(); TRI_ASSERT(input.isInitialized()); if (input.getValue(_infos.getInputRegister()).toBoolean()) { output.copyRow(input); output.advanceRow(); - limit--; } else { stats.incrFiltered(); } } AqlCall upstreamCall{}; + upstreamCall.softLimit = output.softLimit(); + upstreamCall.hardLimit = output.hardLimit(); /* We can use this value as a heuristic on overfetching. * by default we do not skip, and do not set any soft or hardLimit * on upstream diff --git a/arangod/Aql/FilterExecutor.h b/arangod/Aql/FilterExecutor.h index 21cd564aea41..eea47aba7b87 100644 --- a/arangod/Aql/FilterExecutor.h +++ b/arangod/Aql/FilterExecutor.h @@ -96,7 +96,7 @@ class FilterExecutor { * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ [[nodiscard]] std::tuple produceRows( - size_t atMost, AqlItemBlockInputRange& input, OutputAqlItemRow& output); + AqlItemBlockInputRange& input, OutputAqlItemRow& output); /** * @brief skip the next Row of Aql Values. 
diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index 079653a618a8..ff62d61a0366 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -23,6 +23,15 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// +/* +The following conditions need to hold true, we need to add c++ tests for this. + + output.isFull() == output.numRowsLeft() > 0; + output.numRowsLeft() <= output.allocatedRows() - output.numRowsWritten(); + output.numRowsLeft() <= output.softLimit(); + output.softLimit() <= output.hardLimit(); +*/ + #include "OutputAqlItemRow.h" #include "Aql/AqlItemBlock.h" @@ -41,13 +50,14 @@ OutputAqlItemRow::OutputAqlItemRow( std::shared_ptr const> outputRegisters, std::shared_ptr const> registersToKeep, std::shared_ptr const> registersToClear, - CopyRowBehavior copyRowBehavior) + AqlCall&& clientCall, CopyRowBehavior copyRowBehavior) : _block(std::move(block)), _baseIndex(0), _lastBaseIndex(0), _inputRowCopied(false), _lastSourceRow{CreateInvalidInputRowHint{}}, _numValuesWritten(0), + _call(std::move(clientCall)), _doNotCopyInputRow(copyRowBehavior == CopyRowBehavior::DoNotCopyInputRows), _outputRegisters(std::move(outputRegisters)), _registersToKeep(std::move(registersToKeep)), @@ -187,6 +197,12 @@ void OutputAqlItemRow::advanceRow() { TRI_ASSERT(produced()); TRI_ASSERT(allValuesWritten()); TRI_ASSERT(_inputRowCopied); + if (!_block->isShadowRow(_baseIndex)) { + // We have written a data row into the output. + // Need to count it. + _call.didProduce(1); + } + ++_baseIndex; _inputRowCopied = false; _numValuesWritten = 0; @@ -197,6 +213,10 @@ void OutputAqlItemRow::toVelocyPack(velocypack::Options const* options, VPackBui block().rowToSimpleVPack(_baseIndex, options, builder); } +AqlCall::Limit OutputAqlItemRow::softLimit() const { return _call.softLimit; } + +AqlCall::Limit OutputAqlItemRow::hardLimit() const { return _call.hardLimit; } + SharedAqlItemBlockPtr OutputAqlItemRow::stealBlock() { // numRowsWritten() inspects _block, so save this before resetting it! 
auto const numRows = numRowsWritten(); diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 969b06e2e895..835b54e4b78f 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -26,6 +26,7 @@ #ifndef ARANGOD_AQL_OUTPUT_AQL_ITEM_ROW_H #define ARANGOD_AQL_OUTPUT_AQL_ITEM_ROW_H +#include "Aql/AqlCall.h" #include "Aql/InputAqlItemRow.h" #include "Aql/SharedAqlItemBlockPtr.h" #include "Aql/types.h" @@ -54,6 +55,7 @@ class OutputAqlItemRow { std::shared_ptr const> outputRegisters, std::shared_ptr const> registersToKeep, std::shared_ptr const> registersToClear, + AqlCall&& clientCall = AqlCall{}, CopyRowBehavior = CopyRowBehavior::CopyInputRows); ~OutputAqlItemRow() = default; @@ -95,7 +97,9 @@ class OutputAqlItemRow { void copyBlockInternalRegister(InputAqlItemRow const& sourceRow, RegisterId input, RegisterId output); - [[nodiscard]] std::size_t getNrRegisters() const { return block().getNrRegs(); } + [[nodiscard]] std::size_t getNrRegisters() const { + return block().getNrRegs(); + } /** * @brief May only be called after all output values in the current row have @@ -105,7 +109,9 @@ class OutputAqlItemRow { void advanceRow(); // returns true if row was produced - [[nodiscard]] bool produced() const { return _inputRowCopied && allValuesWritten(); } + [[nodiscard]] bool produced() const { + return _inputRowCopied && allValuesWritten(); + } /** * @brief Steal the AqlItemBlock held by the OutputAqlItemRow. The returned @@ -118,7 +124,9 @@ class OutputAqlItemRow { */ SharedAqlItemBlockPtr stealBlock(); - [[nodiscard]] bool isFull() const { return numRowsWritten() >= block().size(); } + [[nodiscard]] bool isFull() const { + return numRowsWritten() >= block().size(); + } /** * @brief Returns the number of rows that were fully written. @@ -131,7 +139,9 @@ class OutputAqlItemRow { * NOTE that we later want to replace this with some "atMost" value * passed from ExecutionBlockImpl. */ - [[nodiscard]] size_t numRowsLeft() const { return block().size() - _baseIndex; } + [[nodiscard]] size_t numRowsLeft() const { + return block().size() - _baseIndex; + } // Use this function with caution! We need it only for the ConstrainedSortExecutor void setBaseIndex(std::size_t index); @@ -161,6 +171,10 @@ class OutputAqlItemRow { void toVelocyPack(velocypack::Options const* options, velocypack::Builder& builder); + AqlCall::Limit softLimit() const; + + AqlCall::Limit hardLimit() const; + private: [[nodiscard]] std::unordered_set const& outputRegisters() const { TRI_ASSERT(_outputRegisters != nullptr); @@ -210,6 +224,13 @@ class OutputAqlItemRow { */ size_t _numValuesWritten; + /** + * @brief Call recieved by the client to produce this outputblock + * It is used for accounting of produced rows and number + * of rows requested by client. + */ + AqlCall _call; + /** * @brief Set if and only if the current ExecutionBlock passes the * AqlItemBlocks through. 
@@ -228,9 +249,13 @@ class OutputAqlItemRow { bool _allowSourceRowUninitialized; private: - [[nodiscard]] size_t nextUnwrittenIndex() const noexcept { return numRowsWritten(); } + [[nodiscard]] size_t nextUnwrittenIndex() const noexcept { + return numRowsWritten(); + } - [[nodiscard]] size_t numRegistersToWrite() const { return outputRegisters().size(); } + [[nodiscard]] size_t numRegistersToWrite() const { + return outputRegisters().size(); + } [[nodiscard]] bool allValuesWritten() const { // If we have a shadowRow in the output, it counts as written @@ -260,6 +285,6 @@ class OutputAqlItemRow { template void adjustShadowRowDepth(ItemRowType const& sourceRow); }; -} // namespace arangodb +} // namespace arangodb::aql #endif // ARANGOD_AQL_OUTPUT_AQL_ITEM_ROW_H diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 5f64b03278d7..3d832576e388 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -368,7 +368,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange) { OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()); EXPECT_EQ(output.numRowsWritten(), 0); - auto const [state, stats, call] = testee.produceRows(1000, input, output); + auto const [state, stats, call] = testee.produceRows(input, output); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_EQ(stats.getFiltered(), 2); EXPECT_EQ(output.numRowsWritten(), 3); @@ -393,7 +393,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()); EXPECT_EQ(output.numRowsWritten(), 0); - auto const [state, stats, call] = testee.produceRows(1000, input, output); + auto const [state, stats, call] = testee.produceRows(input, output); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(stats.getFiltered(), 2); EXPECT_EQ(output.numRowsWritten(), 3); @@ -452,7 +452,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange_has_more) { AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()); - auto const [state, stats, call] = testee.produceRows(2, input, output); + auto const [state, stats, call] = testee.produceRows(input, output); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(stats.getFiltered(), 1); EXPECT_EQ(output.numRowsWritten(), 2); From ba3eea6f6f72edc3c42ebb3193be1ab655599955 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 4 Dec 2019 09:46:09 +0100 Subject: [PATCH 031/122] Fixed FilterExecutorTest --- tests/Aql/FilterExecutorTest.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 3d832576e388..8b237d87af9d 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -387,11 +387,12 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - + size_t softLimit = 1000; AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; - + AqlCall limitedCall{}; + limitedCall.softLimit = softLimit; OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); + infos.registersToClear(), std::move(limitedCall)); EXPECT_EQ(output.numRowsWritten(), 0); auto const 
[state, stats, call] = testee.produceRows(input, output); EXPECT_EQ(state, ExecutorState::HASMORE); @@ -401,9 +402,8 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { // Test the Call we send to upstream EXPECT_EQ(call.offset, 0); EXPECT_FALSE(call.hasHardLimit()); - // Avoid overfetching. I do not have a strong requirement on this - // test, however this is what we do right now. - EXPECT_EQ(call.getLimit(), 997); + // We have a given softLimit, so we do not do overfetching + EXPECT_EQ(call.getLimit(), softLimit - 3); EXPECT_FALSE(call.fullCount); } @@ -450,8 +450,10 @@ TEST_F(FilterExecutorTest, test_produce_datarange_has_more) { {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + block.reset(new AqlItemBlock(itemBlockManager, 2, 1)); OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()); + auto const [state, stats, call] = testee.produceRows(input, output); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(stats.getFiltered(), 1); From d8f80f94c1b67442eb9da6410b10f5da4182deb9 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 4 Dec 2019 09:46:56 +0100 Subject: [PATCH 032/122] Expose required features to AqlCall controlled by the output aql item row. Adapted executionBlockImpl to report there --- arangod/Aql/ExecutionBlockImpl.cpp | 66 +++++++++++++++++++----------- arangod/Aql/ExecutionBlockImpl.h | 3 +- arangod/Aql/FilterExecutor.cpp | 3 +- arangod/Aql/OutputAqlItemRow.cpp | 4 ++ arangod/Aql/OutputAqlItemRow.h | 4 ++ 5 files changed, 52 insertions(+), 28 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 84b8057174e1..a8a6e49f4791 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -202,7 +202,7 @@ std::pair ExecutionBlockImpl::g TRI_ASSERT(newBlock != nullptr); TRI_ASSERT(newBlock->size() > 0); TRI_ASSERT(newBlock->size() <= atMost); - _outputItemRow = createOutputRow(newBlock); + _outputItemRow = createOutputRow(newBlock, AqlCall{}); } ExecutionState state = ExecutionState::HASMORE; @@ -301,16 +301,18 @@ std::pair ExecutionBlockImpl::g } template -std::unique_ptr ExecutionBlockImpl::createOutputRow(SharedAqlItemBlockPtr& newBlock) { +std::unique_ptr ExecutionBlockImpl::createOutputRow( + SharedAqlItemBlockPtr& newBlock, AqlCall&& call) { if /* constexpr */ (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) { - return std::make_unique( - newBlock, infos().getOutputRegisters(), infos().registersToKeep(), - infos().registersToClear(), std::move(AqlCall{}), - OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows); + return std::make_unique(newBlock, infos().getOutputRegisters(), + infos().registersToKeep(), + infos().registersToClear(), std::move(call), + OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows); } else { return std::make_unique(newBlock, infos().getOutputRegisters(), infos().registersToKeep(), - infos().registersToClear()); + infos().registersToClear(), + std::move(call)); } } @@ -939,6 +941,17 @@ std::pair ExecutionBlockImpl::r executor(), *_engine, nrItems, nrRegs); } +template +void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { + if (_outputItemRow->isFull()) { + // We need to define the size of this block based on Input / Executor / Subquery depth + size_t blockSize = ExecutionBlock::DefaultBatchSize(); + SharedAqlItemBlockPtr newBlock = + 
_engine->itemBlockManager().requestBlock(blockSize, _infos.numberOfOutputRegisters()); + _outputItemRow = createOutputRow(newBlock, std::move(call)); + } +} + /// @brief request an AqlItemBlock from the memory manager template SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, @@ -995,20 +1008,20 @@ ExecState NextState(AqlCall const& call) { template <> std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { - AqlCall myCall = stack.popCall(); + ensureOutputBlock(stack.popCall()); + /* if (!_outputItemRow) { // TODO: FIXME Hard coded size SharedAqlItemBlockPtr newBlock = - _engine->itemBlockManager().requestBlock(1000, _infos.numberOfOutputRegisters()); - TRI_ASSERT(newBlock != nullptr); - TRI_ASSERT(newBlock->size() == 1000); - _outputItemRow = createOutputRow(newBlock); + _engine->itemBlockManager().requestBlock(1000, + _infos.numberOfOutputRegisters()); TRI_ASSERT(newBlock != nullptr); TRI_ASSERT(newBlock->size() + == 1000); _outputItemRow = createOutputRow(newBlock); } + */ size_t skipped = 0; - // TODO: Need to make this member variable for waiting? - - ExecState execState = ::NextState(myCall); + TRI_ASSERT(_outputItemRow); + ExecState execState = ::NextState(_outputItemRow->getClientCall()); if (_lastRange.hasShadowRow()) { // We have not been able to move all shadowRows into the output last time. // Continue from there. @@ -1020,39 +1033,41 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { while (execState != ExecState::DONE && !_outputItemRow->isFull()) { switch (execState) { case ExecState::SKIP: { + auto const& clientCall = _outputItemRow->getClientCall(); auto [state, skippedLocal, call] = - _executor.skipRowsRange(myCall.getOffset(), _lastRange); - myCall.didSkip(skippedLocal); + _executor.skipRowsRange(clientCall.getOffset(), _lastRange); + _outputItemRow->didSkip(skippedLocal); skipped += skippedLocal; if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; - } else if (myCall.getOffset() > 0) { + } else if (clientCall.getOffset() > 0) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more executorRequest = call; execState = ExecState::UPSTREAM; } else { // We are done with skipping. Skip is not allowed to request more - execState = ::NextState(myCall); + execState = ::NextState(clientCall); } break; } case ExecState::PRODUCE: { - TRI_ASSERT(myCall.getLimit() > 0); + auto const& clientCall = _outputItemRow->getClientCall(); + TRI_ASSERT(clientCall.getLimit() > 0); // Execute getSome auto const [state, stats, call] = _executor.produceRows(_lastRange, *_outputItemRow); _engine->_stats += stats; if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; - } else if (myCall.getLimit() > 0) { + } else if (clientCall.getLimit() > 0) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more executorRequest = call; execState = ExecState::UPSTREAM; } else { // We are done with producing. Produce is not allowed to request more - execState = ::NextState(myCall); + execState = ::NextState(clientCall); } break; } @@ -1067,10 +1082,11 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { size_t skippedLocal = 0; stack.pushCall(std::move(executorRequest)); std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); - // Do we need to call it? - // myCall.didSkip(skippedLocal); skipped += skippedLocal; - execState = ::NextState(myCall); + auto const& clientCall = _outputItemRow->getClientCall(); + // Do we need to call it? 
+ // clientCall.didSkip(skippedLocal); + execState = ::NextState(clientCall); break; } case ExecState::SHADOWROWS: { diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 88f410033edf..d7d0b3267b09 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -226,7 +226,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] std::pair requestWrappedBlock( size_t nrItems, RegisterId nrRegs); - [[nodiscard]] std::unique_ptr createOutputRow(SharedAqlItemBlockPtr& newBlock); + [[nodiscard]] std::unique_ptr createOutputRow(SharedAqlItemBlockPtr& newBlock, + AqlCall&& call); [[nodiscard]] Query const& getQuery() const; diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index fe7f81f6394e..88b20d33382e 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -129,8 +129,7 @@ std::tuple FilterExecutor::produceRows( } FilterStats stats{}; - while (inputRange.hasMore()) { - TRI_ASSERT(!output.isFull()); + while (inputRange.hasMore() && !output.isFull()) { auto const& [state, input] = inputRange.next(); TRI_ASSERT(input.isInitialized()); if (input.getValue(_infos.getInputRegister()).toBoolean()) { diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index ff62d61a0366..e281ca9aa094 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -217,6 +217,10 @@ AqlCall::Limit OutputAqlItemRow::softLimit() const { return _call.softLimit; } AqlCall::Limit OutputAqlItemRow::hardLimit() const { return _call.hardLimit; } +AqlCall const& OutputAqlItemRow::getClientCall() const { return _call; } + +void OutputAqlItemRow::didSkip(size_t n) { _call.didSkip(n); } + SharedAqlItemBlockPtr OutputAqlItemRow::stealBlock() { // numRowsWritten() inspects _block, so save this before resetting it! 
auto const numRows = numRowsWritten(); diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 835b54e4b78f..d96d51d1a861 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -175,6 +175,10 @@ class OutputAqlItemRow { AqlCall::Limit hardLimit() const; + AqlCall const& getClientCall() const; + + void didSkip(size_t n); + private: [[nodiscard]] std::unordered_set const& outputRegisters() const { TRI_ASSERT(_outputRegisters != nullptr); From 6d18de707a5fcf7b10bbc37eef8485506e7c19d5 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 5 Dec 2019 08:02:19 +0100 Subject: [PATCH 033/122] Fixed bug with AqlCallStack pop operation that actually caused invalid memory access --- arangod/Aql/AqlCallStack.cpp | 4 ++-- arangod/Aql/AqlCallStack.h | 2 +- arangod/Aql/AqlItemBlockInputRange.h | 2 -- arangod/Aql/ExecutionBlockImpl.cpp | 15 ++++++++++++--- arangod/Aql/FilterExecutor.cpp | 3 +-- arangod/Aql/OutputAqlItemRow.cpp | 4 ++++ arangod/Aql/OutputAqlItemRow.h | 4 ++++ tests/Aql/SingleRowFetcherTest.cpp | 3 --- 8 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 0c5f44b2647d..608febad859b 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -45,12 +45,12 @@ AqlCallStack::AqlCallStack(AqlCallStack const& other) bool AqlCallStack::isRelevant() const { return _depth == 0; } -AqlCall&& AqlCallStack::popCall() { +AqlCall AqlCallStack::popCall() { TRI_ASSERT(isRelevant()); TRI_ASSERT(!_operations.empty()); auto call = _operations.top(); _operations.pop(); - return std::move(call); + return call; } AqlCall const& AqlCallStack::peek() const { diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 03a9a91d34c5..f9fbec8dd927 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -44,7 +44,7 @@ class AqlCallStack { // Get the top most Call element (this must be relevant). // This is popped of the stack and caller can take responsibility for it - AqlCall&& popCall(); + AqlCall popCall(); // Peek at the top most Call element (this must be relevant). 
// The responsibility will stay at the stack diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index e0a6a5827c35..668113457c15 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -50,8 +50,6 @@ class AqlItemBlockInputRange { std::size_t getRowIndex() noexcept { return _rowIndex; }; - std::size_t getEndIndex() noexcept { return _endIndex; }; - bool hasShadowRow() const noexcept; std::pair peekShadowRow(); diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index a8a6e49f4791..a34a76235d67 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -943,12 +943,14 @@ std::pair ExecutionBlockImpl::r template void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { - if (_outputItemRow->isFull()) { + if (_outputItemRow == nullptr || _outputItemRow->isFull()) { // We need to define the size of this block based on Input / Executor / Subquery depth size_t blockSize = ExecutionBlock::DefaultBatchSize(); SharedAqlItemBlockPtr newBlock = _engine->itemBlockManager().requestBlock(blockSize, _infos.numberOfOutputRegisters()); _outputItemRow = createOutputRow(newBlock, std::move(call)); + } else { + _outputItemRow->setCall(std::move(call)); } } @@ -1009,6 +1011,7 @@ template <> std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { ensureOutputBlock(stack.popCall()); + /* if (!_outputItemRow) { // TODO: FIXME Hard coded size @@ -1082,11 +1085,17 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { size_t skippedLocal = 0; stack.pushCall(std::move(executorRequest)); std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); + if (_upstreamState == ExecutionState::WAITING) { + // We do not return anything in WAITING state, also NOT skipped. + // TODO: Check if we need to leverage this restriction. + TRI_ASSERT(skipped == 0); + return {_upstreamState, 0, nullptr}; + } skipped += skippedLocal; - auto const& clientCall = _outputItemRow->getClientCall(); + ensureOutputBlock(_outputItemRow->stealClientCall()); // Do we need to call it? // clientCall.didSkip(skippedLocal); - execState = ::NextState(clientCall); + execState = ::NextState(_outputItemRow->getClientCall()); break; } case ExecState::SHADOWROWS: { diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 88b20d33382e..9ca361a78ad2 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -141,8 +141,7 @@ std::tuple FilterExecutor::produceRows( } AqlCall upstreamCall{}; - upstreamCall.softLimit = output.softLimit(); - upstreamCall.hardLimit = output.hardLimit(); + upstreamCall.softLimit = output.hardLimit(); /* We can use this value as a heuristic on overfetching. 
* by default we do not skip, and do not set any soft or hardLimit * on upstream diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index e281ca9aa094..4c6d358ef13e 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -219,6 +219,10 @@ AqlCall::Limit OutputAqlItemRow::hardLimit() const { return _call.hardLimit; } AqlCall const& OutputAqlItemRow::getClientCall() const { return _call; } +AqlCall&& OutputAqlItemRow::stealClientCall() { return std::move(_call); } + +void OutputAqlItemRow::setCall(AqlCall&& call) { _call = call; } + void OutputAqlItemRow::didSkip(size_t n) { _call.didSkip(n); } SharedAqlItemBlockPtr OutputAqlItemRow::stealBlock() { diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index d96d51d1a861..28a0127fa141 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -177,6 +177,10 @@ class OutputAqlItemRow { AqlCall const& getClientCall() const; + AqlCall&& stealClientCall(); + + void setCall(AqlCall&& call); + void didSkip(size_t n); private: diff --git a/tests/Aql/SingleRowFetcherTest.cpp b/tests/Aql/SingleRowFetcherTest.cpp index 54818943f3e0..9435cd93979c 100644 --- a/tests/Aql/SingleRowFetcherTest.cpp +++ b/tests/Aql/SingleRowFetcherTest.cpp @@ -69,7 +69,6 @@ class SingleRowFetcherTestPassBlocks : public ::testing::Test { void validateInputRange(AqlItemBlockInputRange& input, std::vector const& result) { - EXPECT_EQ(result.size(), input.getEndIndex() - input.getRowIndex()); for (auto const& value : result) { SCOPED_TRACE("Checking for value: " + value); // We need more rows @@ -1229,7 +1228,6 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_oneAndDone // First no data row auto [state, skipped, input] = testee.execute(stack); EXPECT_EQ(input.getRowIndex(), 0); - EXPECT_EQ(input.getEndIndex(), 4); EXPECT_EQ(skipped, 0); EXPECT_EQ(state, ExecutionState::DONE); } // testee is destroyed here @@ -1267,7 +1265,6 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_twoAndHasM EXPECT_EQ(state, ExecutionState::DONE); EXPECT_EQ(skipped, 0); EXPECT_EQ(input.getRowIndex(), 0); - EXPECT_EQ(input.getEndIndex(), 4); // Now validate the input range validateInputRange(input, std::vector{"a", "b", "c", "d"}); From c84be5942963f3c63b1ff6a506600a76cd2b969d Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 5 Dec 2019 08:24:18 +0100 Subject: [PATCH 034/122] outputRow now honors limits whan called for isFull. --- arangod/Aql/OutputAqlItemRow.h | 4 ++-- tests/Aql/FilterExecutorTest.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 28a0127fa141..54d09cdb9821 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -125,7 +125,7 @@ class OutputAqlItemRow { SharedAqlItemBlockPtr stealBlock(); [[nodiscard]] bool isFull() const { - return numRowsWritten() >= block().size(); + return numRowsWritten() >= block().size() && _call.getLimit() > 0; } /** @@ -140,7 +140,7 @@ class OutputAqlItemRow { * passed from ExecutionBlockImpl. */ [[nodiscard]] size_t numRowsLeft() const { - return block().size() - _baseIndex; + return (std::min)(block().size() - _baseIndex, _call.getLimit()); } // Use this function with caution! 
We need it only for the ConstrainedSortExecutor diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 8b237d87af9d..646ad62da2e7 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -387,10 +387,10 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - size_t softLimit = 1000; + size_t hardLimit = 1000; AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; AqlCall limitedCall{}; - limitedCall.softLimit = softLimit; + limitedCall.hardLimit = hardLimit; OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear(), std::move(limitedCall)); EXPECT_EQ(output.numRowsWritten(), 0); @@ -403,7 +403,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { EXPECT_EQ(call.offset, 0); EXPECT_FALSE(call.hasHardLimit()); // We have a given softLimit, so we do not do overfetching - EXPECT_EQ(call.getLimit(), softLimit - 3); + EXPECT_EQ(call.getLimit(), hardLimit - 3); EXPECT_FALSE(call.fullCount); } From 2eb29adfb9948918dde822ba50d967f567ed1860 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 5 Dec 2019 08:34:59 +0100 Subject: [PATCH 035/122] Fixed isFull() in OutputAqlItemRow --- arangod/Aql/OutputAqlItemRow.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 54d09cdb9821..9ece8c61353a 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -124,9 +124,7 @@ class OutputAqlItemRow { */ SharedAqlItemBlockPtr stealBlock(); - [[nodiscard]] bool isFull() const { - return numRowsWritten() >= block().size() && _call.getLimit() > 0; - } + [[nodiscard]] bool isFull() const { return numRowsLeft() == 0; } /** * @brief Returns the number of rows that were fully written. From 715186783f716bfc08113fc6766a2d0c63045018 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 5 Dec 2019 18:37:21 +0100 Subject: [PATCH 036/122] Use the inbound call in skipRows for executors, in order to do a more precise upstream call --- arangod/Aql/ExecutionBlockImpl.cpp | 8 ++++---- arangod/Aql/FilterExecutor.cpp | 7 ++++--- arangod/Aql/FilterExecutor.h | 2 +- arangod/Aql/OutputAqlItemRow.cpp | 2 ++ arangod/Aql/OutputAqlItemRow.h | 16 ++++++++++++++++ tests/Aql/FilterExecutorTest.cpp | 10 ++++++++-- 6 files changed, 35 insertions(+), 10 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index a34a76235d67..6643026ca185 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1032,14 +1032,12 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { execState = ExecState::SHADOWROWS; } AqlCall executorRequest; - - while (execState != ExecState::DONE && !_outputItemRow->isFull()) { + while (execState != ExecState::DONE && !_outputItemRow->allRowsUsed()) { switch (execState) { case ExecState::SKIP: { auto const& clientCall = _outputItemRow->getClientCall(); auto [state, skippedLocal, call] = - _executor.skipRowsRange(clientCall.getOffset(), _lastRange); - _outputItemRow->didSkip(skippedLocal); + _executor.skipRowsRange(_lastRange, _outputItemRow->getModifiableClientCall()); skipped += skippedLocal; if (state == ExecutorState::DONE) { @@ -1144,6 +1142,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // after DONE. 
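Patches 034 and 035 above tie OutputAqlItemRow::numRowsLeft() and isFull() to the client call's remaining limit as well as to the free space in the block. A minimal standalone model of that interplay, with plain counters standing in for AqlCall and the item block (an illustration only, not the production class):

#include <algorithm>
#include <cstddef>

namespace sketch {
// Reduced model of an output row that owns the client call: capacity is the
// smaller of "space left in the block" and "rows the client still wants".
struct OutputRow {
  std::size_t blockSize;
  std::size_t written;
  std::size_t clientLimit;  // stands in for the AqlCall soft/hard limit

  std::size_t numRowsLeft() const {
    return std::min(blockSize - written, clientLimit);
  }
  bool isFull() const { return numRowsLeft() == 0; }

  void produced() {
    ++written;
    --clientLimit;
  }
};
}  // namespace sketch

int main() {
  sketch::OutputRow out{/*blockSize*/ 1000, /*written*/ 0, /*clientLimit*/ 3};
  while (!out.isFull()) {
    out.produced();
  }
  // The client limit, not the block size, capped the output at 3 rows.
  return out.written == 3 ? 0 : 1;
}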
_outputItemRow.reset(); if (_lastRange.hasMore() || _lastRange.hasShadowRow()) { + // We have skipped or/and return data, otherwise we cannot return HASMORE + TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; } return {_upstreamState, skipped, std::move(outputBlock)}; diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 9ca361a78ad2..4366b71d80b6 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -102,11 +102,11 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at } std::tuple FilterExecutor::skipRowsRange( - size_t offset, AqlItemBlockInputRange& inputRange) { + AqlItemBlockInputRange& inputRange, AqlCall& call) { ExecutorState state = ExecutorState::HASMORE; InputAqlItemRow input{CreateInvalidInputRowHint{}}; size_t skipped = 0; - while (inputRange.hasMore() && skipped < offset) { + while (inputRange.hasMore() && skipped < call.getOffset()) { std::tie(state, input) = inputRange.next(); if (!input) { TRI_ASSERT(!inputRange.hasMore()); @@ -116,9 +116,10 @@ std::tuple FilterExecutor::skipRowsRange( skipped++; } } + call.didSkip(skipped); AqlCall upstreamCall{}; - upstreamCall.softLimit = offset - skipped; + upstreamCall.softLimit = call.getOffset(); return {state, skipped, upstreamCall}; } diff --git a/arangod/Aql/FilterExecutor.h b/arangod/Aql/FilterExecutor.h index eea47aba7b87..0b5237e08652 100644 --- a/arangod/Aql/FilterExecutor.h +++ b/arangod/Aql/FilterExecutor.h @@ -104,7 +104,7 @@ class FilterExecutor { * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ [[nodiscard]] std::tuple skipRowsRange( - size_t atMost, AqlItemBlockInputRange& input); + AqlItemBlockInputRange& inputRange, AqlCall& call); [[nodiscard]] std::pair expectedNumberOfRows(size_t atMost) const; diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index 4c6d358ef13e..61c6bce4b719 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -219,6 +219,8 @@ AqlCall::Limit OutputAqlItemRow::hardLimit() const { return _call.hardLimit; } AqlCall const& OutputAqlItemRow::getClientCall() const { return _call; } +AqlCall& OutputAqlItemRow::getModifiableClientCall() { return _call; }; + AqlCall&& OutputAqlItemRow::stealClientCall() { return std::move(_call); } void OutputAqlItemRow::setCall(AqlCall&& call) { _call = call; } diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 9ece8c61353a..66b0a513e9ef 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -124,8 +124,22 @@ class OutputAqlItemRow { */ SharedAqlItemBlockPtr stealBlock(); + /** + * @brief Test if the data-Output is full. This contains checks against + * the client call as well. We are considered full as soon as + * hard or softLimit are reached. + */ [[nodiscard]] bool isFull() const { return numRowsLeft() == 0; } + /** + * @brief Test if all allocated rows are used. + * this does not consider the client call and allows to use + * the left-over space for ShadowRows. + */ + [[nodiscard]] bool allRowsUsed() const { + return block().size() <= _baseIndex; + } + /** * @brief Returns the number of rows that were fully written. 
*/ @@ -175,6 +189,8 @@ class OutputAqlItemRow { AqlCall const& getClientCall() const; + AqlCall& getModifiableClientCall(); + AqlCall&& stealClientCall(); void setCall(AqlCall&& call); diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 646ad62da2e7..dc37f114c757 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -421,10 +421,13 @@ TEST_F(FilterExecutorTest, test_skip_datarange_need_more) { {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; + AqlCall clientCall; + clientCall.offset = 1000; - auto const [state, skipped, call] = testee.skipRowsRange(1000, input); + auto const [state, skipped, call] = testee.skipRowsRange(input, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 3); + EXPECT_EQ(clientCall.getOffset(), 1000 - 3); EXPECT_FALSE(input.hasMore()); // Test the Call we send to upstream @@ -489,9 +492,12 @@ TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; - auto const [state, skipped, call] = testee.skipRowsRange(2, input); + AqlCall clientCall; + clientCall.offset = 2; + auto const [state, skipped, call] = testee.skipRowsRange(input, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 2); + EXPECT_EQ(clientCall.getOffset(), 0); EXPECT_TRUE(input.hasMore()); // We still have two values in block: false and true { From 26c1996714b2b5a3a7d9a51403f2b2c4adc89f38 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 6 Dec 2019 10:07:19 +0100 Subject: [PATCH 037/122] Make linker happy with operator< on CallLimit --- arangod/Aql/AqlCall.h | 13 +++++++++++++ arangod/Aql/ExecutionBlockImpl.cpp | 4 +++- arangod/Aql/FilterExecutor.cpp | 12 ++++++------ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 8f6592e3ee07..e88d7692db75 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -109,6 +109,19 @@ struct AqlCall { bool needsFullCount() const { return fullCount; } }; +constexpr bool operator<(AqlCall::Limit const& a, AqlCall::Limit const& b) { + if (std::holds_alternative(a)) { + return false; + } + if (std::holds_alternative(b)) { + return true; + } + if (std::get(a) < std::get(b)) { + return true; + } + return false; +} + } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 6643026ca185..1be374d904cd 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -201,7 +201,9 @@ std::pair ExecutionBlockImpl::g } TRI_ASSERT(newBlock != nullptr); TRI_ASSERT(newBlock->size() > 0); - TRI_ASSERT(newBlock->size() <= atMost); + // We cannot hold this assertion, if we are on a pass-through + // block and the upstream uses execute already. + // TRI_ASSERT(newBlock->size() <= atMost); _outputItemRow = createOutputRow(newBlock, AqlCall{}); } diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 4366b71d80b6..2aed9977b25d 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -101,6 +101,7 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at return _fetcher.preFetchNumberOfRows(atMost); } +// TODO Remove me, we are using the getSome skip variant here. 
std::tuple FilterExecutor::skipRowsRange( AqlItemBlockInputRange& inputRange, AqlCall& call) { ExecutorState state = ExecutorState::HASMORE; @@ -142,11 +143,10 @@ std::tuple FilterExecutor::produceRows( } AqlCall upstreamCall{}; - upstreamCall.softLimit = output.hardLimit(); - /* We can use this value as a heuristic on overfetching. - * by default we do not skip, and do not set any soft or hardLimit - * on upstream - * upstreamCall.softLimit = limit; - */ + auto const& clientCall = output.getClientCall(); + // This is a optimistic fetch. We do not do any overfetching here, only if we + // pass through all rows this fetch is correct, otherwise we have too few rows. + upstreamCall.softLimit = + clientCall.offset() + (std::min)(clientCall.softLimit, clientCall.hardLimit); return {inputRange.state(), stats, upstreamCall}; } From dfd6d49e78381e4614b70f9f19b2a024e60677fa Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 6 Dec 2019 13:22:06 +0100 Subject: [PATCH 038/122] Implement operator+ on AqlCall::Limit, also use std::visit for readability. --- arangod/Aql/AqlCall.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index e88d7692db75..5bec5e2399cd 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -24,6 +24,7 @@ #define ARANGOD_AQL_AQL_CALL_H 1 #include "Aql/ExecutionBlock.h" +#include "Basics/overload.h" #include #include @@ -92,14 +93,15 @@ struct AqlCall { } void didProduce(std::size_t n) { - if (std::holds_alternative(softLimit)) { - TRI_ASSERT(n <= std::get(softLimit)); - softLimit = std::get(softLimit) - n; - } - if (std::holds_alternative(hardLimit)) { - TRI_ASSERT(n <= std::get(hardLimit)); - hardLimit = std::get(hardLimit) - n; - } + auto minus = overload{ + [n](size_t& i) { + TRI_ASSERT(n <= i); + i -= n; + }, + [](auto) {}, + }; + std::visit(minus, softLimit); + std::visit(minus, hardLimit); } bool hasHardLimit() const { @@ -122,6 +124,18 @@ constexpr bool operator<(AqlCall::Limit const& a, AqlCall::Limit const& b) { return false; } +constexpr AqlCall::Limit operator+(AqlCall::Limit const& a, size_t n) { + return std::visit(overload{[n](size_t const& i) -> AqlCall::Limit { + return i + n; + }, + [](auto inf) -> AqlCall::Limit { return inf; }}, + a); +} + +constexpr AqlCall::Limit operator+(size_t n, AqlCall::Limit const& a) { + return a + n; +} + } // namespace aql } // namespace arangodb From 86db29ce97678291e09db3bd80338287273a4008 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 6 Dec 2019 13:23:02 +0100 Subject: [PATCH 039/122] FilterExecutor getSome now propagates offset + min(limits) to upstream. This is pessimistic and avoids overfetching in all cases. NOTE: FilterExecutor should be moved to skipvariant using getSome --- arangod/Aql/FilterExecutor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 2aed9977b25d..4be4c63eceb4 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -146,7 +146,7 @@ std::tuple FilterExecutor::produceRows( auto const& clientCall = output.getClientCall(); // This is a optimistic fetch. We do not do any overfetching here, only if we // pass through all rows this fetch is correct, otherwise we have too few rows. 
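Patches 037 and 038 above give AqlCall::Limit, a std::variant of a row count and an Infinity tag, an ordering and an addition, and the FilterExecutor change just below combines them into offset + min(softLimit, hardLimit) for the upstream call. A self-contained sketch of that Limit arithmetic, with a locally defined overload helper standing in for Basics/overload.h (the names mirror AqlCall, but this is illustrative, not the production header):

#include <cassert>
#include <cstddef>
#include <variant>

namespace sketch {
// Simplified stand-ins for AqlCall::Infinity and AqlCall::Limit.
struct Infinity {};
using Limit = std::variant<std::size_t, Infinity>;

// Overloaded-lambda helper in the spirit of Basics/overload.h.
template <class... Ts>
struct overload : Ts... {
  using Ts::operator()...;
};
template <class... Ts>
overload(Ts...) -> overload<Ts...>;

// Infinity compares greater than any finite count.
bool less(Limit const& a, Limit const& b) {
  if (std::holds_alternative<Infinity>(a)) {
    return false;
  }
  if (std::holds_alternative<Infinity>(b)) {
    return true;
  }
  return std::get<std::size_t>(a) < std::get<std::size_t>(b);
}

// Adding a finite offset leaves an unbounded limit unbounded.
Limit plus(Limit const& a, std::size_t n) {
  return std::visit(overload{[n](std::size_t i) -> Limit { return i + n; },
                             [](Infinity inf) -> Limit { return inf; }},
                    a);
}
}  // namespace sketch

int main() {
  using namespace sketch;
  Limit soft = std::size_t{10};
  Limit hard = Infinity{};
  // offset + min(softLimit, hardLimit): 3 + min(10, unbounded) == 13
  Limit upstream = plus(less(soft, hard) ? soft : hard, 3);
  assert(std::get<std::size_t>(upstream) == 13);
  return 0;
}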
- upstreamCall.softLimit = - clientCall.offset() + (std::min)(clientCall.softLimit, clientCall.hardLimit); + upstreamCall.softLimit = clientCall.getOffset() + + (std::min)(clientCall.softLimit, clientCall.hardLimit); return {inputRange.state(), stats, upstreamCall}; } From aa4323fc10598e277226d30f583a3657a815bdfb Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 6 Dec 2019 14:40:46 +0100 Subject: [PATCH 040/122] Fixed profiler tests for filter. These cannot be guaranteed anymore and it was an undesired, yet uncritical side effect that the tests have been green before --- tests/js/server/aql/aql-profiler.js | 59 ++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js index f7c3ed51f89a..f0d84143739e 100644 --- a/tests/js/server/aql/aql-profiler.js +++ b/tests/js/server/aql/aql-profiler.js @@ -346,15 +346,39 @@ function ahuacatlProfilerTestSuite () { testFilterBlock2 : function () { const query = 'FOR i IN 1..@rows FILTER i % 13 != 0 RETURN i'; - const genNodeList = (rows, batches) => { + const genNodeList = (rows) => { + // This is an array 1..rows where true means it passes the filter + const list = Array.from(Array(rows)).map((_, index ) => { + return ((index + 1) % 13 != 0); + }); + + const recursiveFilterCallEstimator = (rowsToFetch) => { + if (rowsToFetch === 0 || list.length === 0) { + return 0; + } + if (rowsToFetch < 0) { + // We would have counted an overfetch! + assertTrue(false); + } + // We count one required call. + // We remove rowsToFetch elements from the beginning of the array. + // We count how many of those are true. + // We simulate an above call with rowsToFetch - the number of true counts. + // Redo until we have no more rows to fetch. + return 1 + recursiveFilterCallEstimator(rowsToFetch - list.splice(0, rowsToFetch).filter(e => e).length); + }; + let batchesAboveFilter = 0; + while (list.length > 0) { + batchesAboveFilter += recursiveFilterCallEstimator(defaultBatchSize); + } const rowsAfterFilter = rows - Math.floor(rows / 13); const batchesAfterFilter = Math.ceil(rowsAfterFilter / defaultBatchSize); return [ { type : SingletonBlock, calls : 1, items : 1 }, { type : CalculationBlock, calls : 1, items : 1 }, - { type : EnumerateListBlock, calls : batches, items : rows }, - { type : CalculationBlock, calls : batches, items : rows }, + { type : EnumerateListBlock, calls : batchesAboveFilter, items : rows }, + { type : CalculationBlock, calls : batchesAboveFilter, items : rows }, { type : FilterBlock, calls : batchesAfterFilter, items : rowsAfterFilter }, { type : ReturnBlock, calls : batchesAfterFilter, items : rowsAfterFilter }, ]; @@ -368,15 +392,38 @@ function ahuacatlProfilerTestSuite () { testFilterBlock3 : function () { const query = 'FOR i IN 1..@rows FILTER i % 13 == 0 RETURN i'; - const genNodeList = (rows, batches) => { + const genNodeList = (rows) => { + // This is an array 1..rows where true means it passes the filter + const list = Array.from(Array(rows)).map((_, index ) => { + return ((index + 1) % 13 == 0); + }); + const recursiveFilterCallEstimator = (rowsToFetch) => { + if (rowsToFetch === 0 || list.length === 0) { + return 0; + } + if (rowsToFetch < 0) { + // We would have counted an overfetch! + assertTrue(false); + } + // We count one required call. + // We remove rowsToFetch elements from the beginning of the array. + // We count how many of those are true. 
+ // We simulate an above call with rowsToFetch - the number of true counts. + // Redo until we have no more rows to fetch. + return 1 + recursiveFilterCallEstimator(rowsToFetch - list.splice(0, rowsToFetch).filter(e => e).length); + }; + let batchesAboveFilter = 0; + while (list.length > 0) { + batchesAboveFilter += recursiveFilterCallEstimator(defaultBatchSize); + } const rowsAfterFilter = Math.floor(rows / 13); const batchesAfterFilter = Math.max(1, Math.ceil(rowsAfterFilter / defaultBatchSize)); return [ { type : SingletonBlock, calls : 1, items : 1 }, { type : CalculationBlock, calls : 1, items : 1 }, - { type : EnumerateListBlock, calls : batches, items : rows }, - { type : CalculationBlock, calls : batches, items : rows }, + { type : EnumerateListBlock, calls : batchesAboveFilter, items : rows }, + { type : CalculationBlock, calls : batchesAboveFilter, items : rows }, { type : FilterBlock, calls : batchesAfterFilter, items : rowsAfterFilter }, { type : ReturnBlock, calls : batchesAfterFilter, items : rowsAfterFilter }, ]; From 63f76e5d94f0e93eb462456d42ae13240f17d96a Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 6 Dec 2019 15:00:37 +0100 Subject: [PATCH 041/122] Fixed JSLint --- tests/js/server/aql/aql-profiler.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js index f0d84143739e..27c7cb7a5727 100644 --- a/tests/js/server/aql/aql-profiler.js +++ b/tests/js/server/aql/aql-profiler.js @@ -1,5 +1,5 @@ /*jshint globalstrict:true, strict:true, esnext: true */ -/*global AQL_EXPLAIN */ +/*global AQL_EXPLAIN, assertTrue */ "use strict"; @@ -349,7 +349,7 @@ function ahuacatlProfilerTestSuite () { const genNodeList = (rows) => { // This is an array 1..rows where true means it passes the filter const list = Array.from(Array(rows)).map((_, index ) => { - return ((index + 1) % 13 != 0); + return ((index + 1) % 13 !== 0); }); const recursiveFilterCallEstimator = (rowsToFetch) => { @@ -395,7 +395,7 @@ function ahuacatlProfilerTestSuite () { const genNodeList = (rows) => { // This is an array 1..rows where true means it passes the filter const list = Array.from(Array(rows)).map((_, index ) => { - return ((index + 1) % 13 == 0); + return ((index + 1) % 13 === 0); }); const recursiveFilterCallEstimator = (rowsToFetch) => { if (rowsToFetch === 0 || list.length === 0) { From 7cddcfa85cd73f089f32592f44ccf58f4e488d86 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Fri, 13 Dec 2019 12:27:12 +0000 Subject: [PATCH 042/122] Remove special casing for FilterExecutor * use if constexpr to distinguish between ported executors and non-ported executors enabling step-by-step porting and testing --- arangod/Aql/ExecutionBlockImpl.cpp | 347 +++++++++++++++-------------- 1 file changed, 177 insertions(+), 170 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 1be374d904cd..5def27ec2802 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -130,6 +130,15 @@ CREATE_HAS_MEMBER_CHECK(skipRows, hasSkipRows); CREATE_HAS_MEMBER_CHECK(fetchBlockForPassthrough, hasFetchBlockForPassthrough); CREATE_HAS_MEMBER_CHECK(expectedNumberOfRows, hasExpectedNumberOfRows); +/* + * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction + * TODO: This should be removed once all executors and fetchers are ported to the new style. 
+ */ +template +static bool constexpr isNewStyleExecutor() { + return std::is_same::value; +} + template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, ExecutionNode const* node, @@ -156,9 +165,15 @@ ExecutionBlockImpl::~ExecutionBlockImpl() = default; template std::pair ExecutionBlockImpl::getSome(size_t atMost) { - traceGetSomeBegin(atMost); - auto result = getSomeWithoutTrace(atMost); - return traceGetSomeEnd(result.first, std::move(result.second)); + if constexpr (isNewStyleExecutor()) { + AqlCallStack stack{AqlCall::SimulateGetSome(atMost)}; + auto const [state, skipped, block] = execute(stack); + return {state, block}; + } else { + traceGetSomeBegin(atMost); + auto result = getSomeWithoutTrace(atMost); + return traceGetSomeEnd(result.first, std::move(result.second)); + } } template @@ -432,24 +447,30 @@ static SkipVariants constexpr skipType() { template std::pair ExecutionBlockImpl::skipSome(size_t const atMost) { - traceSkipSomeBegin(atMost); - auto state = ExecutionState::HASMORE; - - while (state == ExecutionState::HASMORE && _skipped < atMost) { - auto res = skipSomeOnceWithoutTrace(atMost - _skipped); - TRI_ASSERT(state != ExecutionState::WAITING || res.second == 0); - state = res.first; - _skipped += res.second; - TRI_ASSERT(_skipped <= atMost); - } + if constexpr (isNewStyleExecutor()) { + AqlCallStack stack{AqlCall::SimulateSkipSome(atMost)}; + auto const [state, skipped, block] = execute(stack); + return {state, skipped}; + } else { + traceSkipSomeBegin(atMost); + auto state = ExecutionState::HASMORE; + + while (state == ExecutionState::HASMORE && _skipped < atMost) { + auto res = skipSomeOnceWithoutTrace(atMost - _skipped); + TRI_ASSERT(state != ExecutionState::WAITING || res.second == 0); + state = res.first; + _skipped += res.second; + TRI_ASSERT(_skipped <= atMost); + } - size_t skipped = 0; - if (state != ExecutionState::WAITING) { - std::swap(skipped, _skipped); - } + size_t skipped = 0; + if (state != ExecutionState::WAITING) { + std::swap(skipped, _skipped); + } - TRI_ASSERT(skipped <= atMost); - return traceSkipSomeEnd(state, skipped); + TRI_ASSERT(skipped <= atMost); + return traceSkipSomeEnd(state, skipped); + } } template @@ -552,7 +573,8 @@ std::pair ExecutionBlockImpl::shutdown(int err template std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { // TODO remove this IF - if (std::is_same::value) { + // These are new style executors + if constexpr (isNewStyleExecutor()) { // Only this executor is fully implemented traceExecuteBegin(stack); auto res = executeWithoutTrace(stack); @@ -963,29 +985,6 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, return _engine->itemBlockManager().requestBlock(nrItems, nrRegs); } -// TODO: Remove this special implementations -template <> -std::pair ExecutionBlockImpl::getSome(size_t atMost) { - AqlCallStack stack{AqlCall::SimulateGetSome(atMost)}; - auto const [state, skipped, block] = execute(stack); - return {state, block}; -} - -template <> -std::pair ExecutionBlockImpl::skipSome(size_t const toSkip) { - AqlCallStack stack{AqlCall::SimulateSkipSome(toSkip)}; - auto const [state, skipped, block] = execute(stack); - return {state, skipped}; -} - -template -std::tuple -ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { - // TODO implement! 
- TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); -} - // TODO move me up enum ExecState { SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE }; @@ -1009,146 +1008,154 @@ ExecState NextState(AqlCall const& call) { } } // namespace -template <> +template std::tuple -ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { - ensureOutputBlock(stack.popCall()); +ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { + if constexpr (isNewStyleExecutor()) { + ensureOutputBlock(stack.popCall()); + + /* + if (!_outputItemRow) { + // TODO: FIXME Hard coded size + SharedAqlItemBlockPtr newBlock = + _engine->itemBlockManager().requestBlock(1000, + _infos.numberOfOutputRegisters()); TRI_ASSERT(newBlock != nullptr); TRI_ASSERT(newBlock->size() + == 1000); _outputItemRow = createOutputRow(newBlock); + } + */ + size_t skipped = 0; - /* - if (!_outputItemRow) { - // TODO: FIXME Hard coded size - SharedAqlItemBlockPtr newBlock = - _engine->itemBlockManager().requestBlock(1000, - _infos.numberOfOutputRegisters()); TRI_ASSERT(newBlock != nullptr); TRI_ASSERT(newBlock->size() - == 1000); _outputItemRow = createOutputRow(newBlock); - } - */ - size_t skipped = 0; - - TRI_ASSERT(_outputItemRow); - ExecState execState = ::NextState(_outputItemRow->getClientCall()); - if (_lastRange.hasShadowRow()) { - // We have not been able to move all shadowRows into the output last time. - // Continue from there. - // TODO test if this works with COUNT COLLECT - execState = ExecState::SHADOWROWS; - } - AqlCall executorRequest; - while (execState != ExecState::DONE && !_outputItemRow->allRowsUsed()) { - switch (execState) { - case ExecState::SKIP: { - auto const& clientCall = _outputItemRow->getClientCall(); - auto [state, skippedLocal, call] = - _executor.skipRowsRange(_lastRange, _outputItemRow->getModifiableClientCall()); - skipped += skippedLocal; - - if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; - } else if (clientCall.getOffset() > 0) { - TRI_ASSERT(_upstreamState != ExecutionState::DONE); - // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; - } else { - // We are done with skipping. Skip is not allowed to request more - execState = ::NextState(clientCall); + TRI_ASSERT(_outputItemRow); + ExecState execState = ::NextState(_outputItemRow->getClientCall()); + if (_lastRange.hasShadowRow()) { + // We have not been able to move all shadowRows into the output last time. + // Continue from there. + // TODO test if this works with COUNT COLLECT + execState = ExecState::SHADOWROWS; + } + AqlCall executorRequest; + while (execState != ExecState::DONE && !_outputItemRow->allRowsUsed()) { + switch (execState) { + case ExecState::SKIP: { + auto const& clientCall = _outputItemRow->getClientCall(); + auto [state, skippedLocal, call] = + _executor.skipRowsRange(_lastRange, _outputItemRow->getModifiableClientCall()); + skipped += skippedLocal; + + if (state == ExecutorState::DONE) { + execState = ExecState::SHADOWROWS; + } else if (clientCall.getOffset() > 0) { + TRI_ASSERT(_upstreamState != ExecutionState::DONE); + // We need to request more + executorRequest = call; + execState = ExecState::UPSTREAM; + } else { + // We are done with skipping. 
Skip is not allowed to request more + execState = ::NextState(clientCall); + } + break; } - break; - } - case ExecState::PRODUCE: { - auto const& clientCall = _outputItemRow->getClientCall(); - TRI_ASSERT(clientCall.getLimit() > 0); - // Execute getSome - auto const [state, stats, call] = _executor.produceRows(_lastRange, *_outputItemRow); - _engine->_stats += stats; - if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; - } else if (clientCall.getLimit() > 0) { - TRI_ASSERT(_upstreamState != ExecutionState::DONE); - // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; - } else { - // We are done with producing. Produce is not allowed to request more - execState = ::NextState(clientCall); + case ExecState::PRODUCE: { + auto const& clientCall = _outputItemRow->getClientCall(); + TRI_ASSERT(clientCall.getLimit() > 0); + // Execute getSome + auto const [state, stats, call] = + _executor.produceRows(_lastRange, *_outputItemRow); + _engine->_stats += stats; + if (state == ExecutorState::DONE) { + execState = ExecState::SHADOWROWS; + } else if (clientCall.getLimit() > 0) { + TRI_ASSERT(_upstreamState != ExecutionState::DONE); + // We need to request more + executorRequest = call; + execState = ExecState::UPSTREAM; + } else { + // We are done with producing. Produce is not allowed to request more + execState = ::NextState(clientCall); + } + break; } - break; - } - case ExecState::FULLCOUNT: { - TRI_ASSERT(false); - } - case ExecState::UPSTREAM: { - // If this triggers the executors produceRows function has returned - // HASMORE even if it new that upstream has no further rows. - TRI_ASSERT(_upstreamState != ExecutionState::DONE); - TRI_ASSERT(!_lastRange.hasMore()); - size_t skippedLocal = 0; - stack.pushCall(std::move(executorRequest)); - std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); - if (_upstreamState == ExecutionState::WAITING) { - // We do not return anything in WAITING state, also NOT skipped. - // TODO: Check if we need to leverage this restriction. - TRI_ASSERT(skipped == 0); - return {_upstreamState, 0, nullptr}; + case ExecState::FULLCOUNT: { + TRI_ASSERT(false); } - skipped += skippedLocal; - ensureOutputBlock(_outputItemRow->stealClientCall()); - // Do we need to call it? - // clientCall.didSkip(skippedLocal); - execState = ::NextState(_outputItemRow->getClientCall()); - break; - } - case ExecState::SHADOWROWS: { - // TODO: Check if there is a situation where we are at this point, but at the end of a block - // Or if we would not recognize this beforehand - // TODO: Check if we can have the situation that we are between two shadow rows here. - // E.g. LastRow is releveant shadowRow. NextRow is non-relevant shadowRow. - // NOTE: I do not think this is an issue, as the Executor will always say that it cannot do anything with - // an empty input. Only exception might be COLLECT COUNT. - if (_lastRange.hasShadowRow()) { - auto const& [state, shadowRow] = _lastRange.nextShadowRow(); - TRI_ASSERT(shadowRow.isInitialized()); - _outputItemRow->copyRow(shadowRow); - if (shadowRow.isRelevant()) { - // We found a relevant shadow Row. - // We need to reset the Executor - // TODO: call reset! + case ExecState::UPSTREAM: { + // If this triggers the executors produceRows function has returned + // HASMORE even if it new that upstream has no further rows. 
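Stepping back from the UPSTREAM case for a moment: the loop above is driven entirely by what is still outstanding on the client call, as ::NextState() encodes it: skip while an offset remains, produce while a limit remains, then full count or done. A reduced model of that call bookkeeping (plain size_t fields instead of AqlCall's Limit variant; the UPSTREAM branch below simply refills the input before the same bookkeeping is evaluated again):

#include <cassert>
#include <cstddef>

namespace sketch {
// Reduced model of the client call that drives the execute() state machine.
struct Call {
  std::size_t offset = 0;   // rows to skip first
  std::size_t limit = 0;    // rows to produce afterwards
  bool fullCount = false;   // count (but discard) whatever follows

  void didSkip(std::size_t n) {
    assert(n <= offset);
    offset -= n;
  }
  void didProduce(std::size_t n) {
    assert(n <= limit);
    limit -= n;
  }
};

enum class Phase { Skip, Produce, FullCount, Done };

// Mirrors NextState(): the phase follows from what is still outstanding.
Phase nextPhase(Call const& c) {
  if (c.offset > 0) {
    return Phase::Skip;
  }
  if (c.limit > 0) {
    return Phase::Produce;
  }
  if (c.fullCount) {
    return Phase::FullCount;
  }
  return Phase::Done;
}
}  // namespace sketch

int main() {
  sketch::Call c{/*offset*/ 2, /*limit*/ 3, /*fullCount*/ false};
  assert(sketch::nextPhase(c) == sketch::Phase::Skip);
  c.didSkip(2);      // offset consumed, e.g. by a skipRowsRange-style call
  assert(sketch::nextPhase(c) == sketch::Phase::Produce);
  c.didProduce(3);   // limit consumed by produceRows
  assert(sketch::nextPhase(c) == sketch::Phase::Done);
  return 0;
}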
+ TRI_ASSERT(_upstreamState != ExecutionState::DONE); + TRI_ASSERT(!_lastRange.hasMore()); + size_t skippedLocal = 0; + stack.pushCall(std::move(executorRequest)); + std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); + if (_upstreamState == ExecutionState::WAITING) { + // We do not return anything in WAITING state, also NOT skipped. + // TODO: Check if we need to leverage this restriction. + TRI_ASSERT(skipped == 0); + return {_upstreamState, 0, nullptr}; } - TRI_ASSERT(_outputItemRow->produced()); - _outputItemRow->advanceRow(); - if (state == ExecutorState::DONE) { - if (_lastRange.hasMore()) { - // TODO this state is invalid, and can just show up now if we exclude SKIP - execState = ExecState::PRODUCE; - } else { - // Right now we cannot support to have more than one set of - // ShadowRows inside of a Range. - // We do not know how to continue with the above executor after a shadowrow. - TRI_ASSERT(!_lastRange.hasMore()); - execState = ExecState::DONE; + skipped += skippedLocal; + ensureOutputBlock(_outputItemRow->stealClientCall()); + // Do we need to call it? + // clientCall.didSkip(skippedLocal); + execState = ::NextState(_outputItemRow->getClientCall()); + break; + } + case ExecState::SHADOWROWS: { + // TODO: Check if there is a situation where we are at this point, but at the end of a block + // Or if we would not recognize this beforehand + // TODO: Check if we can have the situation that we are between two shadow rows here. + // E.g. LastRow is releveant shadowRow. NextRow is non-relevant shadowRow. + // NOTE: I do not think this is an issue, as the Executor will always say that it cannot do anything with + // an empty input. Only exception might be COLLECT COUNT. + if (_lastRange.hasShadowRow()) { + auto const& [state, shadowRow] = _lastRange.nextShadowRow(); + TRI_ASSERT(shadowRow.isInitialized()); + _outputItemRow->copyRow(shadowRow); + if (shadowRow.isRelevant()) { + // We found a relevant shadow Row. + // We need to reset the Executor + // TODO: call reset! + } + TRI_ASSERT(_outputItemRow->produced()); + _outputItemRow->advanceRow(); + if (state == ExecutorState::DONE) { + if (_lastRange.hasMore()) { + // TODO this state is invalid, and can just show up now if we exclude SKIP + execState = ExecState::PRODUCE; + } else { + // Right now we cannot support to have more than one set of + // ShadowRows inside of a Range. + // We do not know how to continue with the above executor after a shadowrow. + TRI_ASSERT(!_lastRange.hasMore()); + execState = ExecState::DONE; + } } + } else { + execState = ExecState::DONE; } - } else { - execState = ExecState::DONE; + break; } - break; + default: + // unreachable + TRI_ASSERT(false); } - default: - // unreachable - TRI_ASSERT(false); } - } - auto outputBlock = _outputItemRow->stealBlock(); - // This is not strictly necessary here, as we shouldn't be called again - // after DONE. - _outputItemRow.reset(); - if (_lastRange.hasMore() || _lastRange.hasShadowRow()) { - // We have skipped or/and return data, otherwise we cannot return HASMORE - TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); - return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; + auto outputBlock = _outputItemRow->stealBlock(); + // This is not strictly necessary here, as we shouldn't be called again + // after DONE. 
+ _outputItemRow.reset(); + if (_lastRange.hasMore() || _lastRange.hasShadowRow()) { + // We have skipped or/and return data, otherwise we cannot return HASMORE + TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); + return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; + } + return {_upstreamState, skipped, std::move(outputBlock)}; + } else { + // TODO this branch must never be taken with an executor that has not been + // converted yet + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } - return {_upstreamState, skipped, std::move(outputBlock)}; } /// @brief reset all internal states after processing a shadow row. From 912351696ddcdaaac5ddaf16c0403e9206fcdcd1 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Fri, 13 Dec 2019 13:56:32 +0000 Subject: [PATCH 043/122] Add skeleton skip code --- arangod/Aql/ExecutionBlockImpl.cpp | 120 ++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 5def27ec2802..2459f5b0a378 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -988,6 +988,7 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, // TODO move me up enum ExecState { SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE }; +// TODO clean me up namespace { // This cannot return upstream call or shadowrows. ExecState NextState(AqlCall const& call) { @@ -1006,8 +1007,123 @@ ExecState NextState(AqlCall const& call) { // now we are done. return ExecState::DONE; } + } // namespace +// +// FETCHER: if we have one output row per input row, we can skip +// directly by just calling the fetcher and see whether +// it produced any output. 
+// With the new architecture we should be able to just skip +// ahead on the input range, fetching new blocks when necessary +// EXECUTOR: the executor has a specialised skipRowsRange method +// that will be called to skip +// GET_SOME: we just request rows from the executor and then discard +// them +// +enum class SkipRowsRangeVariant { FETCHER, EXECUTOR, GET_SOME }; + +template +struct ExecuteSkipRowsRange {}; + +using SkipRowsRangeResult = std::tuple; + +template <> +struct ExecuteSkipRowsRange { + template + static SkipRowsRangeResult executeSkipRowsRange(Executor& executor, + AqlItemBlockInputRange& input, + AqlCall& call) { + TRI_ASSERT(false); + /* auto res = fetcher.skipRows(toSkip); + return std::make_tuple(res.first, typename Executor::Stats{}, res.second); // tuple, cannot use initializer list due to build failure + */ + return std::make_tuple(ExecutorState::DONE, 0, call); // tuple, cannot use initializer list due to build failure + } +}; + +template <> +struct ExecuteSkipRowsRange { + template + static SkipRowsRangeResult executeSkipRowsRange(Executor& executor, + AqlItemBlockInputRange& input, + AqlCall& call) { + TRI_ASSERT(false); + // TODO forward to executor + // return executor.skipRows(toSkip); + return std::make_tuple(ExecutorState::DONE, 0, call); + } +}; + +template <> +struct ExecuteSkipRowsRange { + template + static SkipRowsRangeResult executeSkipRowsRange(Executor& executor, + AqlItemBlockInputRange& input, + AqlCall& call) { + // this function should never be executed + TRI_ASSERT(false); + // Make MSVC happy: + return std::make_tuple(ExecutorState::DONE, 0, call); + } +}; + +template +static SkipRowsRangeVariant constexpr skipRowsType() { + bool constexpr useFetcher = + Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable && + !std::is_same>::value; + + bool constexpr useExecutor = hasSkipRows::value; + + // ConstFetcher and SingleRowFetcher can skip, but + // it may not be done for modification subqueries. + static_assert(useFetcher == + (std::is_same::value || + (std::is_same>::value && + !std::is_same>::value)), + "Unexpected fetcher for SkipVariants::FETCHER"); + + static_assert(!useFetcher || hasSkipRows::value, + "Fetcher is chosen for skipping, but has not skipRows method!"); + + static_assert( + useExecutor == + (std::is_same::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same>::value || + std::is_same>::value), + "Unexpected executor for SkipVariants::EXECUTOR"); + + // The LimitExecutor will not work correctly with SkipVariants::FETCHER! 
+ static_assert( + !std::is_same::value || useFetcher, + "LimitExecutor needs to implement skipRows() to work correctly"); + + if (useExecutor) { + return SkipRowsRangeVariant::EXECUTOR; + } else if (useFetcher) { + return SkipRowsRangeVariant::FETCHER; + } else { + return SkipRowsRangeVariant::GET_SOME; + } +} + template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { @@ -1038,8 +1154,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { switch (execState) { case ExecState::SKIP: { auto const& clientCall = _outputItemRow->getClientCall(); + ExecuteSkipRowsRange()> skip; auto [state, skippedLocal, call] = - _executor.skipRowsRange(_lastRange, _outputItemRow->getModifiableClientCall()); + skip.executeSkipRowsRange(_executor, _lastRange, + _outputItemRow->getModifiableClientCall()); skipped += skippedLocal; if (state == ExecutorState::DONE) { From b27d945005168a5e84c981868d919a35e4ac5f08 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Fri, 13 Dec 2019 14:11:08 +0000 Subject: [PATCH 044/122] Replace C&P accident --- arangod/Aql/ExecutionBlockImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 2459f5b0a378..1af656d19caa 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1154,7 +1154,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { switch (execState) { case ExecState::SKIP: { auto const& clientCall = _outputItemRow->getClientCall(); - ExecuteSkipRowsRange()> skip; + ExecuteSkipRowsRange()> skip; auto [state, skippedLocal, call] = skip.executeSkipRowsRange(_executor, _lastRange, _outputItemRow->getModifiableClientCall()); From 7ad6c9ef24c67e80b71f28424280e397496975c3 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Fri, 13 Dec 2019 17:28:14 +0000 Subject: [PATCH 045/122] Simplify and implement skip for new executor interface --- arangod/Aql/ExecutionBlockImpl.cpp | 112 ++++++++++++++--------------- arangod/Aql/ExecutionBlockImpl.h | 3 + 2 files changed, 57 insertions(+), 58 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 1af656d19caa..7e6d9cb79523 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -968,6 +968,7 @@ std::pair ExecutionBlockImpl::r template void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { if (_outputItemRow == nullptr || _outputItemRow->isFull()) { + // Is this a TODO:? 
// We need to define the size of this block based on Input / Executor / Subquery depth size_t blockSize = ExecutionBlock::DefaultBatchSize(); SharedAqlItemBlockPtr newBlock = @@ -1023,51 +1024,8 @@ ExecState NextState(AqlCall const& call) { // enum class SkipRowsRangeVariant { FETCHER, EXECUTOR, GET_SOME }; -template -struct ExecuteSkipRowsRange {}; - -using SkipRowsRangeResult = std::tuple; - -template <> -struct ExecuteSkipRowsRange { - template - static SkipRowsRangeResult executeSkipRowsRange(Executor& executor, - AqlItemBlockInputRange& input, - AqlCall& call) { - TRI_ASSERT(false); - /* auto res = fetcher.skipRows(toSkip); - return std::make_tuple(res.first, typename Executor::Stats{}, res.second); // tuple, cannot use initializer list due to build failure - */ - return std::make_tuple(ExecutorState::DONE, 0, call); // tuple, cannot use initializer list due to build failure - } -}; - -template <> -struct ExecuteSkipRowsRange { - template - static SkipRowsRangeResult executeSkipRowsRange(Executor& executor, - AqlItemBlockInputRange& input, - AqlCall& call) { - TRI_ASSERT(false); - // TODO forward to executor - // return executor.skipRows(toSkip); - return std::make_tuple(ExecutorState::DONE, 0, call); - } -}; - -template <> -struct ExecuteSkipRowsRange { - template - static SkipRowsRangeResult executeSkipRowsRange(Executor& executor, - AqlItemBlockInputRange& input, - AqlCall& call) { - // this function should never be executed - TRI_ASSERT(false); - // Make MSVC happy: - return std::make_tuple(ExecutorState::DONE, 0, call); - } -}; - +// This function is just copy&pasted from above to decide which variant of skip +// is used for which executor. template static SkipRowsRangeVariant constexpr skipRowsType() { bool constexpr useFetcher = @@ -1124,21 +1082,61 @@ static SkipRowsRangeVariant constexpr skipRowsType() { } } +template +std::tuple ExecutionBlockImpl::executeSkipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call) { + if constexpr (isNewStyleExecutor()) { + if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { + // TODO: make statically sure that this method exists? + // the executor has a method skipRowsRange, so use it + return _executor.skipRowsRange(inputRange, call); + } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { + // TODO: check whether this is right (and test!) + // just let the fetcher fetch some stuff and ignore it without even passing it + // to the executor + return _rowFetcher.execute(call); + } else if constexpr (skipRowsType() == SkipRowsRangeVariant::GET_SOME) { + // Here we need to skip by just having the executor produce rows which we then + // subsequently throw away. I do this by allocating a block and having the + // executor write to it. + // + // unsure about the role of call here as yet, might have to be std::move()'d + // into createOutputRow, and then use the resCall for return value. + // + // TODO: is outputBlock freed when the variable goes out of scope? + // TODO: do we need to use currently available blocks and then just discard? + size_t toSkip = std::min(call.getOffset(), DefaultBatchSize()); + SharedAqlItemBlockPtr outputBlock = + _engine->itemBlockManager().requestBlock(toSkip, _infos.numberOfOutputRegisters()); + TRI_ASSERT(outputBlock != nullptr); + TRI_ASSERT(outputBlock->size() == call.getOffset()); + // TODO: do we need to std::move(call) here? 
+ auto outputRow = createOutputRow(outputBlock, AqlCall{}); + + auto const [state, stats, rescall] = _executor.produceRows(inputRange, *outputRow); + + size_t skipped = outputRow->numRowsWritten(); + call.didSkip(skipped); + + return std::make_tuple(state, skipped, call); + } else { + // TODO: this should be a compiler error + TRI_ASSERT(false); + return std::make_tuple(ExecutorState::DONE, 0, call); + } + } else { + TRI_ASSERT(false); + return std::make_tuple(ExecutorState::DONE, 0, call); + } + // Compiler is unhappy without this. + return std::make_tuple(ExecutorState::DONE, 0, call); +} + template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (isNewStyleExecutor()) { ensureOutputBlock(stack.popCall()); - - /* - if (!_outputItemRow) { - // TODO: FIXME Hard coded size - SharedAqlItemBlockPtr newBlock = - _engine->itemBlockManager().requestBlock(1000, - _infos.numberOfOutputRegisters()); TRI_ASSERT(newBlock != nullptr); TRI_ASSERT(newBlock->size() - == 1000); _outputItemRow = createOutputRow(newBlock); - } - */ size_t skipped = 0; TRI_ASSERT(_outputItemRow); @@ -1154,10 +1152,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { switch (execState) { case ExecState::SKIP: { auto const& clientCall = _outputItemRow->getClientCall(); - ExecuteSkipRowsRange()> skip; auto [state, skippedLocal, call] = - skip.executeSkipRowsRange(_executor, _lastRange, - _outputItemRow->getModifiableClientCall()); + executeSkipRowsRange(_lastRange, _outputItemRow->getModifiableClientCall()); skipped += skippedLocal; if (state == ExecutorState::DONE) { diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index d7d0b3267b09..b3da0a89a768 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -200,6 +200,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { */ std::tuple executeWithoutTrace(AqlCallStack stack); + // execute a skipRowsRange call + std::tuple executeSkipRowsRange(AqlItemBlockInputRange& input, AqlCall& call); + /** * @brief Inner getSome() part, without the tracing calls. */ From 64186d35aefe29b51df783c4cadc41d23f589ac0 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Fri, 13 Dec 2019 17:53:22 +0000 Subject: [PATCH 046/122] Fixup skipping for filter executor --- arangod/Aql/ExecutionBlockImpl.cpp | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 7e6d9cb79523..e01c2649cd5d 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -129,6 +129,7 @@ CREATE_HAS_MEMBER_CHECK(initializeCursor, hasInitializeCursor); CREATE_HAS_MEMBER_CHECK(skipRows, hasSkipRows); CREATE_HAS_MEMBER_CHECK(fetchBlockForPassthrough, hasFetchBlockForPassthrough); CREATE_HAS_MEMBER_CHECK(expectedNumberOfRows, hasExpectedNumberOfRows); +CREATE_HAS_MEMBER_CHECK(skipRowsRange, hasSkipRowsRange); /* * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction @@ -1032,7 +1033,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable && !std::is_same>::value; - bool constexpr useExecutor = hasSkipRows::value; + bool constexpr useExecutor = hasSkipRowsRange::value; // ConstFetcher and SingleRowFetcher can skip, but // it may not be done for modification subqueries. 
@@ -1045,28 +1046,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert( - useExecutor == - (std::is_same::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same>::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same>::value || - std::is_same>::value), - "Unexpected executor for SkipVariants::EXECUTOR"); + static_assert(useExecutor == (std::is_same::value), + "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! static_assert( From 266b8ad5cceea20aea966b12278d10bf0ec74360 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Fri, 20 Dec 2019 12:32:53 +0000 Subject: [PATCH 047/122] Rename some methods for clarity and consistency * AqlItemBlockInputRange - hasMore is now hasDataRow - next is now nextDataRow - peek is now peekDataRow - state is now upstreamState - additional method upstreamHasMore --- arangod/Aql/AqlItemBlockInputRange.cpp | 24 ++++++---- arangod/Aql/AqlItemBlockInputRange.h | 9 ++-- arangod/Aql/ExecutionBlockImpl.cpp | 59 ++++++++++++++++++------ arangod/Aql/FilterExecutor.cpp | 12 ++--- tests/Aql/AqlItemBlockInputRangeTest.cpp | 34 +++++++------- tests/Aql/FilterExecutorTest.cpp | 22 ++++----- tests/Aql/SingleRowFetcherTest.cpp | 10 ++-- 7 files changed, 104 insertions(+), 66 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 32d2155ce8fb..7147a1ed7898 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -32,7 +32,7 @@ using namespace arangodb::aql; AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state) : _block(nullptr), _rowIndex(0), _endIndex(0), _finalState(state) { - TRI_ASSERT(!hasMore()); + TRI_ASSERT(!hasDataRow()); } AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, @@ -49,8 +49,12 @@ AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, TRI_ASSERT(index <= _block->size()); } -std::pair AqlItemBlockInputRange::peek() { - if (hasMore()) { +bool AqlItemBlockInputRange::hasDataRow() const noexcept { + return isIndexValid(_rowIndex) && !isShadowRowAtIndex(_rowIndex); +} + +std::pair AqlItemBlockInputRange::peekDataRow() { + if (hasDataRow()) { return std::make_pair(nextState(), InputAqlItemRow{_block, _rowIndex}); } @@ -58,20 +62,20 @@ std::pair AqlItemBlockInputRange::peek() { InputAqlItemRow{CreateInvalidInputRowHint{}}); } -std::pair AqlItemBlockInputRange::next() { - auto res = peek(); +std::pair AqlItemBlockInputRange::nextDataRow() { + auto res = peekDataRow(); if (res.second) { ++_rowIndex; } return res; } -bool AqlItemBlockInputRange::hasMore() const noexcept { - return isIndexValid(_rowIndex) && !isShadowRowAtIndex(_rowIndex); +ExecutorState AqlItemBlockInputRange::upstreamState() const noexcept { + return nextState(); } -ExecutorState AqlItemBlockInputRange::state() const noexcept { - return nextState(); +bool AqlItemBlockInputRange::upstreamHasMore() const noexcept { + return upstreamState() == ExecutorState::HASMORE; } bool AqlItemBlockInputRange::hasShadowRow() const noexcept { @@ -137,4 
+141,4 @@ ExecutorState AqlItemBlockInputRange::nextState() const noexcept { } return ExecutorState::DONE; } -} \ No newline at end of file +} diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 668113457c15..abe4a1fef316 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -40,13 +40,14 @@ class AqlItemBlockInputRange { AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr&&, std::size_t startIndex, std::size_t endIndex) noexcept; - bool hasMore() const noexcept; + ExecutorState upstreamState() const noexcept; + bool upstreamHasMore() const noexcept; - ExecutorState state() const noexcept; + bool hasDataRow() const noexcept; - std::pair peek(); + std::pair peekDataRow(); - std::pair next(); + std::pair nextDataRow(); std::size_t getRowIndex() noexcept { return _rowIndex; }; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index e01c2649cd5d..bb5f3a860875 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -82,7 +82,7 @@ using namespace arangodb::aql; namespace { std::string const doneString = "DONE"; -std::string const hasMoreString = "HASMORE"; +std::string const hasDataRowString = "HASMORE"; std::string const waitingString = "WAITING"; std::string const unknownString = "UNKNOWN"; @@ -91,7 +91,7 @@ std::string const& stateToString(aql::ExecutionState state) { case aql::ExecutionState::DONE: return doneString; case aql::ExecutionState::HASMORE: - return hasMoreString; + return hasDataRowString; case aql::ExecutionState::WAITING: return waitingString; default: @@ -1063,6 +1063,10 @@ static SkipRowsRangeVariant constexpr skipRowsType() { } } +// Let's do it the C++ way. +template +struct dependent_false : std::false_type {}; + template std::tuple ExecutionBlockImpl::executeSkipRowsRange( AqlItemBlockInputRange& inputRange, AqlCall& call) { @@ -1070,11 +1074,15 @@ std::tuple ExecutionBlockImpl::execute if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { // TODO: make statically sure that this method exists? // the executor has a method skipRowsRange, so use it + // + // Input range needs data in it for this to work return _executor.skipRowsRange(inputRange, call); } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { // TODO: check whether this is right (and test!) - // just let the fetcher fetch some stuff and ignore it without even passing it - // to the executor + // just let the fetcher fetch some stuff and ignore it without even + // passing it to the executor + // + // return _rowFetcher.execute(call); } else if constexpr (skipRowsType() == SkipRowsRangeVariant::GET_SOME) { // Here we need to skip by just having the executor produce rows which we then @@ -1086,6 +1094,7 @@ std::tuple ExecutionBlockImpl::execute // // TODO: is outputBlock freed when the variable goes out of scope? // TODO: do we need to use currently available blocks and then just discard? + // For this skip we need data in the input row. We could just run PRODUCE and ignore? 
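      // A rough walk-through of the idea with hypothetical numbers (only a
      // sketch of what the code below already does, not additional behaviour):
      // a client call with offset = 1000 leads to
      //   toSkip = std::min(1000, DefaultBatchSize());
      // we then let produceRows() write up to toSkip rows into a scratch output
      // block, throw that block away, and report the rows that were written via
      // call.didSkip(skipped).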
size_t toSkip = std::min(call.getOffset(), DefaultBatchSize()); SharedAqlItemBlockPtr outputBlock = _engine->itemBlockManager().requestBlock(toSkip, _infos.numberOfOutputRegisters()); @@ -1101,8 +1110,8 @@ std::tuple ExecutionBlockImpl::execute return std::make_tuple(state, skipped, call); } else { - // TODO: this should be a compiler error - TRI_ASSERT(false); + static_assert(dependent_false::value, + "This value of SkipRowsRangeVariant is not supported"); return std::make_tuple(ExecutorState::DONE, 0, call); } } else { @@ -1113,21 +1122,44 @@ std::tuple ExecutionBlockImpl::execute return std::make_tuple(ExecutorState::DONE, 0, call); } +// This is the central function of an executor, and it acts like a +// coroutine: It can be called multiple times and keeps state across +// calls. +// +// The intended behaviour of this function is best described in terms of +// a state machine; the possible states are the ExecStates +// SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE +// +// SKIP skipping rows. How rows are skipped is determined by +// the Executor that is used. See SkipVariants +// PRODUCE calls produceRows of the executor +// FULLCOUNT +// UPSTREAM fetches rows from the upstream executor(s) to be processed by +// our executor. +// SHADOWROWS process any shadow rows +// DONE processing is done template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (isNewStyleExecutor()) { + // Make sure there's a block allocated and set + // the call ensureOutputBlock(stack.popCall()); - size_t skipped = 0; + + auto skipped = size_t{0}; TRI_ASSERT(_outputItemRow); - ExecState execState = ::NextState(_outputItemRow->getClientCall()); + + auto execState = ::NextState(_outputItemRow->getClientCall()); + + // ::NextState(_outputItemRow->getClientCall()); if (_lastRange.hasShadowRow()) { // We have not been able to move all shadowRows into the output last time. // Continue from there. // TODO test if this works with COUNT COLLECT execState = ExecState::SHADOWROWS; } + AqlCall executorRequest; while (execState != ExecState::DONE && !_outputItemRow->allRowsUsed()) { switch (execState) { @@ -1172,12 +1204,13 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } case ExecState::FULLCOUNT: { TRI_ASSERT(false); + // TODO: wat. } case ExecState::UPSTREAM: { // If this triggers the executors produceRows function has returned - // HASMORE even if it new that upstream has no further rows. + // HASMORE even if it knew that upstream has no further rows. TRI_ASSERT(_upstreamState != ExecutionState::DONE); - TRI_ASSERT(!_lastRange.hasMore()); + TRI_ASSERT(!_lastRange.hasDataRow()); size_t skippedLocal = 0; stack.pushCall(std::move(executorRequest)); std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); @@ -1213,14 +1246,14 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); if (state == ExecutorState::DONE) { - if (_lastRange.hasMore()) { + if (_lastRange.hasDataRow()) { // TODO this state is invalid, and can just show up now if we exclude SKIP execState = ExecState::PRODUCE; } else { // Right now we cannot support to have more than one set of // ShadowRows inside of a Range. // We do not know how to continue with the above executor after a shadowrow. 
- TRI_ASSERT(!_lastRange.hasMore()); + TRI_ASSERT(!_lastRange.hasDataRow()); execState = ExecState::DONE; } } @@ -1239,7 +1272,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // This is not strictly necessary here, as we shouldn't be called again // after DONE. _outputItemRow.reset(); - if (_lastRange.hasMore() || _lastRange.hasShadowRow()) { + if (_lastRange.hasDataRow() || _lastRange.hasShadowRow()) { // We have skipped or/and return data, otherwise we cannot return HASMORE TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 4be4c63eceb4..bbe10b193e2e 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -107,10 +107,10 @@ std::tuple FilterExecutor::skipRowsRange( ExecutorState state = ExecutorState::HASMORE; InputAqlItemRow input{CreateInvalidInputRowHint{}}; size_t skipped = 0; - while (inputRange.hasMore() && skipped < call.getOffset()) { - std::tie(state, input) = inputRange.next(); + while (inputRange.hasDataRow() && skipped < call.getOffset()) { + std::tie(state, input) = inputRange.nextDataRow(); if (!input) { - TRI_ASSERT(!inputRange.hasMore()); + TRI_ASSERT(!inputRange.hasDataRow()); break; } if (input.getValue(_infos.getInputRegister()).toBoolean()) { @@ -131,8 +131,8 @@ std::tuple FilterExecutor::produceRows( } FilterStats stats{}; - while (inputRange.hasMore() && !output.isFull()) { - auto const& [state, input] = inputRange.next(); + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); TRI_ASSERT(input.isInitialized()); if (input.getValue(_infos.getInputRegister()).toBoolean()) { output.copyRow(input); @@ -148,5 +148,5 @@ std::tuple FilterExecutor::produceRows( // pass through all rows this fetch is correct, otherwise we have too few rows. 
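  // Example (a sketch with made-up numbers): for a client call with offset = 5
  // and softLimit = 20, the line below asks upstream for softLimit = 5 + 20 = 25
  // rows, since in the worst case every single upstream row passes the filter.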
upstreamCall.softLimit = clientCall.getOffset() + (std::min)(clientCall.softLimit, clientCall.hardLimit); - return {inputRange.state(), stats, upstreamCall}; + return {inputRange.upstreamState(), stats, upstreamCall}; } diff --git a/tests/Aql/AqlItemBlockInputRangeTest.cpp b/tests/Aql/AqlItemBlockInputRangeTest.cpp index c5180e23ec1c..884058d6e7d7 100644 --- a/tests/Aql/AqlItemBlockInputRangeTest.cpp +++ b/tests/Aql/AqlItemBlockInputRangeTest.cpp @@ -57,16 +57,16 @@ class InputRangeTest : public ::testing::TestWithParam { } void validateEndReached(AqlItemBlockInputRange& testee) { - EXPECT_EQ(GetParam(), testee.state()); + EXPECT_EQ(GetParam(), testee.upstreamState()); // Test Data rows - EXPECT_FALSE(testee.hasMore()); + EXPECT_FALSE(testee.hasDataRow()); { - auto const [state, row] = testee.peek(); + auto const [state, row] = testee.peekDataRow(); EXPECT_EQ(GetParam(), state); EXPECT_FALSE(row.isInitialized()); } { - auto const [state, row] = testee.next(); + auto const [state, row] = testee.nextDataRow(); EXPECT_EQ(GetParam(), state); EXPECT_FALSE(row.isInitialized()); } @@ -86,10 +86,10 @@ class InputRangeTest : public ::testing::TestWithParam { void validateNextIsDataRow(AqlItemBlockInputRange& testee, ExecutorState expectedState, int64_t value) { - EXPECT_TRUE(testee.hasMore()); + EXPECT_TRUE(testee.hasDataRow()); EXPECT_FALSE(testee.hasShadowRow()); // We have the next row - EXPECT_EQ(testee.state(), ExecutorState::HASMORE); + EXPECT_EQ(testee.upstreamState(), ExecutorState::HASMORE); auto rowIndexBefore = testee.getRowIndex(); // Validate that shadowRowAPI does not move on { @@ -108,7 +108,7 @@ class InputRangeTest : public ::testing::TestWithParam { } // Validate Data Row API { - auto [state, row] = testee.peek(); + auto [state, row] = testee.peekDataRow(); EXPECT_EQ(state, expectedState); EXPECT_TRUE(row.isInitialized()); auto val = row.getValue(0); @@ -119,7 +119,7 @@ class InputRangeTest : public ::testing::TestWithParam { } { - auto [state, row] = testee.next(); + auto [state, row] = testee.nextDataRow(); EXPECT_EQ(state, expectedState); EXPECT_TRUE(row.isInitialized()); auto val = row.getValue(0); @@ -128,26 +128,26 @@ class InputRangeTest : public ::testing::TestWithParam { ASSERT_NE(rowIndexBefore, testee.getRowIndex()) << "Did not go to next row."; } - EXPECT_EQ(expectedState, testee.state()); + EXPECT_EQ(expectedState, testee.upstreamState()); } void validateNextIsShadowRow(AqlItemBlockInputRange& testee, ExecutorState expectedState, int64_t value, uint64_t depth) { EXPECT_TRUE(testee.hasShadowRow()); // The next is a ShadowRow, the state shall be done - EXPECT_EQ(testee.state(), ExecutorState::DONE); + EXPECT_EQ(testee.upstreamState(), ExecutorState::DONE); auto rowIndexBefore = testee.getRowIndex(); // Validate that inputRowAPI does not move on { - auto [state, row] = testee.peek(); + auto [state, row] = testee.peekDataRow(); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_FALSE(row.isInitialized()); ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) << "Skipped a non processed row."; } { - auto [state, row] = testee.next(); + auto [state, row] = testee.nextDataRow(); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_FALSE(row.isInitialized()); ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) @@ -181,24 +181,24 @@ class InputRangeTest : public ::testing::TestWithParam { TEST_P(InputRangeTest, empty_returns_given_state) { auto testee = createEmpty(); - EXPECT_EQ(GetParam(), testee.state()); + EXPECT_EQ(GetParam(), testee.upstreamState()); } TEST_P(InputRangeTest, 
empty_does_not_have_more) { auto testee = createEmpty(); - EXPECT_FALSE(testee.hasMore()); + EXPECT_FALSE(testee.hasDataRow()); } TEST_P(InputRangeTest, empty_peek_is_empty) { auto testee = createEmpty(); - auto const [state, row] = testee.peek(); + auto const [state, row] = testee.peekDataRow(); EXPECT_EQ(GetParam(), state); EXPECT_FALSE(row.isInitialized()); } TEST_P(InputRangeTest, empty_next_is_empty) { auto testee = createEmpty(); - auto const [state, row] = testee.next(); + auto const [state, row] = testee.nextDataRow(); EXPECT_EQ(GetParam(), state); EXPECT_FALSE(row.isInitialized()); } @@ -290,4 +290,4 @@ INSTANTIATE_TEST_CASE_P(AqlItemBlockInputRangeTest, InputRangeTest, } // namespace aql } // namespace tests -} // namespace arangodb \ No newline at end of file +} // namespace arangodb diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index dc37f114c757..6cceab6e09fc 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -372,7 +372,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange) { EXPECT_EQ(state, ExecutorState::DONE); EXPECT_EQ(stats.getFiltered(), 2); EXPECT_EQ(output.numRowsWritten(), 3); - EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); } TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { @@ -398,7 +398,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(stats.getFiltered(), 2); EXPECT_EQ(output.numRowsWritten(), 3); - EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); // Test the Call we send to upstream EXPECT_EQ(call.offset, 0); EXPECT_FALSE(call.hasHardLimit()); @@ -428,7 +428,7 @@ TEST_F(FilterExecutorTest, test_skip_datarange_need_more) { EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 3); EXPECT_EQ(clientCall.getOffset(), 1000 - 3); - EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); // Test the Call we send to upstream EXPECT_EQ(call.offset, 0); @@ -461,21 +461,21 @@ TEST_F(FilterExecutorTest, test_produce_datarange_has_more) { EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(stats.getFiltered(), 1); EXPECT_EQ(output.numRowsWritten(), 2); - EXPECT_TRUE(input.hasMore()); + EXPECT_TRUE(input.hasDataRow()); // We still have two values in block: false and true { // pop false - auto const [state, row] = input.next(); + auto const [state, row] = input.nextDataRow(); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_FALSE(row.getValue(0).toBoolean()); } { // pop true - auto const [state, row] = input.next(); + auto const [state, row] = input.nextDataRow(); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_TRUE(row.getValue(0).toBoolean()); } - EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); } TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { @@ -498,21 +498,21 @@ TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 2); EXPECT_EQ(clientCall.getOffset(), 0); - EXPECT_TRUE(input.hasMore()); + EXPECT_TRUE(input.hasDataRow()); // We still have two values in block: false and true { // pop false - auto const [state, row] = input.next(); + auto const [state, row] = input.nextDataRow(); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_FALSE(row.getValue(0).toBoolean()); } { // pop true - auto const [state, row] = input.next(); + auto const [state, row] = input.nextDataRow(); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_TRUE(row.getValue(0).toBoolean()); } - 
EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); } } // namespace aql diff --git a/tests/Aql/SingleRowFetcherTest.cpp b/tests/Aql/SingleRowFetcherTest.cpp index 9435cd93979c..06f7b5fde85e 100644 --- a/tests/Aql/SingleRowFetcherTest.cpp +++ b/tests/Aql/SingleRowFetcherTest.cpp @@ -51,7 +51,7 @@ namespace arangodb { namespace tests { namespace aql { -// TODO check that blocks are not returned to early (e.g. not before the next row +// TODO check that blocks are not returned to early (e.g. not before the nextDataRow row // is fetched) // TODO check that, for SingleRowFetcher, blocks are reposited (passed through) immediately @@ -72,10 +72,10 @@ class SingleRowFetcherTestPassBlocks : public ::testing::Test { for (auto const& value : result) { SCOPED_TRACE("Checking for value: " + value); // We need more rows - ASSERT_TRUE(input.hasMore()); + ASSERT_TRUE(input.hasDataRow()); EXPECT_FALSE(input.hasShadowRow()); - auto [state, row] = input.next(); + auto [state, row] = input.nextDataRow(); if (value == result.back()) { EXPECT_EQ(state, ExecutorState::DONE); @@ -89,7 +89,7 @@ class SingleRowFetcherTestPassBlocks : public ::testing::Test { << inputVal.slice().toJson() << " should be equal to \"" << value << "\""; } // We always fetch to the end - EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); } void validateShadowRange(AqlItemBlockInputRange& input, @@ -99,7 +99,7 @@ class SingleRowFetcherTestPassBlocks : public ::testing::Test { " with value: " + value); // We need more rows ASSERT_TRUE(input.hasShadowRow()); - EXPECT_FALSE(input.hasMore()); + EXPECT_FALSE(input.hasDataRow()); auto [state, row] = input.nextShadowRow(); From 16281968c785c730c0a63aa8717c01ff9b6f58f9 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 24 Dec 2019 14:32:18 +0000 Subject: [PATCH 048/122] Introduce function to allocate an output block --- arangod/Aql/ExecutionBlockImpl.cpp | 17 +++++++++++------ arangod/Aql/ExecutionBlockImpl.h | 7 ++++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index bb5f3a860875..fee0dc0f0c6c 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -966,15 +966,20 @@ std::pair ExecutionBlockImpl::r executor(), *_engine, nrItems, nrRegs); } +// TODO: We need to define the size of this block based on Input / Executor / Subquery depth +template +auto ExecutionBlockImpl::allocateOutputBlock(AqlCall&& call) + -> std::unique_ptr { + size_t blockSize = ExecutionBlock::DefaultBatchSize(); + SharedAqlItemBlockPtr newBlock = + _engine->itemBlockManager().requestBlock(blockSize, _infos.numberOfOutputRegisters()); + return createOutputRow(newBlock, std::move(call)); +} + template void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { if (_outputItemRow == nullptr || _outputItemRow->isFull()) { - // Is this a TODO:? 
- // We need to define the size of this block based on Input / Executor / Subquery depth - size_t blockSize = ExecutionBlock::DefaultBatchSize(); - SharedAqlItemBlockPtr newBlock = - _engine->itemBlockManager().requestBlock(blockSize, _infos.numberOfOutputRegisters()); - _outputItemRow = createOutputRow(newBlock, std::move(call)); + _outputItemRow = allocateOutputBlock(std::move(call)); } else { _outputItemRow->setCall(std::move(call)); } diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index b3da0a89a768..667957491c03 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -201,7 +201,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { std::tuple executeWithoutTrace(AqlCallStack stack); // execute a skipRowsRange call - std::tuple executeSkipRowsRange(AqlItemBlockInputRange& input, AqlCall& call); + std::tuple executeSkipRowsRange(AqlItemBlockInputRange& input, + AqlCall& call); /** * @brief Inner getSome() part, without the tracing calls. @@ -249,6 +250,10 @@ class ExecutionBlockImpl final : public ExecutionBlock { // Trace the end of a getSome call, potentially with result void traceExecuteEnd(std::tuple const& result); + // Allocate an output block and install a call in it + [[nodiscard]] auto allocateOutputBlock(AqlCall&& call) + -> std::unique_ptr; + // Ensure that we have an output block of the desired dimenstions // Will as a side effect modify _outputItemRow void ensureOutputBlock(AqlCall&& call); From 7e8180f80d8aa4352ddc66001bed4d07b44248c5 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 24 Dec 2019 14:33:21 +0000 Subject: [PATCH 049/122] Fix skipSome simulation bug --- arangod/Aql/ExecutionBlockImpl.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index fee0dc0f0c6c..3baa5d17805f 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -451,7 +451,16 @@ std::pair ExecutionBlockImpl::skipSome(size_t if constexpr (isNewStyleExecutor()) { AqlCallStack stack{AqlCall::SimulateSkipSome(atMost)}; auto const [state, skipped, block] = execute(stack); - return {state, skipped}; + + // execute returns ExecutionState::DONE here, which stops execution after simulating a skip. + // If we indiscriminately return ExecutionState::HASMORE, then we end up in an infinite loop + // + // luckily we can dispose of this kludge once executors have been ported. + if (skipped < atMost && state == ExecutionState::DONE) { + return {ExecutionState::DONE, skipped}; + } else { + return {ExecutionState::HASMORE, skipped}; + } } else { traceSkipSomeBegin(atMost); auto state = ExecutionState::HASMORE; From 4e86d1765b6bf5740fea4e75484f525282082787 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 24 Dec 2019 15:06:25 +0000 Subject: [PATCH 050/122] Some small cleanups --- arangod/Aql/ExecutionBlockImpl.cpp | 48 +++++++++++------------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 3baa5d17805f..b9832657da4f 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1086,43 +1086,32 @@ std::tuple ExecutionBlockImpl::execute AqlItemBlockInputRange& inputRange, AqlCall& call) { if constexpr (isNewStyleExecutor()) { if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { - // TODO: make statically sure that this method exists? 
- // the executor has a method skipRowsRange, so use it - // - // Input range needs data in it for this to work + // If the executor has a method skipRowsRange, to skip outputs more + // efficiently than just producing them to subsequently discard them, then + // we use it return _executor.skipRowsRange(inputRange, call); } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { - // TODO: check whether this is right (and test!) - // just let the fetcher fetch some stuff and ignore it without even - // passing it to the executor - // - // + // If we know that every input row produces exactly one output row (this + // is a property of the executor), then we can just let the fetcher skip + // the number of rows that we would like to skip. return _rowFetcher.execute(call); } else if constexpr (skipRowsType() == SkipRowsRangeVariant::GET_SOME) { - // Here we need to skip by just having the executor produce rows which we then - // subsequently throw away. I do this by allocating a block and having the - // executor write to it. - // - // unsure about the role of call here as yet, might have to be std::move()'d - // into createOutputRow, and then use the resCall for return value. - // - // TODO: is outputBlock freed when the variable goes out of scope? - // TODO: do we need to use currently available blocks and then just discard? - // For this skip we need data in the input row. We could just run PRODUCE and ignore? + // In all other cases, we skip by letting the executor produce rows, and + // then throw them away. + size_t toSkip = std::min(call.getOffset(), DefaultBatchSize()); - SharedAqlItemBlockPtr outputBlock = - _engine->itemBlockManager().requestBlock(toSkip, _infos.numberOfOutputRegisters()); - TRI_ASSERT(outputBlock != nullptr); - TRI_ASSERT(outputBlock->size() == call.getOffset()); - // TODO: do we need to std::move(call) here? - auto outputRow = createOutputRow(outputBlock, AqlCall{}); + AqlCall skipCall{}; + skipCall.softLimit = toSkip; + skipCall.hardLimit = toSkip; + skipCall.offset = 0; - auto const [state, stats, rescall] = _executor.produceRows(inputRange, *outputRow); + // we can't mess with _outputItemRow, + auto skipOutput = allocateOutputBlock(std::move(skipCall)); + auto [state, stats, rescall] = _executor.produceRows(inputRange, *skipOutput); + auto skipped = skipOutput->numRowsWritten(); - size_t skipped = outputRow->numRowsWritten(); call.didSkip(skipped); - - return std::make_tuple(state, skipped, call); + return std::make_tuple(state, skipped, rescall); } else { static_assert(dependent_false::value, "This value of SkipRowsRangeVariant is not supported"); @@ -1166,7 +1155,6 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { auto execState = ::NextState(_outputItemRow->getClientCall()); - // ::NextState(_outputItemRow->getClientCall()); if (_lastRange.hasShadowRow()) { // We have not been able to move all shadowRows into the output last time. // Continue from there. From f5ef0e727e5d039cc37a3636da2faa2bf34e8df1 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 2 Jan 2020 16:50:30 +0100 Subject: [PATCH 051/122] Replaced assertion on atMost on the output size. 
Otherwise we got into issues with atMost changes between waiting calls (can occur in our intermediate state now) --- arangod/Aql/ExecutionBlockImpl.cpp | 35 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 89da10671dc7..75daf6a1a5d3 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -299,16 +299,7 @@ std::pair ExecutionBlockImpl::g } TRI_ASSERT(state == ExecutionState::HASMORE); - // When we're passing blocks through we have no control over the size of the - // output block. - // Plus, the ConstrainedSortExecutor will report an expectedNumberOfRows - // according to its heap size, thus resulting in a smaller allocated output - // block. However, it won't report DONE after, because a LIMIT block with - // fullCount must continue to count after the sorted output. - if /* constexpr */ (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Disable && - !std::is_same::value) { - TRI_ASSERT(_outputItemRow->numRowsWritten() == atMost); - } + TRI_ASSERT(_outputItemRow->isFull()); } auto outputBlock = _outputItemRow->stealBlock(); @@ -1349,23 +1340,31 @@ template class ::arangodb::aql::ExecutionBlockImpl; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; +template class ::arangodb::aql::ExecutionBlockImpl< + IResearchViewExecutor>; +template class ::arangodb::aql::ExecutionBlockImpl< + IResearchViewExecutor>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; +template class ::arangodb::aql::ExecutionBlockImpl< + IResearchViewExecutor>; +template class ::arangodb::aql::ExecutionBlockImpl< + IResearchViewExecutor>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; +template class ::arangodb::aql::ExecutionBlockImpl>; +template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; -template class ::arangodb::aql::ExecutionBlockImpl>; +template class ::arangodb::aql::ExecutionBlockImpl>; +template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>>; From 862d316c1ebe4c67e9fe876485f5ced9d535c822 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Mon, 6 Jan 2020 12:49:49 +0000 Subject: [PATCH 052/122] Attempt at fixing execute --- arangod/Aql/ExecutionBlockImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 75daf6a1a5d3..df13c3d1955a 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1194,7 +1194,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack 
stack) { _engine->_stats += stats; if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; - } else if (clientCall.getLimit() > 0) { + } else if (clientCall.getLimit() > 0 && !_lastRange.hasDataRow()) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more executorRequest = call; From a656f39cecf67bd21af022774e960b61c473c0df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Fri, 17 Jan 2020 11:31:29 +0100 Subject: [PATCH 053/122] Fix merge conflict --- arangod/Aql/AqlCall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 8f6592e3ee07..3c4aa4bb98c1 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -74,7 +74,7 @@ struct AqlCall { std::size_t getLimit() const { // By default we use batchsize - std::size_t limit = ExecutionBlock::DefaultBatchSize(); + std::size_t limit = ExecutionBlock::DefaultBatchSize; // We are not allowed to go above softLimit if (std::holds_alternative(softLimit)) { limit = (std::min)(std::get(softLimit), limit); @@ -112,4 +112,4 @@ struct AqlCall { } // namespace aql } // namespace arangodb -#endif \ No newline at end of file +#endif From beb7003160900fa524c6ecd084887624a807f51a Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 24 Jan 2020 12:37:54 +0100 Subject: [PATCH 054/122] Feature/aql subquery execution block impl execute implementation exec block impl tests (#10838) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added a generic Lambda Executor, this can be used in ExecutionBlockImplTests to have finegrained control over the action happening within the Executor * Added first test using the LambdaExecutor. * Added some tests around Execute. GetSome seems to be okayish. Skip not. Namely skipped numbers are not reported correctly. * Made the first ExecutionBlock Execute integration test pass. Still other tests are failing * Simplified the IsSkipSome test, this allows the C++ tests for Mixed Calls to pass. * Added a skip test and fixed the producing executor to recreate the list once for every input line. * More tests. Also added a custom AqlCall printer function for GTest. Tests still red, need to fix fullCount and hardLimit. * Implemented ostream operator for AQLCall. * Properly implemented fullCount incl. UnitTest * Added test for Callforwarding. They still have some todos, but these can only be solved by upgrading the OutputRow. Which should be part of separate PR * Added another test for CallForwarding in passthrough state * Added a Test Implementation for an Executor that uses a dynamic skip implementation. * Fixed skip with HARDLIMIT. * Startet to implement call forwarding test. However we need to improve Outputrow first this will be done in seperate branch * Removed designated initializers. Thanks for not supporting it MSVC! * Removed non-passthrough non-skip Lambda Executor again. We decided to disallow this. * Update tests/Aql/ExecutionBlockImplTest.cpp Co-Authored-By: Markus Pfeiffer * Started to add implementation of passthrough block allocation * Added a comparator to AqlCall. Mostly for tests * Fixed an issue in skip-passthrough version. Updated the tests. * Allow to 'overSkip' if we do fullCount * Enabled the first set of tests now. 
Only one set to go * Applied all fixes to get Integration testsuite green * Added some comments on the TestCases executed in the ExecutionBlockImpl * Added tes descriptions and removed a duplicate test * Added some comments on LamdbaExecutors * Added description of ExecutionBlockImple execute logic * Applied review comments, thanks to reviewers * Fixed modulo 2 off by one error * Renamed getLambda() => getProduceLambda() in test code, as we have produce and skip Co-authored-by: Markus Pfeiffer Co-authored-by: Tobias Gödderz --- arangod/Aql/AqlCall.h | 56 +- arangod/Aql/AqlItemBlockInputRange.cpp | 4 + arangod/Aql/AqlItemBlockInputRange.h | 2 + arangod/Aql/ExecutionBlockImpl.cpp | 298 +++-- arangod/Aql/ExecutionBlockImpl.h | 6 + arangod/Aql/OutputAqlItemRow.cpp | 5 +- arangod/Aql/OutputAqlItemRow.h | 7 +- tests/Aql/ExecutionBlockImplTest.cpp | 1172 +++++++++++++++++ tests/Aql/ExecutionBlockImplTestInstances.cpp | 3 + tests/Aql/TestLambdaExecutor.cpp | 112 ++ tests/Aql/TestLambdaExecutor.h | 233 ++++ tests/CMakeLists.txt | 1 + tests/Mocks/Servers.cpp | 21 +- tests/Mocks/Servers.h | 10 +- 14 files changed, 1824 insertions(+), 106 deletions(-) create mode 100644 tests/Aql/TestLambdaExecutor.cpp create mode 100644 tests/Aql/TestLambdaExecutor.h diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 5893c9d43afe..6ece6b80ea4d 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -57,7 +57,7 @@ struct AqlCall { // TODO Remove me, this will not be necessary later static bool IsSkipSomeCall(AqlCall const& call) { - return !call.hasHardLimit() && call.getLimit() == 0 && call.getOffset() > 0; + return !call.hasHardLimit() && call.getOffset() > 0; } // TODO Remove me, this will not be necessary later @@ -88,8 +88,15 @@ struct AqlCall { } void didSkip(std::size_t n) { - TRI_ASSERT(n <= offset); - offset -= n; + if (n <= offset) { + // TRI_ASSERT(n <= offset); + offset -= n; + } else { + TRI_ASSERT(fullCount); + // We might have skip,(produce?),fullCount + // in a single call here. 
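+      // Illustrative sketch (hypothetical values): with offset = 10 and
+      // fullCount = true,
+      //   call.didSkip(10);  // consumes the offset, offset becomes 0
+      //   call.didSkip(25);  // fullCount phase: ends up in this branch,
+      //                      // offset stays 0; the 25 rows only show up in
+      //                      // the caller's skipped counter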
+ offset = 0; + } } void didProduce(std::size_t n) { @@ -136,6 +143,49 @@ constexpr AqlCall::Limit operator+(size_t n, AqlCall::Limit const& a) { return a + n; } +constexpr bool operator==(AqlCall::Limit const& a, size_t n) { + return std::visit(overload{[n](size_t const& i) -> bool { return i == n; }, + [](auto inf) -> bool { return false; }}, + a); +} + +constexpr bool operator==(size_t n, AqlCall::Limit const& a) { return a == n; } + +constexpr bool operator==(AqlCall::Limit const& a, + arangodb::aql::AqlCall::Infinity const& n) { + return std::visit(overload{[](size_t const& i) -> bool { return false; }, + [](auto inf) -> bool { return true; }}, + a); +} + +constexpr bool operator==(arangodb::aql::AqlCall::Infinity const& n, + AqlCall::Limit const& a) { + return a == n; +} + +constexpr bool operator==(AqlCall::Limit const& a, AqlCall::Limit const& b) { + return std::visit(overload{[&b](size_t const& i) -> bool { return i == b; }, + [&b](auto inf) -> bool { return inf == b; }}, + a); +} + +inline std::ostream& operator<<(std::ostream& out, + const arangodb::aql::AqlCall::Limit& limit) { + return std::visit(arangodb::overload{[&out](size_t const& i) -> std::ostream& { + return out << i; + }, + [&out](arangodb::aql::AqlCall::Infinity const&) -> std::ostream& { + return out << "unlimited"; + }}, + limit); +} + +inline std::ostream& operator<<(std::ostream& out, const arangodb::aql::AqlCall& call) { + return out << "skip: " << call.getOffset() << " softLimit: " << call.softLimit + << " hardLimit: " << call.hardLimit + << " fullCount: " << std::boolalpha << call.fullCount; +} + } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 7147a1ed7898..f8e9cef032b6 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -49,6 +49,10 @@ AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, TRI_ASSERT(index <= _block->size()); } +SharedAqlItemBlockPtr AqlItemBlockInputRange::getBlock() const noexcept { + return _block; +} + bool AqlItemBlockInputRange::hasDataRow() const noexcept { return isIndexValid(_rowIndex) && !isShadowRowAtIndex(_rowIndex); } diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index abe4a1fef316..42de49fbb586 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -40,6 +40,8 @@ class AqlItemBlockInputRange { AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr&&, std::size_t startIndex, std::size_t endIndex) noexcept; + arangodb::aql::SharedAqlItemBlockPtr getBlock() const noexcept; + ExecutorState upstreamState() const noexcept; bool upstreamHasMore() const noexcept; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 441fe49f92ee..6f553bad1505 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -137,9 +137,27 @@ CREATE_HAS_MEMBER_CHECK(skipRowsRange, hasSkipRowsRange); * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction * TODO: This should be removed once all executors and fetchers are ported to the new style. */ + +#ifdef ARANGODB_USE_GOOGLE_TESTS +// Forward declaration of Test Executors. +// only used as long as isNewStyleExecutor is required. 
+namespace arangodb { +namespace aql { +class TestLambdaExecutor; + +class TestLambdaSkipExecutor; +} // namespace aql +} // namespace arangodb +#endif + template static bool constexpr isNewStyleExecutor() { - return std::is_same::value; + return +#ifdef ARANGODB_USE_GOOGLE_TESTS + std::is_same_v || + std::is_same_v || +#endif + std::is_same_v; } template @@ -156,7 +174,8 @@ ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, _outputItemRow(), _query(*engine->getQuery()), _state(InternalState::FETCH_DATA), - _lastRange{ExecutorState::HASMORE} { + _lastRange{ExecutorState::HASMORE}, + _hasUsedDataRangeBlock{false} { // already insert ourselves into the statistics results if (_profile >= PROFILE_LEVEL_BLOCKS) { _engine->_stats.nodes.try_emplace(node->id(), ExecutionStats::Node()); @@ -314,6 +333,21 @@ std::pair ExecutionBlockImpl::g template std::unique_ptr ExecutionBlockImpl::createOutputRow( SharedAqlItemBlockPtr& newBlock, AqlCall&& call) { +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + if (newBlock != nullptr) { + // Assert that the block has enough registers. This must be guaranteed by + // the register planning. + TRI_ASSERT(newBlock->getNrRegs() == _infos.numberOfOutputRegisters()); + // Check that all output registers are empty. + for (auto const& reg : *_infos.getOutputRegisters()) { + for (size_t row = 0; row < newBlock->size(); row++) { + AqlValue const& val = newBlock->getValueReference(row, reg); + TRI_ASSERT(val.isEmpty()); + } + } + } +#endif + if /* constexpr */ (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) { return std::make_unique(newBlock, infos().getOutputRegisters(), infos().registersToKeep(), @@ -626,8 +660,7 @@ void ExecutionBlockImpl::traceExecuteBegin(AqlCallStack const& stack) auto const& call = stack.peek(); LOG_TOPIC("1e717", INFO, Logger::QUERIES) << "[query#" << queryId << "] " - << "execute type=" << node->getTypeString() - << " offset=" << call.getOffset() << " limit= " << call.getLimit() + << "execute type=" << node->getTypeString() << " call= " << call << " this=" << (uintptr_t)this << " id=" << node->id(); } } @@ -945,16 +978,18 @@ struct RequestWrappedBlock { template std::pair ExecutionBlockImpl::requestWrappedBlock( size_t nrItems, RegisterCount nrRegs) { - static_assert(Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Disable || - !Executor::Properties::inputSizeRestrictsOutputSize, - "At most one of Properties::allowsBlockPassthrough or " - "Properties::inputSizeRestrictsOutputSize should be true for " - "each Executor"); - static_assert( - (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) == - hasFetchBlockForPassthrough::value, - "Executors should implement the method fetchBlockForPassthrough() iff " - "Properties::allowsBlockPassthrough is true"); + if constexpr (!isNewStyleExecutor()) { + static_assert(Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Disable || + !Executor::Properties::inputSizeRestrictsOutputSize, + "At most one of Properties::allowsBlockPassthrough or " + "Properties::inputSizeRestrictsOutputSize should be true for " + "each Executor"); + static_assert((Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) == + hasFetchBlockForPassthrough::value, + "Executors should implement the method " + "fetchBlockForPassthrough() iff " + "Properties::allowsBlockPassthrough is true"); + } static_assert( Executor::Properties::inputSizeRestrictsOutputSize == hasExpectedNumberOfRows::value, @@ -988,18 +1023,38 @@ std::pair 
ExecutionBlockImpl::r template auto ExecutionBlockImpl::allocateOutputBlock(AqlCall&& call) -> std::unique_ptr { - size_t blockSize = ExecutionBlock::DefaultBatchSize; - SharedAqlItemBlockPtr newBlock = - _engine->itemBlockManager().requestBlock(blockSize, _infos.numberOfOutputRegisters()); - return createOutputRow(newBlock, std::move(call)); + if constexpr (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) { + SharedAqlItemBlockPtr newBlock{nullptr}; + // Passthrough variant, re-use the block stored in InputRange + if (!_hasUsedDataRangeBlock) { + // In the pass through variant we have the contract that we work on a + // block all or nothing, so if we have used the block once, we cannot use it again + // however we cannot remove the _lastRange as it may contain additional information. + newBlock = _lastRange.getBlock(); + _hasUsedDataRangeBlock = true; + } + + return createOutputRow(newBlock, std::move(call)); + } else { + // Non-Passthrough variant, we need to allocate the block ourselfs + size_t blockSize = ExecutionBlock::DefaultBatchSize; + SharedAqlItemBlockPtr newBlock = + _engine->itemBlockManager().requestBlock(blockSize, _infos.numberOfOutputRegisters()); + return createOutputRow(newBlock, std::move(call)); + } } template void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { - if (_outputItemRow == nullptr || _outputItemRow->isFull()) { + if (_outputItemRow == nullptr || !_outputItemRow->isInitialized()) { _outputItemRow = allocateOutputBlock(std::move(call)); } else { _outputItemRow->setCall(std::move(call)); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE +// We only inject a new call into the output row. +// In the passhrough variant we need to ensure that inputBlock and outputBlock stay identical +// TODO add an external assertion for this. +#endif } } @@ -1011,7 +1066,15 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, } // TODO move me up -enum ExecState { SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE }; +enum ExecState { + SKIP, + PRODUCE, + FASTFORWARD, + FULLCOUNT, + UPSTREAM, + SHADOWROWS, + DONE +}; // TODO clean me up namespace { @@ -1029,6 +1092,10 @@ ExecState NextState(AqlCall const& call) { // then fullcount return ExecState::FULLCOUNT; } + if (call.hardLimit == 0) { + // We reached hardLimit, fast forward + return ExecState::FASTFORWARD; + } // now we are done. return ExecState::DONE; } @@ -1046,10 +1113,10 @@ ExecState NextState(AqlCall const& call) { // GET_SOME: we just request rows from the executor and then discard // them // -enum class SkipRowsRangeVariant { FETCHER, EXECUTOR, GET_SOME }; +enum class SkipRowsRangeVariant { FETCHER, EXECUTOR }; -// This function is just copy&pasted from above to decide which variant of skip -// is used for which executor. +// This function is just copy&pasted from above to decide which variant of +// skip is used for which executor. template static SkipRowsRangeVariant constexpr skipRowsType() { bool constexpr useFetcher = @@ -1069,7 +1136,11 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert(useExecutor == (std::is_same::value), + static_assert(useExecutor == ( +#ifdef ARANGODB_USE_GOOGLE_TESTS + std::is_same_v || +#endif + std::is_same_v), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! 
@@ -1082,7 +1153,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { } else if (useFetcher) { return SkipRowsRangeVariant::FETCHER; } else { - return SkipRowsRangeVariant::GET_SOME; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } } @@ -1095,32 +1167,16 @@ std::tuple ExecutionBlockImpl::execute AqlItemBlockInputRange& inputRange, AqlCall& call) { if constexpr (isNewStyleExecutor()) { if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { - // If the executor has a method skipRowsRange, to skip outputs more - // efficiently than just producing them to subsequently discard them, then - // we use it + // If the executor has a method skipRowsRange, to skip outputs. + // Every non-passthrough executor needs to implement this. return _executor.skipRowsRange(inputRange, call); } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { // If we know that every input row produces exactly one output row (this // is a property of the executor), then we can just let the fetcher skip // the number of rows that we would like to skip. - return _rowFetcher.execute(call); - } else if constexpr (skipRowsType() == SkipRowsRangeVariant::GET_SOME) { - // In all other cases, we skip by letting the executor produce rows, and - // then throw them away. - - size_t toSkip = std::min(call.getOffset(), DefaultBatchSize); - AqlCall skipCall{}; - skipCall.softLimit = toSkip; - skipCall.hardLimit = toSkip; - skipCall.offset = 0; - - // we can't mess with _outputItemRow, - auto skipOutput = allocateOutputBlock(std::move(skipCall)); - auto [state, stats, rescall] = _executor.produceRows(inputRange, *skipOutput); - auto skipped = skipOutput->numRowsWritten(); - - call.didSkip(skipped); - return std::make_tuple(state, skipped, rescall); + // Returning this will trigger to end in upstream state now, with the + // call that was handed it + return {inputRange.upstreamState(), 0, call}; } else { static_assert(dependent_false::value, "This value of SkipRowsRangeVariant is not supported"); @@ -1134,52 +1190,73 @@ std::tuple ExecutionBlockImpl::execute return std::make_tuple(ExecutorState::DONE, 0, call); } -// This is the central function of an executor, and it acts like a -// coroutine: It can be called multiple times and keeps state across -// calls. -// -// The intended behaviour of this function is best described in terms of -// a state machine; the possible states are the ExecStates -// SKIP, PRODUCE, FULLCOUNT, UPSTREAM, SHADOWROWS, DONE -// -// SKIP skipping rows. How rows are skipped is determined by -// the Executor that is used. See SkipVariants -// PRODUCE calls produceRows of the executor -// FULLCOUNT -// UPSTREAM fetches rows from the upstream executor(s) to be processed by -// our executor. -// SHADOWROWS process any shadow rows -// DONE processing is done +/** + * @brief This is the central function of an executor, and it acts like a + * coroutine: It can be called multiple times and keeps state across + * calls. + * + * The intended behaviour of this function is best described in terms of + * a state machine; the possible states are the ExecStates + * SKIP, PRODUCE, FULLCOUNT, FASTFORWARD, UPSTREAM, SHADOWROWS, DONE + * + * SKIP skipping rows. How rows are skipped is determined by + * the Executor that is used. See SkipVariants + * PRODUCE calls produceRows of the executor + * FULLCOUNT again skipping rows. like skip, but will skip all rows + * FASTFORWARD like fullcount, but does not count skipped rows. 
+ * UPSTREAM fetches rows from the upstream executor(s) to be processed by + * our executor. + * SHADOWROWS process any shadow rows + * DONE processing of one output is done. We did handle offset / limit / fullCount without crossing BatchSize limits. + * This state does not indicate that we are DONE with all input, we are just done with one walk through this statemachine. + * + * We progress within the states in the following way: + * There is a nextState method that determines the next state based on the call, it can only lead to: + * SKIP, PRODUCE, FULLCOUNT, FASTFORWAD, DONE + * + * On the first call we will use nextState to get to our starting point. + * After any of SKIP, PRODUCE, FULLCOUNT, FASTFORWAD, DONE We either go to + * 1. DONE (if output is full) + * 2. SHADOWROWS (if executor is done) + * 3. UPSTREAM if executor has More, (Invariant: input fully consumed) + * 4. NextState (if none of the above applies) + * + * From SHADOWROWS we can only go to DONE + * From UPSTREAM we go to NextState. + * + * @tparam Executor The Executor that will implement the logic of what needs to happen to the data + * @param stack The call stack of lower levels + * @return std::tuple + * ExecutionState: WAITING -> We wait for IO, secure state, return you will be called again + * ExecutionState: HASMORE -> We still have data + * ExecutionState: DONE -> We do not have any more data, do never call again + * size_t -> Amount of documents skipped within this one call. (contains offset and fullCount) + * SharedAqlItemBlockPtr -> The resulting data + */ template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (isNewStyleExecutor()) { - // Make sure there's a block allocated and set - // the call - ensureOutputBlock(stack.popCall()); + AqlCall clientCall = stack.popCall(); auto skipped = size_t{0}; - TRI_ASSERT(_outputItemRow); - - auto execState = ::NextState(_outputItemRow->getClientCall()); + auto execState = ::NextState(clientCall); if (_lastRange.hasShadowRow()) { - // We have not been able to move all shadowRows into the output last time. - // Continue from there. + // We have not been able to move all shadowRows into the output last + // time. Continue from there. // TODO test if this works with COUNT COLLECT execState = ExecState::SHADOWROWS; } AqlCall executorRequest; - while (execState != ExecState::DONE && !_outputItemRow->allRowsUsed()) { + while (execState != ExecState::DONE) { switch (execState) { case ExecState::SKIP: { - auto const& clientCall = _outputItemRow->getClientCall(); - auto [state, skippedLocal, call] = - executeSkipRowsRange(_lastRange, _outputItemRow->getModifiableClientCall()); + auto [state, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); skipped += skippedLocal; - + // The execute might have modified the client call. 
if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; } else if (clientCall.getOffset() > 0) { @@ -1194,13 +1271,23 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::PRODUCE: { - auto const& clientCall = _outputItemRow->getClientCall(); + // Make sure there's a block allocated and set + // the call TRI_ASSERT(clientCall.getLimit() > 0); + ensureOutputBlock(std::move(clientCall)); + TRI_ASSERT(_outputItemRow); + // Execute getSome auto const [state, stats, call] = _executor.produceRows(_lastRange, *_outputItemRow); _engine->_stats += stats; - if (state == ExecutorState::DONE) { + + // Produce might have modified the clientCall + clientCall = _outputItemRow->getClientCall(); + + if (_outputItemRow->isInitialized() && _outputItemRow->allRowsUsed()) { + execState = ExecState::DONE; + } else if (state == ExecutorState::DONE) { execState = ExecState::SHADOWROWS; } else if (clientCall.getLimit() > 0 && !_lastRange.hasDataRow()) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); @@ -1213,9 +1300,38 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } break; } + case ExecState::FASTFORWARD: { + // We can either do FASTFORWARD or FULLCOUNT, difference is that + // fullcount counts what is produced now, FASTFORWARD simply drops + TRI_ASSERT(!clientCall.needsFullCount()); + // We can drop all dataRows from upstream + + while (_lastRange.hasDataRow()) { + auto [state, row] = _lastRange.nextDataRow(); + TRI_ASSERT(row.isInitialized()); + } + if (_lastRange.upstreamState() == ExecutorState::DONE) { + execState = ExecState::SHADOWROWS; + } else { + // We need to request more, simply send hardLimit 0 upstream + executorRequest = AqlCall{}; + executorRequest.hardLimit = 0; + execState = ExecState::UPSTREAM; + } + break; + } case ExecState::FULLCOUNT: { - TRI_ASSERT(false); - // TODO: wat. + auto [state, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); + skipped += skippedLocal; + + if (state == ExecutorState::DONE) { + execState = ExecState::SHADOWROWS; + } else { + // We need to request more + executorRequest = call; + execState = ExecState::UPSTREAM; + } + break; } case ExecState::UPSTREAM: { // If this triggers the executors produceRows function has returned @@ -1231,11 +1347,12 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(skipped == 0); return {_upstreamState, 0, nullptr}; } + // We have a new range, passthrough can use this range. + _hasUsedDataRangeBlock = false; skipped += skippedLocal; - ensureOutputBlock(_outputItemRow->stealClientCall()); - // Do we need to call it? - // clientCall.didSkip(skippedLocal); - execState = ::NextState(_outputItemRow->getClientCall()); + // We skipped through passthroug, so count that a skip was solved. + clientCall.didSkip(skippedLocal); + execState = ::NextState(clientCall); break; } case ExecState::SHADOWROWS: { @@ -1248,7 +1365,12 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if (_lastRange.hasShadowRow()) { auto const& [state, shadowRow] = _lastRange.nextShadowRow(); TRI_ASSERT(shadowRow.isInitialized()); + ensureOutputBlock(std::move(clientCall)); + TRI_ASSERT(_outputItemRow); + TRI_ASSERT(_outputItemRow->isInitialized()); + _outputItemRow->copyRow(shadowRow); + if (shadowRow.isRelevant()) { // We found a relevant shadow Row. 
// We need to reset the Executor @@ -1256,7 +1378,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); - if (state == ExecutorState::DONE) { + clientCall = _outputItemRow->getClientCall(); + if (_outputItemRow->allRowsUsed()) { + execState = ExecState::DONE; + } else if (state == ExecutorState::DONE) { if (_lastRange.hasDataRow()) { // TODO this state is invalid, and can just show up now if we exclude SKIP execState = ExecState::PRODUCE; @@ -1278,8 +1403,9 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(false); } } - - auto outputBlock = _outputItemRow->stealBlock(); + // If we do not have an output, we simply return a nullptr here. + auto outputBlock = _outputItemRow != nullptr ? _outputItemRow->stealBlock() + : SharedAqlItemBlockPtr{nullptr}; // This is not strictly necessary here, as we shouldn't be called again // after DONE. _outputItemRow.reset(); diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 667957491c03..064b087574f3 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -289,6 +289,12 @@ class ExecutionBlockImpl final : public ExecutionBlock { size_t _skipped{}; DataRange _lastRange; + + // Only used in passthrough variant. + // We track if we have reference the range's block + // into an output block. + // If so we are not allowed to reuse it. + bool _hasUsedDataRangeBlock; }; } // namespace arangodb::aql diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index c5617e7aa71b..a94ab5f72ee4 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -66,7 +66,10 @@ OutputAqlItemRow::OutputAqlItemRow( _setBaseIndexNotUsed(true), #endif _allowSourceRowUninitialized(false) { - TRI_ASSERT(_block != nullptr); +} + +bool OutputAqlItemRow::isInitialized() const noexcept { + return _block != nullptr; } template diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 66b0a513e9ef..74173815ac32 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -64,6 +64,8 @@ class OutputAqlItemRow { OutputAqlItemRow(OutputAqlItemRow&&) = delete; OutputAqlItemRow& operator=(OutputAqlItemRow&&) = delete; + bool isInitialized() const noexcept; + // Clones the given AqlValue template void cloneValueInto(RegisterId registerId, ItemRowType const& sourceRow, @@ -137,7 +139,7 @@ class OutputAqlItemRow { * the left-over space for ShadowRows. */ [[nodiscard]] bool allRowsUsed() const { - return block().size() <= _baseIndex; + return _block == nullptr || block().size() <= _baseIndex; } /** @@ -152,6 +154,9 @@ class OutputAqlItemRow { * passed from ExecutionBlockImpl. 
*/ [[nodiscard]] size_t numRowsLeft() const { + if (_block == nullptr) { + return 0; + } return (std::min)(block().size() - _baseIndex, _call.getLimit()); } diff --git a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index 62a68a8c7224..fef41d9d983b 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -26,15 +26,22 @@ #include "gtest/gtest.h" #include "AqlItemBlockHelper.h" +#include "Mocks/Servers.h" #include "TestEmptyExecutorHelper.h" #include "TestExecutorHelper.h" +#include "TestLambdaExecutor.h" #include "WaitingExecutionBlockMock.h" #include "fakeit.hpp" +#include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockSerializationFormat.h" +#include "Aql/ConstFetcher.h" #include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutionEngine.h" +#include "Aql/IdExecutor.h" #include "Aql/Query.h" +#include "Aql/RegisterPlan.h" #include "Aql/SingleRowFetcher.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" @@ -46,6 +53,9 @@ namespace arangodb { namespace tests { namespace aql { +using LambdaExePassThrough = TestLambdaExecutor; +using LambdaExe = TestLambdaSkipExecutor; + // This test is supposed to only test getSome return values, // it is not supposed to test the fetch logic! @@ -391,6 +401,1168 @@ TEST_F(ExecutionBlockImplTest, ASSERT_EQ(block, nullptr); } +/** + * @brief Shared Test case initializer to test the execute API + * of the ExecutionBlockImpl implementation. + * This base class creates a server with a faked AQL query + * where we set our test node into. + * Also provides helper methods to create the building blocks of the query. + */ +class SharedExecutionBlockImplTest { + protected: + mocks::MockAqlServer server{}; + ResourceMonitor monitor{}; + std::unique_ptr fakedQuery{server.createFakeQuery()}; + std::vector> _execNodes; + + SharedExecutionBlockImplTest() { + auto engine = + std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); + fakedQuery->setEngine(engine.release()); + } + + /** + * @brief Creates and manages a ExecutionNode. + * These nodes can be used to create the Executors + * Caller does not need to manage the memory. + * + * @return ExecutionNode* Pointer to a dummy ExecutionNode. Memory is managed, do not delete. + */ + ExecutionNode* generateNodeDummy() { + auto dummy = std::make_unique(fakedQuery->plan(), _execNodes.size()); + auto res = dummy.get(); + _execNodes.emplace_back(std::move(dummy)); + return res; + } + + /** + * @brief Prepare the executor infos for a LambdaExecutor with passthrough. + * + * @param call produceRows implementation that should be used + * @param inputRegisters highest input register index. RegisterPlan::MaxRegisterId (default) describes there is no input. call is allowed to read any register <= inputRegisters. + * @param outputRegisters highest output register index. RegisterPlan::MaxRegisterId (default) describes there is no output. call is allowed to write any inputRegisters < register <= outputRegisters. Invariant inputRegisters <= outputRegisters + * @return LambdaExecutorInfos Infos to build the Executor. + */ + LambdaExecutorInfos makeInfos(ProduceCall call, + RegisterId inputRegisters = RegisterPlan::MaxRegisterId, + RegisterId outputRegisters = RegisterPlan::MaxRegisterId) { + if (inputRegisters != RegisterPlan::MaxRegisterId) { + EXPECT_LE(inputRegisters, outputRegisters); + // We cannot have no output registers here. 
+ EXPECT_LT(outputRegisters, RegisterPlan::MaxRegisterId); + } else if (outputRegisters != RegisterPlan::MaxRegisterId) { + // Special case: we do not have input registers, but need an output register. + // For now we only allow a single output register, but actually we could leverage this restriction if necessary. + EXPECT_EQ(outputRegisters, 0); + } + + auto readAble = make_shared_unordered_set(); + auto writeAble = make_shared_unordered_set(); + auto registersToKeep = std::unordered_set{}; + if (inputRegisters != RegisterPlan::MaxRegisterId) { + for (RegisterId i = 0; i <= inputRegisters; ++i) { + readAble->emplace(i); + registersToKeep.emplace(i); + } + for (RegisterId i = inputRegisters + 1; i <= outputRegisters; ++i) { + writeAble->emplace(i); + } + } else if (outputRegisters != RegisterPlan::MaxRegisterId) { + for (RegisterId i = 0; i <= outputRegisters; ++i) { + writeAble->emplace(i); + } + } + RegisterId regsToRead = + (inputRegisters == RegisterPlan::MaxRegisterId) ? 0 : inputRegisters + 1; + RegisterId regsToWrite = + (outputRegisters == RegisterPlan::MaxRegisterId) ? 0 : outputRegisters + 1; + return LambdaExecutorInfos(readAble, writeAble, regsToRead, regsToWrite, {}, + registersToKeep, std::move(call)); + } + + /** + * @brief Prepare the executor infos for a LambdaExecutor with implemented skip. + * + * @param call produceRows implementation that should be used + * @param skipCall skipRowsRange implementation that should be used + * @param inputRegisters highest input register index. RegisterPlan::MaxRegisterId (default) describes there is no input. call is allowed to read any register <= inputRegisters. + * @param outputRegisters highest output register index. RegisterPlan::MaxRegisterId (default) describes there is no output. call is allowed to write any inputRegisters < register <= outputRegisters. Invariant inputRegisters <= outputRegisters + * @return LambdaExecutorInfos Infos to build the Executor. + */ + LambdaSkipExecutorInfos makeSkipInfos(ProduceCall call, SkipCall skipCall, + RegisterId inputRegisters = RegisterPlan::MaxRegisterId, + RegisterId outputRegisters = RegisterPlan::MaxRegisterId) { + if (inputRegisters != RegisterPlan::MaxRegisterId) { + EXPECT_LE(inputRegisters, outputRegisters); + // We cannot have no output registers here. + EXPECT_LT(outputRegisters, RegisterPlan::MaxRegisterId); + } else if (outputRegisters != RegisterPlan::MaxRegisterId) { + // Special case: we do not have input registers, but need an output register. + // For now we only allow a single output register, but actually we could leverage this restriction if necessary. + EXPECT_EQ(outputRegisters, 0); + } + + auto readAble = make_shared_unordered_set(); + auto writeAble = make_shared_unordered_set(); + auto registersToKeep = std::unordered_set{}; + if (inputRegisters != RegisterPlan::MaxRegisterId) { + for (RegisterId i = 0; i <= inputRegisters; ++i) { + readAble->emplace(i); + registersToKeep.emplace(i); + } + for (RegisterId i = inputRegisters + 1; i <= outputRegisters; ++i) { + writeAble->emplace(i); + } + } else if (outputRegisters != RegisterPlan::MaxRegisterId) { + for (RegisterId i = 0; i <= outputRegisters; ++i) { + writeAble->emplace(i); + } + } + RegisterId regsToRead = + (inputRegisters == RegisterPlan::MaxRegisterId) ? 0 : inputRegisters + 1; + RegisterId regsToWrite = + (outputRegisters == RegisterPlan::MaxRegisterId) ? 
0 : outputRegisters + 1; + return LambdaSkipExecutorInfos(readAble, writeAble, regsToRead, regsToWrite, + {}, registersToKeep, std::move(call), + std::move(skipCall)); + } + /** + * @brief Create a Singleton ExecutionBlock. Just like the original one in the + * query. it is already initialized and ready to use. + * + * @return std::unique_ptr The singleton ExecutionBlock. + */ + std::unique_ptr createSingleton() { + auto res = std::make_unique>>( + fakedQuery->engine(), generateNodeDummy(), IdExecutorInfos{0, {}, {}}); + InputAqlItemRow inputRow{CreateInvalidInputRowHint{}}; + auto const [state, result] = res->initializeCursor(inputRow); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_TRUE(result.ok()); + return res; + } + + /** + * @brief Generate a generic produce call with the following behaviour: + * 1. It does not produce any output, it just validates that it gets expected input + * 2. Return the input state, along with an unlimited produce call. + * + * In addition we have the following assertions: + * 1. Whenever this produce is called, it asserts that is called with the expectedCall + * 2. This call has been called less then 10 times (emergency bailout against infinite loop) + * 3. If there is an input row, this row is valid. + * 4. If called with empty input, we still have exactly numRowsLeftNoInput many rows free in the output + * 5. If called with input, we still have exactly numRowsLeftWithInput many rows free in the output + * + * @param nrCalls Reference! Will count how many times this function was invoked. + * @param expectedCall The call that is expected on every invocation of this function. + * @param numRowsLeftNoInput The number of available rows in the output, if we have empty input (cold start) + * @param numRowsLeftWithInput The number of available rows in the output, if we have given an input + * @return ProduceCall The call ready to hand over to the LambdaExecutorInfos + */ + ProduceCall generateProduceCall(size_t& nrCalls, AqlCall expectedCall, + size_t numRowsLeftNoInput = ExecutionBlock::DefaultBatchSize, + size_t numRowsLeftWithInput = ExecutionBlock::DefaultBatchSize) { + return [&nrCalls, numRowsLeftNoInput, numRowsLeftWithInput, + expectedCall](AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + auto const& clientCall = output.getClientCall(); + if (nrCalls > 10) { + EXPECT_TRUE(false); + // This is emergency bailout, we ask way to often here + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + nrCalls++; + if (input.hasDataRow()) { + // We expact only the empty initial row, so just consume it + auto const [state, row] = input.nextDataRow(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_TRUE(row.isInitialized()); + EXPECT_EQ(output.numRowsLeft(), numRowsLeftWithInput); + } else { + EXPECT_EQ(output.numRowsLeft(), numRowsLeftNoInput); + } + EXPECT_EQ(clientCall.getOffset(), expectedCall.getOffset()); + EXPECT_EQ(clientCall.softLimit, expectedCall.softLimit); + EXPECT_EQ(clientCall.hardLimit, expectedCall.hardLimit); + EXPECT_EQ(clientCall.needsFullCount(), expectedCall.needsFullCount()); + + NoStats stats{}; + AqlCall call{}; + return {input.upstreamState(), stats, call}; + }; + } + + /** + * @brief Generate a generic skip call with the following behaviour: + * 1. For every given input: skip it, and count skip as one. + * 2. Do never skip more then offset() + * 3. 
Return the input state, the locally skipped number, a call with softLimit = offset + softLimit, hardLimit = offset + hardLimit + * + * In addition we have the following assertions: + * 1. Whenever this produce is called, it asserts that is called with the expectedCall + * 2. This call has been called less then 10 times (emergency bailout against infinite loop) + * 3. If there is an input row, this row is valid. + * + * @param nrCalls Reference! Will count how many times this function was invoked. + * @param expectedCall The call that is expected on every invocation of this function. + * @return SkipCall The call ready to hand over to the LambdaExecutorInfos + */ + SkipCall generateSkipCall(size_t& nrCalls, AqlCall expectedCall) { + return [&nrCalls, + expectedCall](AqlItemBlockInputRange& inputRange, + AqlCall& clientCall) -> std::tuple { + if (nrCalls > 10) { + EXPECT_TRUE(false); + // This is emergency bailout, we ask way to often here + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + nrCalls++; + EXPECT_EQ(clientCall.getOffset(), expectedCall.getOffset()); + EXPECT_EQ(clientCall.softLimit, expectedCall.softLimit); + EXPECT_EQ(clientCall.hardLimit, expectedCall.hardLimit); + EXPECT_EQ(clientCall.needsFullCount(), expectedCall.needsFullCount()); + size_t localSkip = 0; + while (inputRange.hasDataRow() && clientCall.getOffset() > localSkip) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + localSkip++; + } + clientCall.didSkip(localSkip); + + AqlCall upstreamCall = clientCall; + upstreamCall.softLimit = clientCall.getOffset() + clientCall.softLimit; + upstreamCall.hardLimit = clientCall.getOffset() + clientCall.hardLimit; + upstreamCall.offset = 0; + + return {inputRange.upstreamState(), localSkip, upstreamCall}; + }; + } + + /** + * @brief Generate a call that failes whenever it is actually called. + * Used to check that SKIP is not invoked + * + * @return SkipCall The always failing call to be used for the executor. + */ + SkipCall generateNeverSkipCall() { + return [](AqlItemBlockInputRange& input, + AqlCall& call) -> std::tuple { + // Should not be called here. No Skip! + EXPECT_TRUE(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + }; + } + + /** + * @brief Generate a call that failes whenever it is actually called. + * Used to check that produce is not invoked + * + * @return ProduceCall The always failing call to be used for the executor. + */ + ProduceCall generateNeverProduceCall() { + return [](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + // Should not be called here. No limit, only skip! + EXPECT_TRUE(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + }; + } +}; + +/** + * @brief Test the internal statemachine of the ExecutionBlockImpl. + * These test-cases focus on a single executor and assert that this Executor is called + * correctly given an input. None of the test is focussed on the generated output. That is done in the IntegrationTest part + * This is a parameterized test and tests passthrough (true) and non-passthrough variants (false) + */ +class ExecutionBlockImplExecuteSpecificTest : public SharedExecutionBlockImplTest, + public testing::TestWithParam { + protected: + /** + * @brief Generic test runner. 
Creates Lambda Executors, and returns ExecutionBlockImpl.execute(call), + * + * @param prod The Produce call that should be used within the Lambda Executor + * @param skip The Skip call that should be used wiithin the Lambda Executor (only used for non-passthrough) + * @param call The AqlCall that should be applied on the Executors. + * @return std::tuple Response of execute(call); + */ + auto runTest(ProduceCall& prod, SkipCall& skip, AqlCall call) + -> std::tuple { + AqlCallStack stack{std::move(call)}; + auto singleton = createSingleton(); + if (GetParam()) { + ExecutionBlockImpl testee{fakedQuery->engine(), + generateNodeDummy(), + makeInfos(prod)}; + testee.addDependency(singleton.get()); + return testee.execute(stack); + } else { + ExecutionBlockImpl testee{fakedQuery->engine(), generateNodeDummy(), + makeSkipInfos(prod, skip)}; + testee.addDependency(singleton.get()); + return testee.execute(stack); + } + } +}; + +// Test a default call: no skip, no limits. +TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_unlimited_call) { + AqlCall fullCall{}; + size_t nrCalls = 0; + + // Note here: passthrough only reserves the correct amount of rows. + // As we fetch from a singleton (1 row) we will have 0 rows (cold-start) and then exactly 1 row + // in the executor. + // Non passthrough does not make an estimate for this, so Batchsize is used. + ProduceCall execImpl = GetParam() ? generateProduceCall(nrCalls, fullCall, 0, 1) + : generateProduceCall(nrCalls, fullCall); + SkipCall skipCall = generateNeverSkipCall(); + auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); + + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + // Once with empty, once with the line by Singleton + EXPECT_EQ(nrCalls, 2); +} + +// Test a softlimit call: no skip, given softlimit. +TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_softlimit_call) { + AqlCall fullCall{}; + fullCall.softLimit = 20; + size_t nrCalls = 0; + + // Note here: passthrough only reserves the correct amount of rows. + // As we fetch from a singleton (1 row) we will have 0 rows (cold-start) and then exactly 1 row + // in the executor. + // Non passthrough the available lines (visible to executor) are only the given soft limit. + ProduceCall execImpl = GetParam() ? generateProduceCall(nrCalls, fullCall, 0, 1) + : generateProduceCall(nrCalls, fullCall, 20, 20); + SkipCall skipCall = generateNeverSkipCall(); + auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); + + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + // Once with empty, once with the line by Singleton + EXPECT_EQ(nrCalls, 2); +} + +// Test a hardlimit call: no skip, given hardlimit. +TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_hardlimit_call) { + AqlCall fullCall{}; + fullCall.hardLimit = 20; + size_t nrCalls = 0; + + // Note here: passthrough only reserves the correct amount of rows. + // As we fetch from a singleton (1 row) we will have 0 rows (cold-start) and then exactly 1 row + // in the executor. + // Non passthrough the available lines (visible to executor) are only the given soft limit. + ProduceCall execImpl = GetParam() ? 
generateProduceCall(nrCalls, fullCall, 0, 1) + : generateProduceCall(nrCalls, fullCall, 20, 20); + SkipCall skipCall = generateNeverSkipCall(); + auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); + + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + // Once with empty, once with the line by Singleton + EXPECT_EQ(nrCalls, 2); +} + +// Test a skip call: given skip, no limits. +TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_offset_call) { + AqlCall fullCall{}; + fullCall.offset = 20; + size_t nrCalls = 0; + + // Note here: We skip everything, no produce should be called + ProduceCall execImpl = generateNeverProduceCall(); + SkipCall skipCall = generateSkipCall(nrCalls, fullCall); + + auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); + + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 1); + if (GetParam()) { + // Do never call skip, pass through + EXPECT_EQ(nrCalls, 0); + } else { + // Call once without input, second with input + EXPECT_EQ(nrCalls, 2); + } + + EXPECT_EQ(block, nullptr); +} + +// Test a skip call: given skip, limit: 0 (formerly known as skipSome) +TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_offset_only_call) { + AqlCall fullCall{}; + fullCall.offset = 20; + // This test simulates a simple "skipSome" call on the old API. + // It is releveant in any intermediate state. + fullCall.softLimit = 0; + size_t nrCalls = 0; + + // Note here: We skip everything, no produce should be called + ProduceCall execImpl = generateNeverProduceCall(); + SkipCall skipCall = generateSkipCall(nrCalls, fullCall); + + auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); + + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 1); + if (GetParam()) { + // Do never call skip, pass through + EXPECT_EQ(nrCalls, 0); + } else { + // Call once without input, second with input + EXPECT_EQ(nrCalls, 2); + } + + EXPECT_EQ(block, nullptr); +} + +INSTANTIATE_TEST_CASE_P(ExecutionBlockImplExecuteTest, + ExecutionBlockImplExecuteSpecificTest, ::testing::Bool()); + +enum class CallAsserterState { INITIAL, SKIP, GET, COUNT, DONE }; + +/** + * @brief Base class for call assertions. + * Every asserter holds an internal statemachine. + * And is called on every invocation of the LambdaFunction. + * According to it's internal machine, it asserts that the input Call + * Is expected in this situation. + * + */ +struct BaseCallAsserter { + // Actual number of calls for this machine + size_t call = 0; + // Maximum allowed calls to this machine, we assert that call <= maxCall + size_t maxCall = 0; + // Internal state + CallAsserterState state = CallAsserterState::DONE; + // The expected outer call, the machine needs to extract relevant parts + AqlCall const expected; + + /** + * @brief Construct a new Base Call Asserter object + * + * @param expectedCall The given outer call. As we play several rounds (e.g. 
one call for skip one for get) the asserter needs to decompose this call + */ + explicit BaseCallAsserter(AqlCall const& expectedCall) + : expected{expectedCall} {} + + /** + * @brief Test if we need to expect a skip phase + * + * @return true Yes we have skip + * @return false No we do not have skip + */ + auto hasSkip() const -> bool { return expected.getOffset() > 0; } + /** + * @brief Test if we need to expect a produce phase + * + * @return true + * @return false + */ + auto hasLimit() const -> bool { return expected.getLimit() > 0; } + /** + * @brief Test if we need to expect a fullcount phase + * + * @return true + * @return false + */ + auto needsFullCount() const -> bool { return expected.needsFullCount(); } +}; + +/** + * @brief Asserter used for the skipRows implementation. + * Assumes that we are always called once with an empty input. + * And once with a given input. + * Will expect to be called for skip and fullCount (4 counts) + * Does expect to not be called if skip and/or fullCount are ommited. + */ +struct SkipCallAsserter : public BaseCallAsserter { + explicit SkipCallAsserter(AqlCall const& expectedCall) + : BaseCallAsserter{expectedCall} { + // Calculate number of calls + // Ordering here is important, as it defines the start + // state of the asserter. We first get called for skip + // so skip needs to be last here + if (needsFullCount()) { + maxCall += 2; + state = CallAsserterState::COUNT; + } + if (hasSkip()) { + maxCall += 2; + state = CallAsserterState::SKIP; + } + // It is possible that we actually have 0 calls. + // if there is neither skip nor limit + } + + auto gotCalled(AqlCall const& got) -> void { + call++; + switch (state) { + case CallAsserterState::SKIP: { + EXPECT_EQ(got.getOffset(), expected.getOffset()); + if (call % 2 == 0) { + if (needsFullCount()) { + state = CallAsserterState::COUNT; + } else { + state = CallAsserterState::DONE; + } + } + break; + } + case CallAsserterState::COUNT: { + EXPECT_EQ(got.getLimit(), 0); + EXPECT_EQ(got.getOffset(), 0); + EXPECT_TRUE(got.needsFullCount()); + if (call % 2 == 0) { + state = CallAsserterState::DONE; + } + break; + } + case CallAsserterState::INITIAL: + case CallAsserterState::GET: + case CallAsserterState::DONE: { + // This should not be reached + EXPECT_FALSE(true); + break; + } + } + EXPECT_LE(call, maxCall); + if (call > maxCall) { + // Security bailout to avoid infinite loops + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + } +}; + +/** + * @brief Asserter used for the produce method. + * Asserts to be called twice if data is requested. (limit > 0) + * Once with, once without data. 
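+ *
+ * For illustration, a sketch of the expected sequence under the
+ * softLimit-only client call {offset: 0, softLimit: 35} used further below
+ * (hasLimit() is true, so maxCall becomes 2):
+ *   gotCalled(call);  // first invocation: cold start, no input row yet
+ *   gotCalled(call);  // second invocation: now with the singleton's row
+ * The asserter expects call.getLimit() == 35 on both invocations; a third
+ * invocation would trip the maxCall bailout.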
+ */ +struct CallAsserter : public BaseCallAsserter { + explicit CallAsserter(AqlCall const& expectedCall) + : BaseCallAsserter{expectedCall} { + // Calculate number of calls + if (hasLimit()) { + maxCall += 2; + state = CallAsserterState::INITIAL; + } + } + + auto gotCalled(AqlCall const& got) -> void { + EXPECT_EQ(got.getOffset(), 0); + call++; + switch (state) { + case CallAsserterState::INITIAL: { + EXPECT_EQ(got.getLimit(), expected.getLimit()); + state = CallAsserterState::GET; + break; + } + case CallAsserterState::GET: { + EXPECT_EQ(got.getLimit(), expected.getLimit()); + state = CallAsserterState::DONE; + break; + } + case CallAsserterState::SKIP: + case CallAsserterState::COUNT: + case CallAsserterState::DONE: { + // This should not be reached + EXPECT_FALSE(true); + break; + } + } + EXPECT_LE(call, maxCall); + if (call > maxCall) { + // Security bailout to avoid infinite loops + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + } +}; + +/** + * @brief Asserter used "above" an executor that implements + * skip and produce, and transforms everything to produce. + * Expects to be called twice for each sitation (with and without input). + * Expect up to three situations: SKIP, GET and FULLCOUNT. + */ +struct GetOnlyCallAsserter : public BaseCallAsserter { + explicit GetOnlyCallAsserter(AqlCall const& expectedCall) + : BaseCallAsserter{expectedCall} { + // Calculate number of calls + // Ordering here is important, as it defines the start + // state of the asserter. We first get called for skip + // so skip needs to be last here + if (needsFullCount()) { + maxCall += 2; + state = CallAsserterState::COUNT; + } + if (hasLimit()) { + maxCall += 2; + state = CallAsserterState::GET; + } + if (hasSkip()) { + maxCall += 2; + state = CallAsserterState::SKIP; + } + // Make sure setup worked + EXPECT_GT(maxCall, 0); + EXPECT_NE(state, CallAsserterState::DONE); + } + + auto gotCalled(AqlCall const& got) -> void { + EXPECT_EQ(got.getOffset(), 0); + EXPECT_FALSE(got.needsFullCount()); + call++; + + switch (state) { + case CallAsserterState::SKIP: { + EXPECT_EQ(got.getLimit(), expected.getOffset()); + if (call % 2 == 0) { + // We only switch to next state every second call. + // The first call is "empty" and only forwards to upwards + if (hasLimit()) { + state = CallAsserterState::GET; + } else if (needsFullCount()) { + state = CallAsserterState::COUNT; + } else { + state = CallAsserterState::DONE; + } + } + break; + } + case CallAsserterState::GET: { + EXPECT_EQ(got.getLimit(), expected.getLimit()); + if (call % 2 == 0) { + // We only switch to next state every second call. + // The first call is "empty" and only forwards to upwards + if (needsFullCount()) { + state = CallAsserterState::COUNT; + } else { + state = CallAsserterState::DONE; + } + } + break; + } + + case CallAsserterState::COUNT: { + // We do not test 0,0,false + EXPECT_TRUE(needsFullCount()); + EXPECT_EQ(got.softLimit, AqlCall::Infinity{}); + EXPECT_EQ(got.hardLimit, AqlCall::Infinity{}); + if (call % 2 == 0) { + // We only switch to next state every second call. + // The first call is "empty" and only forwards to upwards + state = CallAsserterState::DONE; + } + break; + } + case CallAsserterState::INITIAL: + case CallAsserterState::DONE: { + // This should not be reached + EXPECT_FALSE(true); + break; + } + } + EXPECT_LE(call, maxCall); + if (call > maxCall) { + // Security bailout to avoid infinite loops + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + } +}; + +/** + * @brief Integration tests. 
+ * These test tests a chain of Executors. + * It focuses on the part that all executors + * get injected the correct calls in each iteration + * of the Execute state machine. + * Also asserts that "UPSTREAM" is called with the correct + * forwarded call. + * This is a parameterized testsuite that uses a set of pseudo-random AqlCalls of different formats. + */ +class ExecutionBlockImplExecuteIntegrationTest + : public SharedExecutionBlockImplTest, + public testing::TestWithParam { + protected: + /** + * @brief Create a Producing ExecutionBlock + * For every input row this block will write the array given in data + * into the output once. + * Each entry in the array goes into one line and is writen into outReg. + * + * @param dependency The dependecy of this block (produces input) + * @param data The data to be written, needs to be an array. + * @param outReg The register to be written to + * @return std::unique_ptr ready to use ProducerBlock. + */ + std::unique_ptr produceBlock(ExecutionBlock* dependency, + std::shared_ptr data, + RegisterId outReg) { + TRI_ASSERT(dependency != nullptr); + TRI_ASSERT(data != nullptr); + TRI_ASSERT(data->slice().isArray()); + // We make this a shared ptr just to make sure someone retains the data. + auto iterator = std::make_shared(data->slice()); + auto writeData = [data, outReg, iterator](AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.peekDataRow(); + EXPECT_TRUE(input.isInitialized()); + while (!output.isFull() && iterator->valid()) { + output.cloneValueInto(outReg, input, AqlValue{iterator->value()}); + output.advanceRow(); + iterator->next(); + } + if (!iterator->valid()) { + // Consume input + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + iterator->reset(); + } + } + // We always use a default unlimited call here, we only have Singleton above. + AqlCall call{}; + return {inputRange.upstreamState(), NoStats{}, call}; + }; + + auto skipData = + [data, iterator](AqlItemBlockInputRange& inputRange, + AqlCall& clientCall) -> std::tuple { + size_t skipped = 0; + while (inputRange.hasDataRow() && + (clientCall.getOffset() > 0 || + (clientCall.getLimit() == 0 && clientCall.needsFullCount()))) { + auto const& [state, input] = inputRange.peekDataRow(); + EXPECT_TRUE(input.isInitialized()); + while ((clientCall.getOffset() > 0 || + (clientCall.getLimit() == 0 && clientCall.needsFullCount())) && + iterator->valid()) { + clientCall.didSkip(1); + skipped++; + iterator->next(); + } + if (!iterator->valid()) { + // Consume input + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + iterator->reset(); + } + } + AqlCall call{}; + call.offset = 0; + if (clientCall.getOffset() > 0) { + call.softLimit = clientCall.getOffset(); + } // else softLimit == unlimited + call.fullCount = false; + return {inputRange.upstreamState(), skipped, call}; + }; + auto infos = outReg == 0 + ? makeSkipInfos(std::move(writeData), skipData, + RegisterPlan::MaxRegisterId, outReg) + : makeSkipInfos(std::move(writeData), skipData, outReg - 1, outReg); + auto producer = + std::make_unique>(fakedQuery->engine(), + generateNodeDummy(), + std::move(infos)); + producer->addDependency(dependency); + return producer; + } + + /** + * @brief Create a simple row forwarding Block. 
+ * It simply takes one input row and copies it into the output + * + * @param dependency The dependecy of this block (produces input) + * @param maxReg The number of registers in input and output. (required for forwarding of data) + * @return std::unique_ptr ready to use ForwardingBlock. + */ + std::unique_ptr forwardBlock(ExecutionBlock* dependency, RegisterId maxReg) { + TRI_ASSERT(dependency != nullptr); + auto forwardData = [](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + output.copyRow(input); + output.advanceRow(); + } + return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; + }; + auto producer = std::make_unique>( + fakedQuery->engine(), generateNodeDummy(), + makeInfos(std::move(forwardData), maxReg, maxReg)); + producer->addDependency(dependency); + return producer; + } + + /** + * @brief Helper method to validate the result + * It will take into account the call used as Parameter + * and slice the expectated outcome to it. + * + * It asserts the following: + * 1. skipped == offset() + (data.length - hardLimit [fullcount]) + * 2. result.length = (hardLimit||data.length) - offset. + * 3. result register entry matches the entry at the correct position in data. + * + * @param data The data to be expected, if we would just get it in full + * @param skipped The number of rows the executor reported as skipped + * @param result The resulting data output + * @param testReg The register to evaluate + */ + void ValidateResult(std::shared_ptr data, size_t skipped, + SharedAqlItemBlockPtr result, RegisterId testReg) { + auto const& call = GetParam(); + + TRI_ASSERT(data != nullptr); + TRI_ASSERT(data->slice().isArray()); + + VPackSlice expected = data->slice(); + VPackArrayIterator expectedIt{expected}; + // Skip Part + size_t offset = + (std::min)(call.getOffset(), static_cast(expected.length())); + + if (!call.needsFullCount()) { + // Otherweise skipped = offset + fullCount + EXPECT_EQ(offset, skipped); + } + + for (size_t i = 0; i < offset; ++i) { + // The first have been skipped + expectedIt++; + } + size_t limit = + (std::min)(call.getLimit(), static_cast(expected.length()) - offset); + if (result != nullptr) { + // GetSome part + EXPECT_EQ(limit, result->size()); + for (size_t i = 0; i < limit; ++i) { + // The next have to match + auto got = result->getValueReference(i, testReg).slice(); + EXPECT_TRUE(basics::VelocyPackHelper::equal(got, *expectedIt, false)) + << "Expected: " << expectedIt.value().toJson() << " got: " << got.toJson() + << " in row " << i << " and register " << testReg; + expectedIt++; + } + } else { + EXPECT_EQ(limit, 0); + } + + // Now test Fullcount + if (call.needsFullCount()) { + ASSERT_TRUE(expected.length() >= offset + limit); + size_t fullCount = expected.length() - offset - limit; + EXPECT_EQ(offset + fullCount, skipped); + } + } +}; + +// Test a simple produce block. that has is supposed to write 1000 rows. 
+TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_only) { + auto singleton = createSingleton(); + + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < 1000; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + RegisterId outReg = 0; + auto producer = produceBlock(singleton.get(), builder, outReg); + + auto const& call = GetParam(); + AqlCallStack stack{call}; + auto const [state, skipped, block] = producer->execute(stack); + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + } else { + EXPECT_EQ(state, ExecutionState::DONE); + } + + ValidateResult(builder, skipped, block, outReg); +} + +// Test two consecutive produce blocks. +// The first writes 10 lines +// The second another 10 per input (100 in total) +TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_using_two) { + auto singleton = createSingleton(); + + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < 10; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + RegisterId outRegFirst = 0; + RegisterId outRegSecond = 1; + auto producerFirst = produceBlock(singleton.get(), builder, outRegFirst); + auto producer = produceBlock(producerFirst.get(), builder, outRegSecond); + auto const& call = GetParam(); + AqlCallStack stack{call}; + auto const [state, skipped, block] = producer->execute(stack); + if (call.getLimit() < 100) { + if (call.hasHardLimit()) { + // On hard limit we need to stop + EXPECT_EQ(state, ExecutionState::DONE); + } else { + // On soft limit we need to be able to produce more + EXPECT_EQ(state, ExecutionState::HASMORE); + } + } else { + EXPECT_FALSE(call.hasHardLimit()); + EXPECT_EQ(state, ExecutionState::DONE); + } + + auto firstRegBuilder = std::make_shared(); + auto secondRegBuilder = std::make_shared(); + firstRegBuilder->openArray(); + secondRegBuilder->openArray(); + for (size_t i = 0; i < 10; ++i) { + // i => 0 -> 9 + for (size_t j = 0; j < 10; ++j) { + // j => 0 -> 9 + firstRegBuilder->add(VPackValue(i)); + secondRegBuilder->add(VPackValue(j)); + } + } + secondRegBuilder->close(); + firstRegBuilder->close(); + ValidateResult(firstRegBuilder, skipped, block, outRegFirst); + ValidateResult(secondRegBuilder, skipped, block, outRegSecond); +} + +// Explicitly test call forwarding, on exectors. +// We use two pass-through producers, that simply copy over input and assert an calls. +// On top of them we have a 1000 line producer. +// We expect the result to be identical to the 1000 line producer only. 
+TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_passthrough) { + auto singleton = createSingleton(); + + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < 1000; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + RegisterId outReg = 0; + auto producer = produceBlock(singleton.get(), builder, outReg); + + CallAsserter upperState{GetParam()}; + CallAsserter lowerState{GetParam()}; + + auto testForwarding = + [&](AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) -> std::tuple { + upperState.gotCalled(output.getClientCall()); + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + output.copyRow(input); + output.advanceRow(); + } + return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; + }; + auto forwardCall = [&](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { + lowerState.gotCalled(output.getClientCall()); + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + output.copyRow(input); + output.advanceRow(); + } + return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; + }; + auto upper = std::make_unique>( + fakedQuery->engine(), generateNodeDummy(), + makeInfos(std::move(testForwarding), outReg, outReg)); + upper->addDependency(producer.get()); + auto lower = std::make_unique>( + fakedQuery->engine(), generateNodeDummy(), + makeInfos(std::move(forwardCall), outReg, outReg)); + lower->addDependency(upper.get()); + + auto const& call = GetParam(); + AqlCallStack stack{call}; + auto const [state, skipped, block] = lower->execute(stack); + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + } else { + EXPECT_EQ(state, ExecutionState::DONE); + } + ValidateResult(builder, skipped, block, outReg); +} + +// Explicitly test call forwarding, on exectors. +// We use one pass-through producer, that simply copy over input and assert an calls. +// And we have one non-passthrough below it, that requests all data from upstream, and internally +// does skipping. +// On top of them we have a 1000 line producer. +// We expect the result to be identical to the 1000 line producer only. 
+TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_skip) { + auto singleton = createSingleton(); + + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < 1000; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + RegisterId outReg = 0; + auto producer = produceBlock(singleton.get(), builder, outReg); + GetOnlyCallAsserter upperState{GetParam()}; + CallAsserter lowerState{GetParam()}; + SkipCallAsserter skipState{GetParam()}; + + auto testForwarding = + [&](AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) -> std::tuple { + upperState.gotCalled(output.getClientCall()); + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + output.copyRow(input); + output.advanceRow(); + } + return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; + }; + auto forwardCall = [&](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { + lowerState.gotCalled(output.getClientCall()); + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + output.copyRow(input); + output.advanceRow(); + } + auto getClient = output.getClientCall(); + AqlCall request{}; + request.softLimit = (std::min)(getClient.softLimit, getClient.hardLimit); + return {inputRange.upstreamState(), NoStats{}, request}; + }; + auto forwardSkipCall = [&](AqlItemBlockInputRange& inputRange, + AqlCall& call) -> std::tuple { + skipState.gotCalled(call); + size_t skipped = 0; + while (inputRange.hasDataRow() && + (call.getOffset() > 0 || (call.getLimit() == 0 && call.needsFullCount()))) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + skipped++; + } + call.didSkip(skipped); + // Do forward a softLimit call only. + // Do not oeverfetch here. 
+ AqlCall request; + if (call.getOffset() > 0) { + request.softLimit = call.getOffset(); + } // else fullCount case, simple get UNLIMITED from above + + return {inputRange.upstreamState(), skipped, request}; + }; + + auto upper = std::make_unique>( + fakedQuery->engine(), generateNodeDummy(), + makeInfos(std::move(testForwarding), outReg, outReg)); + upper->addDependency(producer.get()); + auto lower = std::make_unique>( + fakedQuery->engine(), generateNodeDummy(), + makeSkipInfos(std::move(forwardCall), std::move(forwardSkipCall), outReg, outReg)); + lower->addDependency(upper.get()); + + auto const& call = GetParam(); + AqlCallStack stack{call}; + auto const [state, skipped, block] = lower->execute(stack); + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + } else { + EXPECT_EQ(state, ExecutionState::DONE); + } + ValidateResult(builder, skipped, block, outReg); +} + +// The numbers here are random, but all of them are below 1000 which is the default batch size +static constexpr auto defaultCall = []() -> const AqlCall { return AqlCall{}; }; + +static constexpr auto skipCall = []() -> const AqlCall { + AqlCall res{}; + res.offset = 15; + return res; +}; + +static constexpr auto softLimit = []() -> const AqlCall { + AqlCall res{}; + res.softLimit = 35; + return res; +}; + +static constexpr auto hardLimit = []() -> const AqlCall { + AqlCall res{}; + res.hardLimit = 76; + return res; +}; + +static constexpr auto fullCount = []() -> const AqlCall { + AqlCall res{}; + res.hardLimit = 17; + res.fullCount = true; + return res; +}; + +static constexpr auto skipAndSoftLimit = []() -> const AqlCall { + AqlCall res{}; + res.offset = 16; + res.softLimit = 64; + return res; +}; + +static constexpr auto skipAndHardLimit = []() -> const AqlCall { + AqlCall res{}; + res.offset = 32; + res.hardLimit = 71; + return res; +}; +static constexpr auto skipAndHardLimitAndFullCount = []() -> const AqlCall { + AqlCall res{}; + res.offset = 8; + res.hardLimit = 57; + res.fullCount = true; + return res; +}; +static constexpr auto onlyFullCount = []() -> const AqlCall { + AqlCall res{}; + res.hardLimit = 0; + res.fullCount = true; + return res; +}; +static constexpr auto onlySkipAndCount = []() -> const AqlCall { + AqlCall res{}; + res.offset = 16; + res.hardLimit = 0; + res.fullCount = true; + return res; +}; + +INSTANTIATE_TEST_CASE_P(ExecutionBlockExecuteIntegration, ExecutionBlockImplExecuteIntegrationTest, + ::testing::Values(defaultCall(), skipCall(), + softLimit(), hardLimit(), fullCount(), + skipAndSoftLimit(), skipAndHardLimit(), + skipAndHardLimitAndFullCount(), + onlyFullCount(), onlySkipAndCount())); + } // namespace aql } // namespace tests } // namespace arangodb diff --git a/tests/Aql/ExecutionBlockImplTestInstances.cpp b/tests/Aql/ExecutionBlockImplTestInstances.cpp index aeb02fe8982d..b5bfc68a589b 100644 --- a/tests/Aql/ExecutionBlockImplTestInstances.cpp +++ b/tests/Aql/ExecutionBlockImplTestInstances.cpp @@ -1,6 +1,9 @@ #include "Aql/ExecutionBlockImpl.cpp" #include "TestEmptyExecutorHelper.h" #include "TestExecutorHelper.h" +#include "TestLambdaExecutor.h" template class ::arangodb::aql::ExecutionBlockImpl; template class ::arangodb::aql::ExecutionBlockImpl; +template class ::arangodb::aql::ExecutionBlockImpl; +template class ::arangodb::aql::ExecutionBlockImpl; diff --git a/tests/Aql/TestLambdaExecutor.cpp b/tests/Aql/TestLambdaExecutor.cpp new file mode 100644 index 000000000000..92083adc9fb1 --- /dev/null +++ 
b/tests/Aql/TestLambdaExecutor.cpp @@ -0,0 +1,112 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "TestLambdaExecutor.h" +#include "Basics/Exceptions.h" +#include "Basics/debugging.h" + +#include "Aql/AqlCall.h" + +using namespace arangodb; +using namespace arangodb::aql; + +LambdaExecutorInfos::LambdaExecutorInfos( + std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, ProduceCall lambda) + : ExecutorInfos(readableInputRegisters, writeableOutputRegisters, nrInputRegisters, + nrOutputRegisters, registersToClear, registersToKeep), + _produceLambda(lambda) {} + +auto LambdaExecutorInfos::getProduceLambda() const -> ProduceCall const& { + return _produceLambda; +} + +LambdaSkipExecutorInfos::LambdaSkipExecutorInfos( + std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, ProduceCall lambda, SkipCall skipLambda) + : ExecutorInfos(readableInputRegisters, writeableOutputRegisters, nrInputRegisters, + nrOutputRegisters, registersToClear, registersToKeep), + _produceLambda(lambda), + _skipLambda(skipLambda) {} + +auto LambdaSkipExecutorInfos::getProduceLambda() const -> ProduceCall const& { + return _produceLambda; +} + +auto LambdaSkipExecutorInfos::getSkipLambda() const -> SkipCall const& { + return _skipLambda; +} + +TestLambdaExecutor::TestLambdaExecutor(Fetcher&, Infos& infos) + : _infos(infos) {} + +TestLambdaExecutor::~TestLambdaExecutor() {} + +auto TestLambdaExecutor::fetchBlockForPassthrough(size_t atMost) + -> std::tuple { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +auto TestLambdaExecutor::produceRows(OutputAqlItemRow& output) + -> std::tuple { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +auto TestLambdaExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + return _infos.getProduceLambda()(input, output); +} + +TestLambdaSkipExecutor::TestLambdaSkipExecutor(Fetcher&, Infos& infos) + : _infos(infos) {} + +TestLambdaSkipExecutor::~TestLambdaSkipExecutor() {} + +auto TestLambdaSkipExecutor::fetchBlockForPassthrough(size_t atMost) + -> std::tuple { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +auto TestLambdaSkipExecutor::produceRows(OutputAqlItemRow& output) + -> std::tuple { + TRI_ASSERT(false); + 
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +auto TestLambdaSkipExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + return _infos.getProduceLambda()(input, output); +} + +auto TestLambdaSkipExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + return _infos.getSkipLambda()(input, call); +} \ No newline at end of file diff --git a/tests/Aql/TestLambdaExecutor.h b/tests/Aql/TestLambdaExecutor.h new file mode 100644 index 000000000000..9663ca6ca249 --- /dev/null +++ b/tests/Aql/TestLambdaExecutor.h @@ -0,0 +1,233 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_TEST_LAMBDA_EXECUTOR_H +#define ARANGOD_AQL_TEST_LAMBDA_EXECUTOR_H + +#include "Aql/ExecutionState.h" +#include "Aql/ExecutorInfos.h" +#include "Aql/SharedAqlItemBlockPtr.h" +#include "Aql/Stats.h" +#include "Aql/types.h" + +namespace arangodb { +namespace aql { + +struct AqlCall; +class AqlItemBlockInputRange; +class OutputAqlItemRow; +class SharedAqlItemBlockPtr; + +template +class SingleRowFetcher; + +/** + * @brief This is a shorthand for the produceRows signature + */ +using ProduceCall = + std::function(AqlItemBlockInputRange& input, OutputAqlItemRow& output)>; + +/** + * @brief This is a shorthand for the skipRowsInRange signature + */ +using SkipCall = + std::function(AqlItemBlockInputRange& input, AqlCall& call)>; + +/** + * @brief Executorinfos for the lambda executors. + * Contains basice RegisterPlanning information, and a ProduceCall. + * This produceCall will be executed whenever the LambdaExecutor is called with produceRows + */ +class LambdaExecutorInfos : public ExecutorInfos { + public: + LambdaExecutorInfos(std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, ProduceCall lambda); + + LambdaExecutorInfos() = delete; + LambdaExecutorInfos(LambdaExecutorInfos&&) = default; + LambdaExecutorInfos(LambdaExecutorInfos const&) = delete; + ~LambdaExecutorInfos() = default; + + auto getProduceLambda() const -> ProduceCall const&; + + private: + ProduceCall _produceLambda; +}; + +/** + * @brief Executorinfos for the lambda executors. 
+ * Contains basice RegisterPlanning information, a ProduceCall, and a SkipCall + * The produceCall will be executed whenever the LambdaExecutor is called with produceRows + * The skipCall will be executed whenever the LambdaExecutor is called with skipRowsInRange + */ +class LambdaSkipExecutorInfos : public ExecutorInfos { + public: + LambdaSkipExecutorInfos(std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, + ProduceCall lambda, SkipCall skipLambda); + + LambdaSkipExecutorInfos() = delete; + LambdaSkipExecutorInfos(LambdaSkipExecutorInfos&&) = default; + LambdaSkipExecutorInfos(LambdaSkipExecutorInfos const&) = delete; + ~LambdaSkipExecutorInfos() = default; + + auto getProduceLambda() const -> ProduceCall const&; + auto getSkipLambda() const -> SkipCall const&; + + private: + ProduceCall _produceLambda; + SkipCall _skipLambda; +}; + +/** + * @brief A passthrough test executor. + * Does only implement produceRows, also the implementation just calls + * the ProduceCall given in the Infos. + * + */ +class TestLambdaExecutor { + public: + struct Properties { + static const bool preservesOrder = true; + static const BlockPassthrough allowsBlockPassthrough = BlockPassthrough::Enable; + static const bool inputSizeRestrictsOutputSize = false; + }; + using Fetcher = SingleRowFetcher; + using Infos = LambdaExecutorInfos; + using Stats = NoStats; + + TestLambdaExecutor() = delete; + TestLambdaExecutor(TestLambdaExecutor&&) = default; + TestLambdaExecutor(TestLambdaExecutor const&) = delete; + TestLambdaExecutor(Fetcher&, Infos&); + ~TestLambdaExecutor(); + + /** + * @brief NOT IMPLEMENTED. JUST FOR COMPILER + * TODO: REMOVE ME after we have switch everything over to produceRow. + * + * @param atMost + * @return std::tuple + */ + auto fetchBlockForPassthrough(size_t atMost) + -> std::tuple; + /** + * @brief NOT IMPLEMENTED. JUST FOR COMPILER + * TODO: REMOVE ME after we have switch everything over to produceRow. + * + * @param output + * @return std::tuple + */ + auto produceRows(OutputAqlItemRow& output) -> std::tuple; + + /** + * @brief produceRows API. Just calls the ProduceCall in the Infos. + * + * @param input The input data range (might be empty) + * @param output The output rows (might be full) + * @return std::tuple + */ + auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + private: + Infos& _infos; +}; + +/** + * @brief A non-passthrough test executor. + * Does implement produceRows, also the implementation just calls + * the ProduceCall given in the Infos. + * Does implement skipRowsRange, also the implementation just calls + * the SkipCall given in the Infos. + * + */ +class TestLambdaSkipExecutor { + public: + struct Properties { + static const bool preservesOrder = true; + static const BlockPassthrough allowsBlockPassthrough = BlockPassthrough::Disable; + static const bool inputSizeRestrictsOutputSize = false; + }; + using Fetcher = SingleRowFetcher; + using Infos = LambdaSkipExecutorInfos; + using Stats = NoStats; + + TestLambdaSkipExecutor() = delete; + TestLambdaSkipExecutor(TestLambdaSkipExecutor&&) = default; + TestLambdaSkipExecutor(TestLambdaSkipExecutor const&) = delete; + TestLambdaSkipExecutor(Fetcher&, Infos&); + ~TestLambdaSkipExecutor(); + + /** + * @brief NOT IMPLEMENTED. 
JUST FOR COMPILER + * TODO: REMOVE ME after we have switch everything over to produceRow. + * + * @param atMost + * @return std::tuple + */ + auto fetchBlockForPassthrough(size_t atMost) + -> std::tuple; + + /** + * @brief NOT IMPLEMENTED. JUST FOR COMPILER + * TODO: REMOVE ME after we have switch everything over to produceRow. + * + * @param output + * @return std::tuple + */ + auto produceRows(OutputAqlItemRow& output) -> std::tuple; + + /** + * @brief skipRows API. Just calls the SkipCall in the infos + * + * @param inputRange The input data range (might be empty) + * @param call The call forwarded by the client. + * @return std::tuple + */ + auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; + + /** + * @brief produceRows API. Just calls the ProduceCall in the Infos. + * + * @param input The input data range (might be empty) + * @param output The output rows (might be full) + * @return std::tuple + */ + auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + private: + Infos& _infos; +}; + +} // namespace aql +} // namespace arangodb + +#endif \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3357f97b7f14..2c97528af8cb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -67,6 +67,7 @@ set(ARANGODB_TESTS_SOURCES Aql/SubqueryStartExecutorTest.cpp Aql/TestEmptyExecutorHelper.cpp Aql/TestExecutorHelper.cpp + Aql/TestLambdaExecutor.cpp Aql/TraversalExecutorTest.cpp Aql/UpdateExecutorTest.cpp Aql/UpsertExecutorTest.cpp diff --git a/tests/Mocks/Servers.cpp b/tests/Mocks/Servers.cpp index 535623ecee89..b0a33d69a65c 100644 --- a/tests/Mocks/Servers.cpp +++ b/tests/Mocks/Servers.cpp @@ -62,6 +62,7 @@ #include "RestServer/UpgradeFeature.h" #include "RestServer/ViewTypesFeature.h" #include "Scheduler/SchedulerFeature.h" +#include "Servers.h" #include "Sharding/ShardingFeature.h" #include "StorageEngine/EngineSelectorFeature.h" #include "Transaction/Methods.h" @@ -326,7 +327,8 @@ void MockServer::startFeatures() { if (_server.hasFeature()) { auto& sched = _server.getFeature(); // Needed to set nrMaximalThreads - sched.validateOptions(std::make_shared("", "", "", nullptr)); + sched.validateOptions( + std::make_shared("", "", "", nullptr)); } for (ApplicationFeature& f : orderedFeatures) { @@ -450,12 +452,15 @@ std::shared_ptr MockAqlServer::createFakeTransac noCollections, opts); } -std::unique_ptr MockAqlServer::createFakeQuery() const { +std::unique_ptr MockAqlServer::createFakeQuery(bool activateTracing) const { auto bindParams = std::make_shared(); bindParams->openObject(); bindParams->close(); auto queryOptions = std::make_shared(); queryOptions->openObject(); + if (activateTracing) { + queryOptions->add("profile", VPackValue(aql::PROFILE_LEVEL_TRACE_2)); + } queryOptions->close(); aql::QueryString fakeQueryString(""); auto query = @@ -556,8 +561,8 @@ void MockClusterServer::agencyDropDatabase(std::string const& name) { MockDBServer::MockDBServer(bool start) : MockClusterServer() { arangodb::ServerState::instance()->setRole(arangodb::ServerState::RoleEnum::ROLE_DBSERVER); - addFeature(false); // do not start the thread - addFeature(false); // do not start the thread + addFeature(false); // do not start the thread + addFeature(false); // do not start the thread if (start) { startFeatures(); createDatabase("_system"); @@ -578,10 +583,10 @@ TRI_vocbase_t* MockDBServer::createDatabase(std::string const& name) { maintenance::ActionDescription ad( 
{{std::string(maintenance::NAME), std::string(maintenance::CREATE_DATABASE)}, {std::string(maintenance::DATABASE), std::string(name)}}, - maintenance::HIGHER_PRIORITY); + maintenance::HIGHER_PRIORITY); auto& mf = _server.getFeature(); maintenance::CreateDatabase cd(mf, ad); - cd.first(); // Does the job + cd.first(); // Does the job } auto& databaseFeature = _server.getFeature(); @@ -603,10 +608,10 @@ void MockDBServer::dropDatabase(std::string const& name) { maintenance::ActionDescription ad( {{std::string(maintenance::NAME), std::string(maintenance::DROP_DATABASE)}, {std::string(maintenance::DATABASE), std::string(name)}}, - maintenance::HIGHER_PRIORITY); + maintenance::HIGHER_PRIORITY); auto& mf = _server.getFeature(); maintenance::DropDatabase dd(mf, ad); - dd.first(); // Does the job + dd.first(); // Does the job } MockCoordinator::MockCoordinator(bool start) : MockClusterServer() { diff --git a/tests/Mocks/Servers.h b/tests/Mocks/Servers.h index 596c45b83f5f..de8d7d3ba552 100644 --- a/tests/Mocks/Servers.h +++ b/tests/Mocks/Servers.h @@ -61,9 +61,7 @@ class MockServer { void init(); TRI_vocbase_t& getSystemDatabase() const; - std::string const testFilesystemPath() const { - return _testFilesystemPath; - } + std::string const testFilesystemPath() const { return _testFilesystemPath; } // add a feature to the underlying server, keep track of it; // all added features will be prepared in startFeatures(), and unprepared in @@ -100,9 +98,7 @@ class MockServer { void stopFeatures(); protected: - - arangodb::application_features::ApplicationServer::State - _oldApplicationServerState; + arangodb::application_features::ApplicationServer::State _oldApplicationServerState; arangodb::application_features::ApplicationServer _server; StorageEngineMock _engine; std::unordered_map _features; @@ -132,7 +128,7 @@ class MockAqlServer : public MockServer, ~MockAqlServer(); std::shared_ptr createFakeTransaction() const; - std::unique_ptr createFakeQuery() const; + std::unique_ptr createFakeQuery(bool activateTracing = false) const; }; class MockRestServer : public MockServer, From 5b3e717e804b0380c8088bced91a1c68f5908ebd Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 27 Jan 2020 14:28:29 +0100 Subject: [PATCH 055/122] Feature/aql subquery execution block impl execute implementation shortest path executor (#10780) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Hook up new ShortestPathExecutor * Small fixes * Removed now unused code. * Moved ShortestPathExecutor tests to use Execute only. * Added a test for shortestPathExecutor where the output is smaller than the amount of entries within the path. * Added a generic Lambda Executor, this can be used in ExecutionBlockImplTests to have finegrained control over the action happening within the Executor * Fixed Windows compile issue * Added first test using the LambdaExecutor. * Added some tests around Execute. GetSome seems to be okayish. Skip not. Namely skipped numbers are not reported correctly. * Made the first ExecutionBlock Execute integration test pass. Still other tests are failing * Simplified the IsSkipSome test, this allows the C++ tests for Mixed Calls to pass. * Windows compile fix * Attempt at fixing ShortestPathExecutorTest for Windows * Windows compile fix * Added a skip test and fixed the producing executor to recreate the list once for every input line. * Frankenstein my tests into the branch * More tests. Also added a custom AqlCall printer function for GTest. 
Tests still red, need to fix fullCount and hardLimit. * Implemented ostream operator for AQLCall. * Properly implemented fullCount incl. UnitTest * Added test for Callforwarding. They still have some todos, but these can only be solved by upgrading the OutputRow. Which should be part of separate PR * Added another test for CallForwarding in passthrough state * Added a Test Implementation for an Executor that uses a dynamic skip implementation. * Fixed skip with HARDLIMIT. * Startet to implement call forwarding test. However we need to improve Outputrow first this will be done in seperate branch * Hack * Removed designated initializers. Thanks for not supporting it MSVC! * Update ShortestPathExecutorTest * Removed non-passthrough non-skip Lambda Executor again. We decided to disallow this. * Update tests/Aql/ExecutionBlockImplTest.cpp Co-Authored-By: Markus Pfeiffer * Started to add implementation of passthrough block allocation * Added a comparator to AqlCall. Mostly for tests * Fixed an issue in skip-passthrough version. Updated the tests. * Allow to 'overSkip' if we do fullCount * Enabled the first set of tests now. Only one set to go * Applied all fixes to get Integration testsuite green * Added some comments on the TestCases executed in the ExecutionBlockImpl * Added tes descriptions and removed a duplicate test * Added some comments on LamdbaExecutors * Added description of ExecutionBlockImple execute logic * Umbau 2.0 * Reshape output and skip a bit * Some test fixes * Applied review comments, thanks to reviewers * Fixed modulo 2 off by one error * Square 1 * Input validation * ShortestPathExecutor with fullCount and tests Co-authored-by: Markus Pfeiffer Co-authored-by: Tobias Gödderz --- arangod/Aql/AqlCall.h | 18 +- arangod/Aql/ExecutionBlockImpl.cpp | 8 +- arangod/Aql/ShortestPathExecutor.cpp | 319 ++++--- arangod/Aql/ShortestPathExecutor.h | 90 +- tests/Aql/ShortestPathExecutorTest.cpp | 1049 ++++++++---------------- tests/Aql/TestLambdaExecutor.cpp | 2 +- tests/Aql/TestLambdaExecutor.h | 2 +- 7 files changed, 599 insertions(+), 889 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 6ece6b80ea4d..231c11402141 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -74,8 +74,10 @@ struct AqlCall { std::size_t getOffset() const { return offset; } std::size_t getLimit() const { - // By default we use batchsize - std::size_t limit = ExecutionBlock::DefaultBatchSize; + return clampToLimit(ExecutionBlock::DefaultBatchSize); + } + + std::size_t clampToLimit(size_t limit) const { // By default we use batchsize // We are not allowed to go above softLimit if (std::holds_alternative(softLimit)) { limit = (std::min)(std::get(softLimit), limit); @@ -116,6 +118,18 @@ struct AqlCall { } bool needsFullCount() const { return fullCount; } + + bool shouldSkip() const { + if (getOffset() > 0) { + // Still need to skip. + return true; + } + if (getLimit() > 0) { + // Still need to produce. 
+ return false; + } + return needsFullCount(); + } }; constexpr bool operator<(AqlCall::Limit const& a, AqlCall::Limit const& b) { diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 6f553bad1505..035f760bc720 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -157,7 +157,8 @@ static bool constexpr isNewStyleExecutor() { std::is_same_v || std::is_same_v || #endif - std::is_same_v; + std::is_same_v || + std::is_same_v; } template @@ -1138,9 +1139,10 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert(useExecutor == ( #ifdef ARANGODB_USE_GOOGLE_TESTS - std::is_same_v || + (std::is_same_v) || #endif - std::is_same_v), + std::is_same_v || + std::is_same_v), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! diff --git a/arangod/Aql/ShortestPathExecutor.cpp b/arangod/Aql/ShortestPathExecutor.cpp index 3d660353b7df..45679024e646 100644 --- a/arangod/Aql/ShortestPathExecutor.cpp +++ b/arangod/Aql/ShortestPathExecutor.cpp @@ -67,43 +67,53 @@ ShortestPathExecutorInfos::ShortestPathExecutorInfos( _source(std::move(source)), _target(std::move(target)) {} -ShortestPathExecutorInfos::ShortestPathExecutorInfos(ShortestPathExecutorInfos&&) = default; -ShortestPathExecutorInfos::~ShortestPathExecutorInfos() = default; - arangodb::graph::ShortestPathFinder& ShortestPathExecutorInfos::finder() const { TRI_ASSERT(_finder); return *_finder.get(); } -bool ShortestPathExecutorInfos::useRegisterForInput(bool isTarget) const { - if (isTarget) { - return _target.type == InputVertex::REGISTER; - } - return _source.type == InputVertex::REGISTER; +bool ShortestPathExecutorInfos::useRegisterForSourceInput() const { + return _source.type == InputVertex::Type::REGISTER; } -RegisterId ShortestPathExecutorInfos::getInputRegister(bool isTarget) const { - TRI_ASSERT(useRegisterForInput(isTarget)); - if (isTarget) { - return _target.reg; - } +bool ShortestPathExecutorInfos::useRegisterForTargetInput() const { + return _target.type == InputVertex::Type::REGISTER; +} + +RegisterId ShortestPathExecutorInfos::getSourceInputRegister() const { + TRI_ASSERT(useRegisterForSourceInput()); return _source.reg; } -std::string const& ShortestPathExecutorInfos::getInputValue(bool isTarget) const { - TRI_ASSERT(!useRegisterForInput(isTarget)); - if (isTarget) { - return _target.value; - } +RegisterId ShortestPathExecutorInfos::getTargetInputRegister() const { + TRI_ASSERT(useRegisterForTargetInput()); + return _target.reg; +} + +std::string const& ShortestPathExecutorInfos::getSourceInputValue() const { + TRI_ASSERT(!useRegisterForSourceInput()); return _source.value; } +std::string const& ShortestPathExecutorInfos::getTargetInputValue() const { + TRI_ASSERT(!useRegisterForTargetInput()); + return _target.value; +} + bool ShortestPathExecutorInfos::usesOutputRegister(OutputName type) const { return _registerMapping.find(type) != _registerMapping.end(); } +ShortestPathExecutorInfos::InputVertex ShortestPathExecutorInfos::getSourceVertex() const noexcept { + return _source; +} + +ShortestPathExecutorInfos::InputVertex ShortestPathExecutorInfos::getTargetVertex() const noexcept { + return _target; +} + static std::string typeToString(ShortestPathExecutorInfos::OutputName type) { - switch(type) { + switch (type) { case ShortestPathExecutorInfos::VERTEX: return std::string{"VERTEX"}; case ShortestPathExecutorInfos::EDGE: @@ -117,8 +127,8 @@ RegisterId 
ShortestPathExecutorInfos::findRegisterChecked(OutputName type) const auto const& it = _registerMapping.find(type); if (ADB_UNLIKELY(it == _registerMapping.end())) { THROW_ARANGO_EXCEPTION_MESSAGE( - TRI_ERROR_INTERNAL, - "Logic error: requested unused register type " + typeToString(type)); + TRI_ERROR_INTERNAL, + "Logic error: requested unused register type " + typeToString(type)); } return it->second; } @@ -132,121 +142,197 @@ graph::TraverserCache* ShortestPathExecutorInfos::cache() const { return _finder->options().cache(); } -ShortestPathExecutor::ShortestPathExecutor(Fetcher& fetcher, Infos& infos) +ShortestPathExecutor::ShortestPathExecutor(Fetcher&, Infos& infos) : _infos(infos), - _fetcher(fetcher), - _input{CreateInvalidInputRowHint{}}, - _rowState(ExecutionState::HASMORE), + _inputRow{CreateInvalidInputRowHint{}}, _finder{infos.finder()}, _path{new arangodb::graph::ShortestPathResult{}}, - _posInPath(1), + _posInPath(0), _sourceBuilder{}, _targetBuilder{} { - if (!_infos.useRegisterForInput(false)) { - _sourceBuilder.add(VPackValue(_infos.getInputValue(false))); + if (!_infos.useRegisterForSourceInput()) { + _sourceBuilder.add(VPackValue(_infos.getSourceInputValue())); } - if (!_infos.useRegisterForInput(true)) { - _targetBuilder.add(VPackValue(_infos.getInputValue(true))); + if (!_infos.useRegisterForTargetInput()) { + _targetBuilder.add(VPackValue(_infos.getTargetInputValue())); } } -ShortestPathExecutor::~ShortestPathExecutor() = default; - // Shutdown query std::pair ShortestPathExecutor::shutdown(int errorCode) { _finder.destroyEngines(); return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; } +auto ShortestPathExecutor::doOutputPath(OutputAqlItemRow& output) -> void { + while (!output.isFull() && _posInPath < _path->length()) { + if (_infos.usesOutputRegister(ShortestPathExecutorInfos::VERTEX)) { + AqlValue vertex = _path->vertexToAqlValue(_infos.cache(), _posInPath); + output.cloneValueInto(_infos.getOutputRegister(ShortestPathExecutorInfos::VERTEX), + _inputRow, vertex); + } + if (_infos.usesOutputRegister(ShortestPathExecutorInfos::EDGE)) { + AqlValue edge = _path->edgeToAqlValue(_infos.cache(), _posInPath); + output.cloneValueInto(_infos.getOutputRegister(ShortestPathExecutorInfos::EDGE), + _inputRow, edge); + } + output.advanceRow(); + _posInPath++; + } +} + +auto ShortestPathExecutor::doSkipPath(AqlCall& call) -> size_t { + auto skip = size_t{0}; + + // call.getOffset() > 0 means we're in SKIP mode + if (call.getOffset() > 0) { + if (call.getOffset() < pathLengthAvailable()) { + skip = call.getOffset(); + } else { + skip = pathLengthAvailable(); + } + } else { + // call.getOffset() == 0, we might be in SKIP, PRODUCE, or + // FASTFORWARD/FULLCOUNT, but we only FASTFORWARD/FULLCOUNT if + // call.getLimit() == 0 as well. + if (call.needsFullCount() && call.getLimit() == 0) { + skip = pathLengthAvailable(); + } + } + _posInPath += skip; + call.didSkip(skip); + return skip; +} + +auto ShortestPathExecutor::fetchPath(AqlItemBlockInputRange& input) -> bool { + // We only want to call fetchPath if we don't have a path currently available + TRI_ASSERT(pathLengthAvailable() == 0); + _path->clear(); + _posInPath = 0; + + while (input.hasDataRow()) { + auto source = VPackSlice{}; + auto target = VPackSlice{}; + std::tie(std::ignore, _inputRow) = input.nextDataRow(); + TRI_ASSERT(_inputRow.isInitialized()); + + // Ordering important here. + // Read source and target vertex, then try to find a shortest path (if both worked). 
+ if (getVertexId(_infos.getSourceVertex(), _inputRow, _sourceBuilder, source) && + getVertexId(_infos.getTargetVertex(), _inputRow, _targetBuilder, target) && + _finder.shortestPath(source, target, *_path)) { + return true; + } + } + // Note that we only return false if + // the input does not have a data row, so if we return false + // here, we are DONE (we cannot produce any output anymore). + return false; +} +auto ShortestPathExecutor::pathLengthAvailable() -> size_t { + // Subtraction must not undeflow + TRI_ASSERT(_posInPath <= _path->length()); + return _path->length() - _posInPath; +} + std::pair ShortestPathExecutor::produceRows(OutputAqlItemRow& output) { - NoStats s; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - // Can be length 0 but never nullptr. - TRI_ASSERT(_path); +auto ShortestPathExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { while (true) { - if (_posInPath < _path->length()) { - if (_infos.usesOutputRegister(ShortestPathExecutorInfos::VERTEX)) { - AqlValue vertex = _path->vertexToAqlValue(_infos.cache(), _posInPath); - AqlValueGuard guard{vertex, true}; - output.moveValueInto(_infos.getOutputRegister(ShortestPathExecutorInfos::VERTEX), - _input, guard); + if (pathLengthAvailable() > 0) { + // TODO maybe have doOutput report whether output is full? + doOutputPath(output); + if (output.isFull()) { + if (pathLengthAvailable() > 0) { + return {ExecutorState::HASMORE, NoStats{}, AqlCall{}}; + } else { + // We don't have rows available for output. If + // upstream is DONE, we will not be able to produce more + // if upstream HASMORE, we do not know, so we say HASMORE. + return {input.upstreamState(), NoStats{}, AqlCall{}}; + } } - if (_infos.usesOutputRegister(ShortestPathExecutorInfos::EDGE)) { - AqlValue edge = _path->edgeToAqlValue(_infos.cache(), _posInPath); - AqlValueGuard guard{edge, true}; - output.moveValueInto(_infos.getOutputRegister(ShortestPathExecutorInfos::EDGE), - _input, guard); + } else { + // If fetchPath fails, this means that input has not given us a dataRow + // that yielded a path. + // If upstream is DONE, we are done too, and if upstream + // HASMORE, we can potentially make more. + if (!fetchPath(input)) { + TRI_ASSERT(!input.hasDataRow()); + return {input.upstreamState(), NoStats{}, AqlCall{}}; } - _posInPath++; - return {computeState(), s}; } - TRI_ASSERT(_posInPath >= _path->length()); - if (!fetchPath()) { - TRI_ASSERT(_posInPath >= _path->length()); - // Either WAITING or DONE - return {_rowState, s}; - } - TRI_ASSERT(_posInPath < _path->length()); } } -bool ShortestPathExecutor::fetchPath() { - VPackSlice start; - VPackSlice end; - do { - // Make sure we have a valid start *and* end vertex - do { - std::tie(_rowState, _input) = _fetcher.fetchRow(); - if (!_input.isInitialized()) { - // Either WAITING or DONE and nothing produced. 
- TRI_ASSERT(_rowState == ExecutionState::WAITING || _rowState == ExecutionState::DONE); - return false; - } - } while (!getVertexId(false, start) || !getVertexId(true, end)); - TRI_ASSERT(start.isString()); - TRI_ASSERT(end.isString()); - _path->clear(); - } while (!_finder.shortestPath(start, end, *_path)); - _posInPath = 0; - return true; -} +auto ShortestPathExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + auto skipped = size_t{0}; + + while (true) { + skipped += doSkipPath(call); -ExecutionState ShortestPathExecutor::computeState() const { - if (_rowState == ExecutionState::HASMORE || _posInPath < _path->length()) { - return ExecutionState::HASMORE; + if (pathLengthAvailable() == 0) { + if (!fetchPath(input)) { + TRI_ASSERT(!input.hasDataRow()); + return {input.upstreamState(), skipped, AqlCall{}}; + } + } else { + // if we end up here there is path available, but + // we have skipped as much as we were asked to. + TRI_ASSERT(call.getOffset() == 0); + return {ExecutorState::HASMORE, skipped, AqlCall{}}; + } } - return ExecutionState::DONE; } -bool ShortestPathExecutor::getVertexId(bool isTarget, VPackSlice& id) { - if (_infos.useRegisterForInput(isTarget)) { - // The input row stays valid until the next fetchRow is executed. - // So the slice can easily point to it. - RegisterId reg = _infos.getInputRegister(isTarget); - AqlValue const& in = _input.getValue(reg); - if (in.isObject()) { - try { - auto idString = _finder.options().trx()->extractIdString(in.slice()); - if (isTarget) { - _targetBuilder.clear(); - _targetBuilder.add(VPackValue(idString)); - id = _targetBuilder.slice(); - } else { - _sourceBuilder.clear(); - _sourceBuilder.add(VPackValue(idString)); - id = _sourceBuilder.slice(); +bool ShortestPathExecutor::getVertexId(ShortestPathExecutorInfos::InputVertex const& vertex, + InputAqlItemRow& row, + VPackBuilder& builder, VPackSlice& id) { + switch (vertex.type) { + case ShortestPathExecutorInfos::InputVertex::Type::REGISTER: { + AqlValue const& in = row.getValue(vertex.reg); + if (in.isObject()) { + try { + auto idString = _finder.options().trx()->extractIdString(in.slice()); + builder.clear(); + builder.add(VPackValue(idString)); + id = builder.slice(); + // Guranteed by extractIdValue + TRI_ASSERT(::isValidId(id)); + } catch (...) { + // _id or _key not present... ignore this error and fall through + // returning no path + return false; + } + return true; + } else if (in.isString()) { + id = in.slice(); + // Validation + if (!::isValidId(id)) { + _finder.options().query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for Shortest Path: " + "Only id strings or objects with " + "_id are allowed"); + return false; } - // Guranteed by extractIdValue - TRI_ASSERT(::isValidId(id)); - } catch (...) { - // _id or _key not present... 
ignore this error and fall through - // returning no path + return true; + } else { + _finder.options().query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for Shortest Path: " + "Only id strings or objects with " + "_id are allowed"); return false; } - return true; - } else if (in.isString()) { - id = in.slice(); - // Validation + } + case ShortestPathExecutorInfos::InputVertex::Type::CONSTANT: { + id = builder.slice(); if (!::isValidId(id)) { _finder.options().query()->registerWarning( TRI_ERROR_BAD_PARAMETER, @@ -256,28 +342,7 @@ bool ShortestPathExecutor::getVertexId(bool isTarget, VPackSlice& id) { return false; } return true; - } else { - _finder.options().query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for Shortest Path: " - "Only id strings or objects with " - "_id are allowed"); - return false; - } - } else { - if (isTarget) { - id = _targetBuilder.slice(); - } else { - id = _sourceBuilder.slice(); - } - if (!::isValidId(id)) { - _finder.options().query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for Shortest Path: " - "Only id strings or objects with " - "_id are allowed"); - return false; } - return true; } + return false; } diff --git a/arangod/Aql/ShortestPathExecutor.h b/arangod/Aql/ShortestPathExecutor.h index 36e9a84849ac..93bdfe2fe3ca 100644 --- a/arangod/Aql/ShortestPathExecutor.h +++ b/arangod/Aql/ShortestPathExecutor.h @@ -23,12 +23,18 @@ #ifndef ARANGOD_AQL_SHORTEST_PATH_EXECUTOR_H #define ARANGOD_AQL_SHORTEST_PATH_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" +#include "Graph/ShortestPathFinder.h" +#include "Graph/ShortestPathResult.h" #include +using namespace arangodb::velocypack; + namespace arangodb { class Result; @@ -53,16 +59,17 @@ class NoStats; class ShortestPathExecutorInfos : public ExecutorInfos { public: struct InputVertex { - enum { CONSTANT, REGISTER } type; + enum class Type { CONSTANT, REGISTER }; + Type type; // TODO make the following two a union instead RegisterId reg; std::string value; // cppcheck-suppress passedByValue explicit InputVertex(std::string value) - : type(CONSTANT), reg(0), value(std::move(value)) {} + : type(Type::CONSTANT), reg(0), value(std::move(value)) {} explicit InputVertex(RegisterId reg) - : type(REGISTER), reg(reg), value("") {} + : type(Type::REGISTER), reg(reg), value("") {} }; enum OutputName { VERTEX, EDGE }; @@ -81,9 +88,9 @@ class ShortestPathExecutorInfos : public ExecutorInfos { ShortestPathExecutorInfos() = delete; - ShortestPathExecutorInfos(ShortestPathExecutorInfos&&); + ShortestPathExecutorInfos(ShortestPathExecutorInfos&&) = default; ShortestPathExecutorInfos(ShortestPathExecutorInfos const&) = delete; - ~ShortestPathExecutorInfos(); + ~ShortestPathExecutorInfos() = default; arangodb::graph::ShortestPathFinder& finder() const; @@ -92,38 +99,44 @@ class ShortestPathExecutorInfos : public ExecutorInfos { * * @param isTarget defines if we look for target(true) or source(false) */ - bool useRegisterForInput(bool isTarget) const; + [[nodiscard]] bool useRegisterForSourceInput() const; + [[nodiscard]] bool useRegisterForTargetInput() const; /** * @brief get the register used for the input * * @param isTarget defines if we look for target(true) or source(false) */ - RegisterId getInputRegister(bool isTarget) const; + [[nodiscard]] RegisterId getSourceInputRegister() const; + [[nodiscard]] RegisterId getTargetInputRegister() const; 
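  /**
   * Editor's note: illustrative call-site sketch, not part of this patch.
   * `infos` and `row` are assumed local names; the pattern mirrors the
   * executor tests, which assume the register holds an id string.
   *
   *   std::string sourceId;
   *   if (infos.useRegisterForSourceInput()) {
   *     // id comes from the input row
   *     AqlValue value = row.getValue(infos.getSourceInputRegister());
   *     sourceId = value.slice().copyString();
   *   } else {
   *     // id is a constant configured on the node
   *     sourceId = infos.getSourceInputValue();
   *   }
   */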
/** * @brief get the const value for the input * * @param isTarget defines if we look for target(true) or source(false) */ - std::string const& getInputValue(bool isTarget) const; + [[nodiscard]] std::string const& getSourceInputValue() const; + [[nodiscard]] std::string const& getTargetInputValue() const; /** * @brief test if we have an output register for this type * * @param type: Either VERTEX or EDGE */ - bool usesOutputRegister(OutputName type) const; + [[nodiscard]] bool usesOutputRegister(OutputName type) const; /** * @brief get the output register for the given type */ - RegisterId getOutputRegister(OutputName type) const; + [[nodiscard]] RegisterId getOutputRegister(OutputName type) const; + + [[nodiscard]] graph::TraverserCache* cache() const; - graph::TraverserCache* cache() const; + [[nodiscard]] InputVertex getSourceVertex() const noexcept; + [[nodiscard]] InputVertex getTargetVertex() const noexcept; private: - RegisterId findRegisterChecked(OutputName type) const; + [[nodiscard]] RegisterId findRegisterChecked(OutputName type) const; private: /// @brief the shortest path finder. @@ -157,57 +170,60 @@ class ShortestPathExecutor { ShortestPathExecutor(ShortestPathExecutor&&) = default; ShortestPathExecutor(Fetcher& fetcher, Infos&); - ~ShortestPathExecutor(); + ~ShortestPathExecutor() = default; /** * @brief Shutdown will be called once for every query * * @return ExecutionState and no error. */ - std::pair shutdown(int errorCode); + [[nodiscard]] auto shutdown(int errorCode) -> std::pair; + /** * @brief produce the next Row of Aql Values. - * - * @return ExecutionState, and if successful exactly one new Row of AqlItems. */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(OutputAqlItemRow& output) + -> std::pair; + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; private: /** - * @brief Fetch input row(s) and compute path - * - * @return false if we are done and no path could be found. + * @brief fetches a path given the current row in input. + * a flag indicating whether we found a path, put it into the + * internal state. */ - bool fetchPath(); + [[nodiscard]] auto fetchPath(AqlItemBlockInputRange& input) -> bool; + [[nodiscard]] auto pathLengthAvailable() -> size_t; /** - * @brief compute the correct return state - * - * @return DONE if no more is expected + * @brief produce the output from the currently stored path until either + * the path is exhausted or there is no output space left. */ - - ExecutionState computeState() const; - + auto doOutputPath(OutputAqlItemRow& output) -> void; + auto doSkipPath(AqlCall& call) -> size_t; /** * @brief get the id of a input vertex - * Result will be in id parameter, it - * is guaranteed that the memory - * is managed until the next call of fetchPath. - * - * @return DONE if no more is expected + * Result will be written into the given Slice. + * This is either managed by the handed in builder (might be overwritten), + * or by the handed in row, or a constant value in the options. + * In any case it will stay valid at least until the reference to the input + * row is lost, or the builder is resetted. 
*/ - bool getVertexId(bool isTarget, arangodb::velocypack::Slice& id); + [[nodiscard]] auto getVertexId(ShortestPathExecutorInfos::InputVertex const& vertex, + InputAqlItemRow& row, Builder& builder, Slice& id) -> bool; private: Infos& _infos; - Fetcher& _fetcher; - InputAqlItemRow _input; - ExecutionState _rowState; + InputAqlItemRow _inputRow; + /// @brief the shortest path finder. arangodb::graph::ShortestPathFinder& _finder; + /// @brief current computed path. std::unique_ptr _path; - size_t _posInPath; /// @brief temporary memory mangement for source id diff --git a/tests/Aql/ShortestPathExecutorTest.cpp b/tests/Aql/ShortestPathExecutorTest.cpp index 4765135c4739..28a8bdc3fb9b 100644 --- a/tests/Aql/ShortestPathExecutorTest.cpp +++ b/tests/Aql/ShortestPathExecutorTest.cpp @@ -32,6 +32,8 @@ #include "Mocks/Servers.h" #include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockHelper.h" +#include "Aql/AqlItemBlockManager.h" #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -47,9 +49,12 @@ #include "Graph/TraverserCache.h" #include "Graph/TraverserOptions.h" +#include "../Mocks/Servers.h" + using namespace arangodb; using namespace arangodb::aql; using namespace arangodb::graph; +using namespace arangodb::tests::mocks; namespace arangodb { namespace tests { @@ -114,6 +119,10 @@ class TokenTranslator : public TraverserCache { std::unordered_set _edges; }; +// FakePathFinder only stores a lump of pairs (source and targets) by which +// sequences of outputs can be found. It also stores which paths it has been +// asked for to verify later whether the outputs produced by the +// ShortestPathExecutor are the ones we expected. class FakePathFinder : public ShortestPathFinder { public: FakePathFinder(ShortestPathOptions& opts, TokenTranslator& translator) @@ -130,6 +139,7 @@ class FakePathFinder : public ShortestPathFinder { TRI_ASSERT(source.isString()); TRI_ASSERT(target.isString()); _calledWith.emplace_back(std::make_pair(source.copyString(), target.copyString())); + std::string const s = source.copyString(); std::string const t = target.copyString(); for (auto const& p : _paths) { @@ -146,14 +156,13 @@ class FakePathFinder : public ShortestPathFinder { return false; } - std::vector const& findPath(std::pair const& src) { + std::vector const& findPath(std::pair const& src) const { for (auto const& p : _paths) { if (p.front() == src.first && p.back() == src.second) { return p; } } - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + return _theEmptyPath; } std::pair const& calledAt(size_t index) { @@ -161,10 +170,15 @@ class FakePathFinder : public ShortestPathFinder { return _calledWith[index]; } + [[nodiscard]] auto getCalledWith() -> std::vector> { + return _calledWith; + }; + // Needs to provide lookupFunctionality for Cache private: std::vector> _paths; std::vector> _calledWith; + std::vector const _theEmptyPath{}; TokenTranslator& _translator; }; @@ -175,762 +189,361 @@ struct TestShortestPathOptions : public ShortestPathOptions { } }; +using Vertex = ShortestPathExecutorInfos::InputVertex; +using RegisterSet = std::unordered_set; +using RegisterMapping = + std::unordered_map; +using Path = std::vector; +using PathSequence = std::vector; + +enum class ShortestPathOutput { VERTEX_ONLY, VERTEX_AND_EDGE }; + +// TODO: this needs a << operator +struct ShortestPathTestParameters { + static RegisterSet _makeOutputRegisters(ShortestPathOutput in) { + switch (in) { + case ShortestPathOutput::VERTEX_ONLY: + return 
RegisterSet{std::initializer_list{2}}; + case ShortestPathOutput::VERTEX_AND_EDGE: + return RegisterSet{std::initializer_list{2, 3}}; + } + return RegisterSet{}; + } + static RegisterMapping _makeRegisterMapping(ShortestPathOutput in) { + switch (in) { + case ShortestPathOutput::VERTEX_ONLY: + return RegisterMapping{{ShortestPathExecutorInfos::OutputName::VERTEX, 2}}; + break; + case ShortestPathOutput::VERTEX_AND_EDGE: + return RegisterMapping{{ShortestPathExecutorInfos::OutputName::VERTEX, 2}, + {ShortestPathExecutorInfos::OutputName::EDGE, 3}}; + } + return RegisterMapping{}; + } + + ShortestPathTestParameters( + std::tuple, PathSequence, AqlCall, ShortestPathOutput, size_t> params) + : _source(std::get<0>(params)), + _target(std::get<1>(params)), + _outputRegisters(_makeOutputRegisters(std::get<5>(params))), + _registerMapping(_makeRegisterMapping(std::get<5>(params))), + _inputMatrix{std::get<2>(params)}, + _inputMatrixCopy{std::get<2>(params)}, + _paths(std::get<3>(params)), + _call(std::get<4>(params)), + _blockSize(std::get<6>(params)) {} + + Vertex _source; + Vertex _target; + RegisterSet _inputRegisters; + RegisterSet _outputRegisters; + RegisterMapping _registerMapping; + MatrixBuilder<2> _inputMatrix; + MatrixBuilder<2> _inputMatrixCopy; + PathSequence _paths; + AqlCall _call; + size_t _blockSize{1000}; +}; + class ShortestPathExecutorTest : public ::testing::Test, - public arangodb::tests::LogSuppressor { + public ::testing::WithParamInterface, PathSequence, AqlCall, ShortestPathOutput, size_t>> { protected: - RegisterId sourceIn; - RegisterId targetIn; - ShortestPathExecutorInfos::InputVertex constSource; - ShortestPathExecutorInfos::InputVertex constTarget; - ShortestPathExecutorInfos::InputVertex regSource; - ShortestPathExecutorInfos::InputVertex regTarget; - ShortestPathExecutorInfos::InputVertex brokenSource; - ShortestPathExecutorInfos::InputVertex brokenTarget; + ShortestPathTestParameters parameters; + + MockAqlServer server; + ExecutionState state; + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager; + std::unique_ptr fakedQuery; + TestShortestPathOptions options; + TokenTranslator& translator; + + // parameters are copied because they are const otherwise + // and that doesn't mix with std::move + ShortestPathExecutorInfos infos; + + FakePathFinder& finder; + + SharedAqlItemBlockPtr inputBlock; + AqlItemBlockInputRange input; + + std::shared_ptr fakeUnusedBlock; + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher; + + ShortestPathExecutor testee; ShortestPathExecutorTest() - : sourceIn(0), - targetIn(1), - constSource("vertex/source"), - constTarget("vertex/target"), - regSource(sourceIn), - regTarget(targetIn), - brokenSource{"IwillBreakYourSearch"}, - brokenTarget{"I will also break your search"} {} - - void ValidateResult(ShortestPathExecutorInfos& infos, OutputAqlItemRow& result, - std::vector> const& resultPaths) { - if (!resultPaths.empty()) { - FakePathFinder& finder = static_cast(infos.finder()); - TokenTranslator& translator = *(static_cast(infos.cache())); - auto block = result.stealBlock(); - ASSERT_NE(block, nullptr); - size_t index = 0; - for (size_t i = 0; i < resultPaths.size(); ++i) { - auto path = finder.findPath(resultPaths[i]); - for (size_t j = 0; j < path.size(); ++j) { + : parameters(GetParam()), + server{}, + itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), + fakedQuery(server.createFakeQuery()), + options(fakedQuery.get()), + translator(*(static_cast(options.cache()))), + 
infos(std::make_shared(parameters._inputRegisters), + std::make_shared(parameters._outputRegisters), 2, 4, + {}, {0, 1}, std::make_unique(options, translator), + std::move(parameters._registerMapping), + std::move(parameters._source), std::move(parameters._target)), + finder(static_cast(infos.finder())), + inputBlock(buildBlock<2>(itemBlockManager, std::move(parameters._inputMatrix))), + input(AqlItemBlockInputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size())), + fakeUnusedBlock(VPackParser::fromJson("[]")), + fetcher(itemBlockManager, fakeUnusedBlock->steal(), false), + testee(fetcher, infos) { + for (auto&& p : parameters._paths) { + finder.addPath(std::move(p)); + } + } + + size_t ExpectedNumberOfRowsProduced(size_t expectedFound) { + if (parameters._call.getOffset() >= expectedFound) { + return 0; + } else { + expectedFound -= parameters._call.getOffset(); + } + return parameters._call.clampToLimit(expectedFound); + } + + // We only verify that the shortest path executor was called with + // correct inputs + void ValidateCalledWith() { + auto pathsQueriedBetween = finder.getCalledWith(); + auto block = buildBlock<2>(itemBlockManager, std::move(parameters._inputMatrix)); + + // We should always only call the finder at most for all input rows + ASSERT_LE(pathsQueriedBetween.size(), block->size()); + + auto blockIndex = size_t{0}; + for (auto const& input : pathsQueriedBetween) { + auto source = std::string{}; + auto target = std::string{}; + + if (infos.useRegisterForSourceInput()) { + AqlValue value = block->getValue(blockIndex, infos.getSourceInputRegister()); + ASSERT_TRUE(value.isString()); + source = value.slice().copyString(); + } else { + source = infos.getSourceInputValue(); + } + + if (infos.useRegisterForTargetInput()) { + AqlValue value = block->getValue(blockIndex, infos.getTargetInputRegister()); + ASSERT_TRUE(value.isString()); + target = value.slice().copyString(); + } else { + target = infos.getTargetInputValue(); + } + ASSERT_EQ(source, input.first); + ASSERT_EQ(target, input.second); + blockIndex++; + } + } + + // TODO: check fullcount correctness. 
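  // Editor's note (worked example, not part of this patch): with a single
  // queried path of length 3 and a call {offset: 1, softLimit: 1, fullCount:
  // true}, the emulation in TestExecutor below should skip one row up front
  // (skippedInitial == 1), produce exactly one row, and count the remaining
  // row during the fullCount phase (skippedFullCount == 1), so the final
  // assertion in ValidateResult sums to the full path length of 3.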
+ void ValidateResult(std::vector& results, + size_t skippedInitial, size_t skippedFullCount) { + auto pathsQueriedBetween = finder.getCalledWith(); + + FakePathFinder& finder = static_cast(infos.finder()); + TokenTranslator& translator = *(static_cast(infos.cache())); + + auto expectedRowsFound = std::vector{}; + auto expectedPathStarts = std::set{}; + for (auto&& p : pathsQueriedBetween) { + auto& f = finder.findPath(p); + expectedPathStarts.insert(expectedRowsFound.size()); + expectedRowsFound.insert(expectedRowsFound.end(), f.begin(), f.end()); + } + + auto expectedNrRowsSkippedInitial = + std::min(parameters._call.getOffset(), expectedRowsFound.size()); + EXPECT_EQ(skippedInitial, expectedNrRowsSkippedInitial); + + // TODO: Really we're relying on the fact here that the executor + // calls the path finder with the correct inputs, where we should + // assert/compute the paths that could be produced if the + // finder is called with the input parameters given in the test + auto expectedNrRowsProduced = ExpectedNumberOfRowsProduced(expectedRowsFound.size()); + + auto expectedRowsIndex = size_t{skippedInitial}; + for (auto const& block : results) { + if (block != nullptr) { + ASSERT_NE(block, nullptr); + for (size_t blockIndex = 0; blockIndex < block->size(); ++blockIndex, ++expectedRowsIndex) { if (infos.usesOutputRegister(ShortestPathExecutorInfos::VERTEX)) { AqlValue value = - block->getValue(index, infos.getOutputRegister(ShortestPathExecutorInfos::VERTEX)); + block->getValue(blockIndex, + infos.getOutputRegister(ShortestPathExecutorInfos::VERTEX)); EXPECT_TRUE(value.isObject()); EXPECT_TRUE(arangodb::basics::VelocyPackHelper::compare( value.slice(), - translator.translateVertex(arangodb::velocypack::StringRef(path[j])), + translator.translateVertex(arangodb::velocypack::StringRef( + expectedRowsFound[expectedRowsIndex])), false) == 0); } if (infos.usesOutputRegister(ShortestPathExecutorInfos::EDGE)) { AqlValue value = - block->getValue(index, infos.getOutputRegister(ShortestPathExecutorInfos::EDGE)); - if (j == 0) { + block->getValue(blockIndex, + infos.getOutputRegister(ShortestPathExecutorInfos::EDGE)); + + if (expectedPathStarts.find(expectedRowsIndex) != expectedPathStarts.end()) { EXPECT_TRUE(value.isNull(false)); } else { EXPECT_TRUE(value.isObject()); VPackSlice edge = value.slice(); // FROM and TO checks are enough here. EXPECT_TRUE(arangodb::velocypack::StringRef(edge.get(StaticStrings::FromString)) - .compare(path[j - 1]) == 0); + .compare(expectedRowsFound[expectedRowsIndex - 1]) == 0); EXPECT_TRUE(arangodb::velocypack::StringRef(edge.get(StaticStrings::ToString)) - .compare(path[j]) == 0); + .compare(expectedRowsFound[expectedRowsIndex]) == 0); } } - ++index; } } } - } + ASSERT_EQ(expectedRowsIndex - skippedInitial, expectedNrRowsProduced); - void TestExecutor(bool waiting, ShortestPathExecutorInfos& infos, - std::shared_ptr const& input, - std::vector> const& resultPaths) { - if (waiting) { - TestExecutorWaiting(infos, input, resultPaths); - } else { - TestExecutorNotWaiting(infos, input, resultPaths); + // If a fullCount was requested, the sum (skippedInitial + produced + + // skippedFullCount) should be exactly the number of rows we produced. 
+ if (parameters._call.fullCount) { + ASSERT_EQ(skippedInitial + (expectedRowsIndex - skippedInitial) + skippedFullCount, + expectedRowsFound.size()); } } - void TestExecutorWaiting(ShortestPathExecutorInfos& infos, - std::shared_ptr const& input, - std::vector> const& resultPaths) { - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 4)}; - - NoStats stats{}; - ExecutionState state = ExecutionState::HASMORE; - auto& finder = dynamic_cast(infos.finder()); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - ShortestPathExecutor testee(fetcher, infos); - // Fetch fullPath - for (size_t i = 0; i < resultPaths.size(); ++i) { - EXPECT_EQ(state, ExecutionState::HASMORE); - // if we pull, we always wait - std::tie(state, stats) = testee.produceRows(result); - EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_FALSE(result.produced()); - state = ExecutionState::HASMORE; // For simplicity on path fetching. - auto path = finder.findPath(resultPaths[i]); - for (ADB_IGNORE_UNUSED auto const& v : path) { - ASSERT_EQ(state, ExecutionState::HASMORE); - std::tie(state, stats) = testee.produceRows(result); - EXPECT_TRUE(result.produced()); - result.advanceRow(); - } - auto gotCalledWith = finder.calledAt(i); - EXPECT_EQ(gotCalledWith.first, resultPaths[i].first); - EXPECT_EQ(gotCalledWith.second, resultPaths[i].second); - } - if (resultPaths.empty()) { - // Fetch at least twice, one waiting - std::tie(state, stats) = testee.produceRows(result); - EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_FALSE(result.produced()); - // One no findings - std::tie(state, stats) = testee.produceRows(result); + void TestExecutor() { + // We use a copy here because we modify the call and want to keep track + // of whether things happen the correct way. + auto ourCall = AqlCall{parameters._call}; + auto skippedInitial = size_t{0}; + auto skippedFullCount = size_t{0}; + auto state = ExecutorState{ExecutorState::HASMORE}; + auto outputs = std::vector{}; + + // TODO: Do we have to emulate pauses because + // upstream needs to produce more? + // that would require breaking up the input + // matrix into chunks and feeding those into + // the executor. 
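    // Editor's note (summary, not part of this patch): the three steps below
    // emulate the sequence in which ExecutionBlockImpl is expected to drive an
    // executor for one AqlCall:
    //   1. if the call carries an offset, skipRowsRange() consumes it,
    //   2. while the executor reports HASMORE and the call still has a limit,
    //      produceRows() fills fresh output blocks,
    //   3. if fullCount is requested, a final skipRowsRange() with hard and
    //      soft limit 0 counts whatever remains.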
+ + // If an offset is requested, skip + if (ourCall.getOffset() > 0) { + std::tie(state, skippedInitial, std::ignore) = testee.skipRowsRange(input, ourCall); } - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(result.produced()); - ValidateResult(infos, result, resultPaths); - } + // Produce rows + while (state == ExecutorState::HASMORE && ourCall.getLimit() > 0) { + SharedAqlItemBlockPtr block = + itemBlockManager.requestBlock(parameters._blockSize, 4); - void TestExecutorNotWaiting(ShortestPathExecutorInfos& infos, - std::shared_ptr const& input, - std::vector> const& resultPaths) { - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 4)}; - - NoStats stats{}; - ExecutionState state = ExecutionState::HASMORE; - auto& finder = dynamic_cast(infos.finder()); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - ShortestPathExecutor testee(fetcher, infos); - // Fetch fullPath - for (size_t i = 0; i < resultPaths.size(); ++i) { - EXPECT_EQ(state, ExecutionState::HASMORE); - auto path = finder.findPath(resultPaths[i]); - for (ADB_IGNORE_UNUSED auto const& v : path) { - ASSERT_EQ(state, ExecutionState::HASMORE); - std::tie(state, stats) = testee.produceRows(result); - EXPECT_TRUE(result.produced()); - result.advanceRow(); - } - auto gotCalledWith = finder.calledAt(i); - EXPECT_EQ(gotCalledWith.first, resultPaths[i].first); - EXPECT_EQ(gotCalledWith.second, resultPaths[i].second); - } - if (resultPaths.empty()) { - // We need to fetch once - std::tie(state, stats) = testee.produceRows(result); - } - EXPECT_FALSE(result.produced()); - EXPECT_EQ(state, ExecutionState::DONE); - ValidateResult(infos, result, resultPaths); - } + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + output.setCall(std::move(ourCall)); - void RunSimpleTest(bool waiting, ShortestPathExecutorInfos::InputVertex&& source, - ShortestPathExecutorInfos::InputVertex&& target) { - RegisterId vOutReg = 2; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - auto inputRegisters = std::make_shared>( - std::initializer_list{}); - auto outputRegisters = std::make_shared>( - std::initializer_list{vOutReg}); - std::unordered_map registerMapping{ - {ShortestPathExecutorInfos::OutputName::VERTEX, vOutReg}}; - TestShortestPathOptions options(fakedQuery.get()); - TokenTranslator& translator = *(static_cast(options.cache())); - std::unique_ptr finderPtr = - std::make_unique(options, translator); - std::shared_ptr input; - ShortestPathExecutorInfos infos{inputRegisters, - outputRegisters, - 2, - 4, - {}, - {0, 1}, - std::move(finderPtr), - std::move(registerMapping), - std::move(source), - std::move(target)}; - - std::vector> resultPaths; - resultPaths.clear(); - input = VPackParser::fromJson(R"([["vertex/source","vertex/target"]])"); - TestExecutor(waiting, infos, input, resultPaths); - } + std::tie(state, std::ignore, std::ignore) = testee.produceRows(input, output); - void RunTestWithNoRowsUpstream(bool waiting, ShortestPathExecutorInfos::InputVertex&& source, - ShortestPathExecutorInfos::InputVertex&& target, - bool useEdgeOutput) { - RegisterId vOutReg = 2; - RegisterId eOutReg = 3; - 
mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - auto inputRegisters = std::make_shared>( - std::initializer_list{}); - auto outputRegisters = std::make_shared>( - std::initializer_list{vOutReg}); - std::unordered_map registerMapping{ - {ShortestPathExecutorInfos::OutputName::VERTEX, vOutReg}}; - if (useEdgeOutput) { - registerMapping.emplace(ShortestPathExecutorInfos::OutputName::EDGE, eOutReg); - outputRegisters->emplace(eOutReg); + outputs.emplace_back(output.stealBlock()); + ourCall = output.stealClientCall(); } - TestShortestPathOptions options(fakedQuery.get()); - TokenTranslator& translator = *(static_cast(options.cache())); - std::unique_ptr finderPtr = - std::make_unique(options, translator); - std::shared_ptr input; - ShortestPathExecutorInfos infos{inputRegisters, - outputRegisters, - 2, - 4, - {}, - {0, 1}, - std::move(finderPtr), - std::move(registerMapping), - std::move(source), - std::move(target)}; - - std::vector> resultPaths; - resultPaths.clear(); - input = VPackParser::fromJson("[]"); - TestExecutor(waiting, infos, input, resultPaths); - } - void RunTestWithRowsUpstreamNoPaths(bool waiting, - ShortestPathExecutorInfos::InputVertex&& source, - ShortestPathExecutorInfos::InputVertex&& target, - bool useEdgeOutput) { - RegisterId vOutReg = 2; - RegisterId eOutReg = 3; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - auto inputRegisters = std::make_shared>( - std::initializer_list{}); - auto outputRegisters = std::make_shared>( - std::initializer_list{vOutReg}); - std::unordered_map registerMapping{ - {ShortestPathExecutorInfos::OutputName::VERTEX, vOutReg}}; - if (useEdgeOutput) { - registerMapping.emplace(ShortestPathExecutorInfos::OutputName::EDGE, eOutReg); - outputRegisters->emplace(eOutReg); + // FullCount + if (ourCall.needsFullCount()) { + // Emulate being called with a full count + ourCall.hardLimit = 0; + ourCall.softLimit = 0; + std::tie(state, skippedFullCount, std::ignore) = testee.skipRowsRange(input, ourCall); } - TestShortestPathOptions options(fakedQuery.get()); - TokenTranslator& translator = *(static_cast(options.cache())); - std::unique_ptr finderPtr = - std::make_unique(options, translator); - std::shared_ptr input; - ShortestPathExecutorInfos infos{inputRegisters, - outputRegisters, - 2, - 4, - {}, - {0, 1}, - std::move(finderPtr), - std::move(registerMapping), - std::move(source), - std::move(target)}; - - std::vector> resultPaths; - input = VPackParser::fromJson(R"([["vertex/source","vertex/target"]])"); - TestExecutor(waiting, infos, input, resultPaths); - } - void RunTestWithRowsUpstreamOnePath(bool waiting, - ShortestPathExecutorInfos::InputVertex&& source, - ShortestPathExecutorInfos::InputVertex&& target, - bool useEdgeOutput) { - RegisterId vOutReg = 2; - RegisterId eOutReg = 3; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - auto inputRegisters = std::make_shared>( - std::initializer_list{}); - auto outputRegisters = std::make_shared>( - std::initializer_list{vOutReg}); - std::unordered_map registerMapping{ - {ShortestPathExecutorInfos::OutputName::VERTEX, vOutReg}}; - if (useEdgeOutput) { - registerMapping.emplace(ShortestPathExecutorInfos::OutputName::EDGE, eOutReg); - outputRegisters->emplace(eOutReg); - } - TestShortestPathOptions options(fakedQuery.get()); - TokenTranslator& translator = *(static_cast(options.cache())); - std::unique_ptr finderPtr = - std::make_unique(options, translator); - std::shared_ptr 
input; - ShortestPathExecutorInfos infos{inputRegisters, - outputRegisters, - 2, - 4, - {}, - {0, 1}, - std::move(finderPtr), - std::move(registerMapping), - std::move(source), - std::move(target)}; - - std::vector> resultPaths; - FakePathFinder& finder = static_cast(infos.finder()); - input = VPackParser::fromJson(R"([["vertex/source","vertex/target"]])"); - finder.addPath(std::vector{"vertex/source", "vertex/intermed", - "vertex/target"}); - resultPaths.emplace_back(std::make_pair("vertex/source", "vertex/target")); - TestExecutor(waiting, infos, input, resultPaths); + ValidateCalledWith(); + ValidateResult(outputs, skippedInitial, skippedFullCount); } - - void RunTestWithMultipleRowsUpstream(bool waiting, - ShortestPathExecutorInfos::InputVertex&& source, - ShortestPathExecutorInfos::InputVertex&& target, - bool useEdgeOutput) { - RegisterId vOutReg = 2; - RegisterId eOutReg = 3; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - auto inputRegisters = std::make_shared>( - std::initializer_list{}); - auto outputRegisters = std::make_shared>( - std::initializer_list{vOutReg}); - std::unordered_map registerMapping{ - {ShortestPathExecutorInfos::OutputName::VERTEX, vOutReg}}; - if (useEdgeOutput) { - registerMapping.emplace(ShortestPathExecutorInfos::OutputName::EDGE, eOutReg); - outputRegisters->emplace(eOutReg); - } - TestShortestPathOptions options(fakedQuery.get()); - TokenTranslator& translator = *(static_cast(options.cache())); - std::unique_ptr finderPtr = - std::make_unique(options, translator); - std::shared_ptr input; - ShortestPathExecutorInfos infos{inputRegisters, - outputRegisters, - 2, - 4, - {}, - {0, 1}, - std::move(finderPtr), - std::move(registerMapping), - std::move(source), - std::move(target)}; - - std::vector> resultPaths; - FakePathFinder& finder = static_cast(infos.finder()); - input = VPackParser::fromJson(R"([["vertex/source","vertex/target"], ["vertex/a", "vertex/d"]])"); - // We add enough paths for all combinations - // Otherwise waiting / more / done is getting complicated - finder.addPath(std::vector{"vertex/source", "vertex/intermed", - "vertex/target"}); - finder.addPath(std::vector{"vertex/a", "vertex/b", "vertex/c", - "vertex/d"}); - finder.addPath(std::vector{"vertex/source", "vertex/b", - "vertex/c", "vertex/d"}); - finder.addPath( - std::vector{"vertex/a", "vertex/b", "vertex/target"}); - resultPaths.emplace_back(std::make_pair("vertex/source", "vertex/target")); - // Add the expected second path - if (infos.useRegisterForInput(false)) { - // Source is register - if (infos.useRegisterForInput(true)) { - // Target is register - resultPaths.emplace_back(std::make_pair("vertex/a", "vertex/d")); - } else { - // Target constant - resultPaths.emplace_back(std::make_pair("vertex/a", "vertex/target")); - } - } else { - // Source is constant - if (infos.useRegisterForInput(true)) { - // Target is register - resultPaths.emplace_back(std::make_pair("vertex/source", "vertex/d")); - } else { - // Target constant - resultPaths.emplace_back( - std::make_pair("vertex/source", "vertex/target")); - } - } - TestExecutor(waiting, infos, input, resultPaths); +}; // namespace aql + +/* + * We currently only have one test, but it's heavily parameterised. + * We emulate the call sequence of ExecutionBlockImpl, so, we skip, produce, and + * fullcount (depending on what the AqlCall parameter prescribes). 
+ * + * the test with all combinations of parameters defined below, and compare the + * produced output of the executor with the expected output (which in turn is + * computed from the parameters). + * + * The parameters are + * - sources: constant or register source (then drawn from input) + * - targets: constant or register source (then drawn from input) + * - inputs: a matrix of input rows + * - paths: paths present in the fakePathFinder + * - calls: AqlCalls giving the offset, limits, and fullCount + * - variants: whether to output vertices only or vertices and edges + * - blockSizes: which outputBlock sizes to test with + * + * We never actually perform a shortest path search: testing this is the + * responsibility of the test for the shortest path finder. + */ + +TEST_P(ShortestPathExecutorTest, the_test) { TestExecutor(); } + +Vertex const constSource("vertex/source"), constTarget("vertex/target"), + regSource(0), regTarget(1), brokenSource{"IwillBreakYourSearch"}, + brokenTarget{"I will also break your search"}; +MatrixBuilder<2> const noneRow{{{{}}}}; +MatrixBuilder<2> const oneRow{{{{R"("vertex/source")"}, {R"("vertex/target")"}}}}; +MatrixBuilder<2> const twoRows{{{{R"("vertex/source")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/b")"}}}}; +MatrixBuilder<2> const threeRows{{{{R"("vertex/source")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/b")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/target")"}}}}; +MatrixBuilder<2> const someRows{{{{R"("vertex/c")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/b")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/e")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/target")"}}}}; + +auto pathBetween(std::string const& start, std::string const& end, size_t n) -> Path { + auto path = std::vector{}; + path.push_back(start); + for (size_t i = 0; i < n; ++i) { + path.push_back(std::to_string(i)); } -}; - -// simple tests - -TEST_F(ShortestPathExecutorTest, Waiting_TestingInvalidInputs_UsingBrokenStartVertex) { - RunSimpleTest(true, std::move(brokenSource), std::move(constTarget)); -} - -TEST_F(ShortestPathExecutorTest, Waiting_TestingInvalidInputs_UsingBrokenEndVertex) { - RunSimpleTest(true, std::move(constSource), std::move(brokenTarget)); -} - -TEST_F(ShortestPathExecutorTest, Waiting_TestingInvalidInputs_UsingBrokenStartAndEndVertex) { - RunSimpleTest(true, std::move(brokenSource), std::move(brokenTarget)); -} - -TEST_F(ShortestPathExecutorTest, NotWaiting_TestingInvalidInputs_UsingBrokenStartVertex) { - RunSimpleTest(false, std::move(brokenSource), std::move(constTarget)); -} - -TEST_F(ShortestPathExecutorTest, NotWaiting_TestingInvalidInputs_UsingBrokenEndVertex) { - RunSimpleTest(false, std::move(constSource), std::move(brokenTarget)); -} - -TEST_F(ShortestPathExecutorTest, NotWaiting_TestingInvalidInputs_UsingBrokenStartAndEndVertex) { - RunSimpleTest(false, std::move(brokenSource), std::move(brokenTarget)); -} - -// no rows - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(constSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - 
RunTestWithNoRowsUpstream(true, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(constSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_NoRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(true, std::move(regSource), std::move(regTarget), true); -} - -// with rows, no path - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(regTarget), true); -} - -// with rows, one path - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), 
std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(true, std::move(regSource), std::move(regTarget), true); -} - -// with multiple rows - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(constSource), - std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(constSource), - std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - Waiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(true, std::move(regSource), std::move(regTarget), true); -} - -// no rows - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(false, 
std::move(constSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(constSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_NoRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithNoRowsUpstream(false, std::move(regSource), std::move(regTarget), true); -} - -// with rows, no path - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), - std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), - std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsNoPath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(regTarget), true); -} - 
-// with rows, one path - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), - std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), - std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithRowsOnePath_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithRowsUpstreamOnePath(false, std::move(regSource), std::move(regTarget), true); -} - -// with multiple rows - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(constSource), - std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexOutputOnly_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(constSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(regSource), std::move(constTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexOutputOnly_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(regSource), std::move(regTarget), false); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingConstantTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(constSource), - std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingConstantSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(constSource), std::move(regTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingConstantTargetInput) { - 
RunTestWithMultipleRowsUpstream(false, std::move(regSource), std::move(constTarget), true); -} - -TEST_F(ShortestPathExecutorTest, - NotWaiting_WithMultipleRows_UsingVertexAndEdgeOutput_UsingRegisterSourceInput_UsingRegisterTargetInput) { - RunTestWithMultipleRowsUpstream(false, std::move(regSource), std::move(regTarget), true); -} + path.push_back(end); + return {path}; +} + +PathSequence const noPath = {}; +PathSequence const onePath = {pathBetween("vertex/source", "vertex/target", 10)}; +PathSequence const threePaths = {pathBetween("vertex/source", "vertex/target", 10), + pathBetween("vertex/source", "vertex/b", 100), + pathBetween("vertex/a", "vertex/b", 1000)}; +PathSequence const somePaths = {pathBetween("vertex/source", "vertex/target", 10), + pathBetween("vertex/source", "vertex/b", 100), + pathBetween("vertex/a", "vertex/b", 1000), + pathBetween("vertex/c", "vertex/d", 2001)}; +PathSequence const someOtherPaths = {pathBetween("vertex/a", "vertex/target", 10), + pathBetween("vertex/b", "vertex/target", 999), + pathBetween("vertex/c", "vertex/target", 1001), + pathBetween("vertex/d", "vertex/target", 2000), + pathBetween("vertex/e", "vertex/target", 200), + pathBetween("vertex/f", "vertex/target", 15), + pathBetween("vertex/g", "vertex/target", 10)}; + +auto sources = testing::Values(constSource, regSource, brokenSource); +auto targets = testing::Values(constTarget, regTarget, brokenTarget); +auto inputs = testing::Values(noneRow, oneRow, twoRows, threeRows, someRows); +auto paths = testing::Values(noPath, onePath, threePaths, somePaths); +auto calls = + testing::Values(AqlCall{}, AqlCall{0, 0, 0, false}, AqlCall{0, 1, 0, false}, + AqlCall{0, 0, 1, false}, AqlCall{0, 1, 1, false}, AqlCall{1, 1, 1}, + AqlCall{100, 1, 1}, AqlCall{1000}, AqlCall{0, 0, 0, true}, + AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, true}); + +auto variants = testing::Values(ShortestPathOutput::VERTEX_ONLY, + ShortestPathOutput::VERTEX_AND_EDGE); +auto blockSizes = testing::Values(size_t{5}, 1000); + +INSTANTIATE_TEST_CASE_P(ShortestPathExecutorTestInstance, ShortestPathExecutorTest, + testing::Combine(sources, targets, inputs, paths, calls, + variants, blockSizes)); } // namespace aql } // namespace tests diff --git a/tests/Aql/TestLambdaExecutor.cpp b/tests/Aql/TestLambdaExecutor.cpp index 92083adc9fb1..4fe9c2812cc2 100644 --- a/tests/Aql/TestLambdaExecutor.cpp +++ b/tests/Aql/TestLambdaExecutor.cpp @@ -109,4 +109,4 @@ auto TestLambdaSkipExecutor::produceRows(AqlItemBlockInputRange& input, OutputAq auto TestLambdaSkipExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) -> std::tuple { return _infos.getSkipLambda()(input, call); -} \ No newline at end of file +} diff --git a/tests/Aql/TestLambdaExecutor.h b/tests/Aql/TestLambdaExecutor.h index 9663ca6ca249..4905ea56f915 100644 --- a/tests/Aql/TestLambdaExecutor.h +++ b/tests/Aql/TestLambdaExecutor.h @@ -230,4 +230,4 @@ class TestLambdaSkipExecutor { } // namespace aql } // namespace arangodb -#endif \ No newline at end of file +#endif From 56d9cf07150091204c45a6a59ac2afe7117ebdee Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 28 Jan 2020 23:03:21 +0100 Subject: [PATCH 056/122] Feature/aql subquery execution block impl execute implementation exec block impl tests shadow rows (#10970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added a generic Lambda Executor, this can be used in ExecutionBlockImplTests to have finegrained control over the action happening within the Executor * 
Added first test using the LambdaExecutor.
* Added some tests around Execute. GetSome seems to be okayish; skip does not. Namely, skipped numbers are not reported correctly.
* Made the first ExecutionBlock Execute integration test pass. Other tests are still failing.
* Simplified the IsSkipSome test; this allows the C++ tests for Mixed Calls to pass.
* Added a skip test and fixed the producing executor to recreate the list once for every input line.
* More tests. Also added a custom AqlCall printer function for GTest. Tests are still red; fullCount and hardLimit need to be fixed.
* Implemented the ostream operator for AqlCall.
* Properly implemented fullCount, including a unit test.
* Added tests for call forwarding. They still have some TODOs, but these can only be solved by upgrading the OutputRow, which should be part of a separate PR.
* Added another test for call forwarding in the passthrough state.
* Added a test implementation for an executor that uses a dynamic skip implementation.
* Fixed skip with HARDLIMIT.
* Started to implement the call forwarding test. However, we need to improve OutputRow first; this will be done in a separate branch.
* Removed designated initializers. Thanks for not supporting them, MSVC!
* Removed the non-passthrough non-skip Lambda Executor again. We decided to disallow this.
* Update tests/Aql/ExecutionBlockImplTest.cpp Co-Authored-By: Markus Pfeiffer
* Started to add the implementation of passthrough block allocation.
* Added a comparator to AqlCall, mostly for tests.
* Fixed an issue in the skip-passthrough version. Updated the tests.
* Allow 'overSkip' if we do fullCount.
* Enabled the first set of tests. Only one set to go.
* Applied all fixes to get the integration test suite green.
* Added some comments on the test cases executed in ExecutionBlockImpl.
* Added test descriptions and removed a duplicate test.
* Added some comments on the LambdaExecutors.
* Added a description of the ExecutionBlockImpl execute logic.
* Added tests for ExecutionBlockImpl when a WAITING is passed through. There is no test yet for in-between waitings.
* Allow configuring the WAITING variant in the WAITING block mock.
* Use the WaitingBlockMock to be independent of the Singleton in tests.
* Added WAITING + Passthrough tests.
* Added a test to pass through shadowRows without data.
* The waiting ExecutionBlock mock can now return/skip partial blocks. However, it does NOT CARE about ShadowRows.
* Added tests and fixed an issue in ShadowRow forwarding.
* Added tests for shadowRow forwarding.
* Added the internal state of Execute as a member of ExecutionBlockImpl. We need to retain parts of this state across returns.
* Fixed all Waiting and multiple-call tests.
* Applied review comments, thanks to the reviewers.
* Fixed a modulo-2 off-by-one error.
* Fixed an invalid memory access in test code. Never return a lambda that references a stack variable...
* Renamed getLambda() => getProduceLambda() in test code, as we have produce and skip.
* Allways -> Always.
* Added tests for pass-by of non-relevant subqueries. Fixed a bug found on the way in WaitingBlockMock. Now some other tests fail \o/
* Fixed failed tests.
Now tests correspond to correct behaviour of WaitingExecutionBlockMock * Intermediate version, doe snot work * Fixed the empty subquery tests Co-authored-by: Markus Pfeiffer Co-authored-by: Tobias Gödderz --- arangod/Aql/AqlCallStack.cpp | 10 + arangod/Aql/AqlCallStack.h | 4 + arangod/Aql/ExecutionBlockImpl.cpp | 150 +-- arangod/Aql/ExecutionBlockImpl.h | 33 + tests/Aql/ExecutionBlockImplTest.cpp | 1395 ++++++++++++++++++++--- tests/Aql/TestLambdaExecutor.cpp | 24 +- tests/Aql/TestLambdaExecutor.h | 34 +- tests/Aql/WaitingExecutionBlockMock.cpp | 91 +- tests/Aql/WaitingExecutionBlockMock.h | 18 +- 9 files changed, 1529 insertions(+), 230 deletions(-) diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 608febad859b..33975e259bf0 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -83,5 +83,15 @@ void AqlCallStack::pop() { _operations.pop(); // We can never pop the main query, so one element needs to stay TRI_ASSERT(!_operations.empty()); + } else { + _depth--; } +} + +auto AqlCallStack::increaseSubqueryDepth() -> void { + // Avoid overflow. If you actually have a subquery nesting of size_t many subqueries + // there is a rather high chance that your query will not perform well. + TRI_ASSERT(_depth < std::numeric_limits::max() - 2); + _depth++; + TRI_ASSERT(!isRelevant()); } \ No newline at end of file diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index f9fbec8dd927..214de85d1e4d 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -67,6 +67,10 @@ class AqlCallStack { // Can be savely called on every subquery Start. void pop(); + // Increase the subquery by one, not placing another call on the stack + // This is used to bypass all executors until we reach the next subquery start. + void increaseSubqueryDepth(); + private: // The list of operations, stacked by depth (e.g. bottom element is from main query) std::stack _operations; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 035f760bc720..b8e170e6c649 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -176,6 +176,9 @@ ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, _query(*engine->getQuery()), _state(InternalState::FETCH_DATA), _lastRange{ExecutorState::HASMORE}, + _execState{ExecState::CHECKCALL}, + _upstreamRequest{}, + _clientRequest{}, _hasUsedDataRangeBlock{false} { // already insert ourselves into the statistics results if (_profile >= PROFILE_LEVEL_BLOCKS) { @@ -1051,36 +1054,12 @@ void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { _outputItemRow = allocateOutputBlock(std::move(call)); } else { _outputItemRow->setCall(std::move(call)); -#ifdef ARANGODB_ENABLE_MAINTAINER_MODE -// We only inject a new call into the output row. -// In the passhrough variant we need to ensure that inputBlock and outputBlock stay identical -// TODO add an external assertion for this. -#endif } } -/// @brief request an AqlItemBlock from the memory manager -template -SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, - RegisterId nrRegs) { - return _engine->itemBlockManager().requestBlock(nrItems, nrRegs); -} - -// TODO move me up -enum ExecState { - SKIP, - PRODUCE, - FASTFORWARD, - FULLCOUNT, - UPSTREAM, - SHADOWROWS, - DONE -}; - -// TODO clean me up -namespace { // This cannot return upstream call or shadowrows. 
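The dispatch that the new nextState() below encodes can be sketched in isolation. The following is a minimal, self-contained illustration, not ArangoDB code: the simplified Call struct and the ordering of the checks after the offset test are assumptions based on the ExecState descriptions (offset pending -> SKIP, limit left -> PRODUCE, hard limit reached -> FULLCOUNT or FASTFORWARD depending on fullCount, otherwise DONE); only the offset-first rule is taken verbatim from the function itself.

// Minimal standalone sketch (illustration only, simplified stand-in types).
#include <cassert>
#include <cstddef>

enum class ExecState { SKIP, PRODUCE, FASTFORWARD, FULLCOUNT, DONE };

struct Call {
  std::size_t offset = 0;     // rows still to skip
  std::size_t limit = 0;      // rows still to produce
  bool hasHardLimit = false;  // the limit is a hard limit
  bool fullCount = false;     // count rows dropped beyond the hard limit
};

ExecState nextState(Call const& c) {
  if (c.offset > 0) return ExecState::SKIP;            // first skip
  if (c.limit > 0) return ExecState::PRODUCE;          // then produce
  if (c.hasHardLimit && c.fullCount) return ExecState::FULLCOUNT;
  if (c.hasHardLimit) return ExecState::FASTFORWARD;   // drop what remains
  return ExecState::DONE;
}

int main() {
  assert(nextState(Call{2, 5, true, true}) == ExecState::SKIP);
  assert(nextState(Call{0, 5, true, true}) == ExecState::PRODUCE);
  assert(nextState(Call{0, 0, true, true}) == ExecState::FULLCOUNT);
  assert(nextState(Call{0, 0, true, false}) == ExecState::FASTFORWARD);
  assert(nextState(Call{0, 0, false, false}) == ExecState::DONE);
  return 0;
}

In the real ExecutionBlockImpl the result of this check drives the while loop in executeWithoutTrace, with CHECKCALL re-evaluating it after every phase.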
-ExecState NextState(AqlCall const& call) { +template +auto ExecutionBlockImpl::nextState(AqlCall const& call) const -> ExecState { if (call.getOffset() > 0) { // First skip return ExecState::SKIP; @@ -1101,7 +1080,12 @@ ExecState NextState(AqlCall const& call) { return ExecState::DONE; } -} // namespace +/// @brief request an AqlItemBlock from the memory manager +template +SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, + RegisterId nrRegs) { + return _engine->itemBlockManager().requestBlock(nrItems, nrRegs); +} // // FETCHER: if we have one output row per input row, we can skip @@ -1239,36 +1223,55 @@ template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (isNewStyleExecutor()) { + if (!stack.isRelevant()) { + // We are bypassing subqueries. + // This executor is not allowed to perform actions + // However we need to maintain the upstream state. + size_t skippedLocal = 0; + typename Fetcher::DataRange bypassedRange{ExecutorState::HASMORE}; + std::tie(_upstreamState, skippedLocal, bypassedRange) = _rowFetcher.execute(stack); + return {_upstreamState, skippedLocal, bypassedRange.getBlock()}; + } AqlCall clientCall = stack.popCall(); - auto skipped = size_t{0}; - - auto execState = ::NextState(clientCall); - - if (_lastRange.hasShadowRow()) { - // We have not been able to move all shadowRows into the output last - // time. Continue from there. - // TODO test if this works with COUNT COLLECT - execState = ExecState::SHADOWROWS; + // We can only have returned the following internal states + TRI_ASSERT(_execState == ExecState::CHECKCALL || _execState == ExecState::SHADOWROWS || + _execState == ExecState::UPSTREAM); + // Skip can only be > 0 if we are in upstream cases. + TRI_ASSERT(_skipped == 0 || _execState == ExecState::UPSTREAM); + if (_execState == ExecState::UPSTREAM) { + // We have been in waiting state. + // We may have local work on the original call. + // The client does not have the right to change her + // mind just because we told her to hold the line. + + // The client cannot request less data! + TRI_ASSERT(_clientRequest.getOffset() <= clientCall.getOffset()); + TRI_ASSERT(_clientRequest.getLimit() <= clientCall.getLimit()); + TRI_ASSERT(_clientRequest.needsFullCount() == clientCall.needsFullCount()); + clientCall = _clientRequest; } - AqlCall executorRequest; - while (execState != ExecState::DONE) { - switch (execState) { + while (_execState != ExecState::DONE) { + switch (_execState) { + case ExecState::CHECKCALL: { + _execState = nextState(clientCall); + break; + } case ExecState::SKIP: { auto [state, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); - skipped += skippedLocal; + _skipped += skippedLocal; // The execute might have modified the client call. if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; + _execState = ExecState::SHADOWROWS; } else if (clientCall.getOffset() > 0) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; + _upstreamRequest = call; + _execState = ExecState::UPSTREAM; } else { // We are done with skipping. 
Skip is not allowed to request more - execState = ::NextState(clientCall); + _execState = ExecState::CHECKCALL; } break; } @@ -1288,17 +1291,17 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { clientCall = _outputItemRow->getClientCall(); if (_outputItemRow->isInitialized() && _outputItemRow->allRowsUsed()) { - execState = ExecState::DONE; + _execState = ExecState::DONE; } else if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; + _execState = ExecState::SHADOWROWS; } else if (clientCall.getLimit() > 0 && !_lastRange.hasDataRow()) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; + _upstreamRequest = call; + _execState = ExecState::UPSTREAM; } else { // We are done with producing. Produce is not allowed to request more - execState = ::NextState(clientCall); + _execState = ExecState::CHECKCALL; } break; } @@ -1313,25 +1316,25 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(row.isInitialized()); } if (_lastRange.upstreamState() == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; + _execState = ExecState::SHADOWROWS; } else { // We need to request more, simply send hardLimit 0 upstream - executorRequest = AqlCall{}; - executorRequest.hardLimit = 0; - execState = ExecState::UPSTREAM; + _upstreamRequest = AqlCall{}; + _upstreamRequest.hardLimit = 0; + _execState = ExecState::UPSTREAM; } break; } case ExecState::FULLCOUNT: { auto [state, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); - skipped += skippedLocal; + _skipped += skippedLocal; if (state == ExecutorState::DONE) { - execState = ExecState::SHADOWROWS; + _execState = ExecState::SHADOWROWS; } else { // We need to request more - executorRequest = call; - execState = ExecState::UPSTREAM; + _upstreamRequest = call; + _execState = ExecState::UPSTREAM; } break; } @@ -1339,22 +1342,26 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // If this triggers the executors produceRows function has returned // HASMORE even if it knew that upstream has no further rows. TRI_ASSERT(_upstreamState != ExecutionState::DONE); + // We need to make sure _lastRange is all used TRI_ASSERT(!_lastRange.hasDataRow()); + TRI_ASSERT(!_lastRange.hasShadowRow()); size_t skippedLocal = 0; - stack.pushCall(std::move(executorRequest)); + auto callCopy = _upstreamRequest; + stack.pushCall(std::move(callCopy)); std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); if (_upstreamState == ExecutionState::WAITING) { + // We need to persist the old call before we return. + // We might have some local accounting to this call. + _clientRequest = clientCall; // We do not return anything in WAITING state, also NOT skipped. - // TODO: Check if we need to leverage this restriction. - TRI_ASSERT(skipped == 0); return {_upstreamState, 0, nullptr}; } // We have a new range, passthrough can use this range. _hasUsedDataRangeBlock = false; - skipped += skippedLocal; + _skipped += skippedLocal; // We skipped through passthroug, so count that a skip was solved. clientCall.didSkip(skippedLocal); - execState = ::NextState(clientCall); + _execState = ExecState::CHECKCALL; break; } case ExecState::SHADOWROWS: { @@ -1376,27 +1383,29 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if (shadowRow.isRelevant()) { // We found a relevant shadow Row. // We need to reset the Executor - // TODO: call reset! 
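                // (Descriptive note: the relevant shadow row closes the
                // current subquery iteration, so the executor is
                // re-initialized here and the next iteration starts from a
                // fresh cursor state.)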
+ // cppcheck-suppress unreadVariable + constexpr bool customInit = hasInitializeCursor::value; + InitializeCursor::init(_executor, _rowFetcher, _infos); } TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); clientCall = _outputItemRow->getClientCall(); if (_outputItemRow->allRowsUsed()) { - execState = ExecState::DONE; + _execState = ExecState::DONE; } else if (state == ExecutorState::DONE) { if (_lastRange.hasDataRow()) { // TODO this state is invalid, and can just show up now if we exclude SKIP - execState = ExecState::PRODUCE; + _execState = ExecState::PRODUCE; } else { // Right now we cannot support to have more than one set of // ShadowRows inside of a Range. // We do not know how to continue with the above executor after a shadowrow. TRI_ASSERT(!_lastRange.hasDataRow()); - execState = ExecState::DONE; + _execState = ExecState::DONE; } } } else { - execState = ExecState::DONE; + _execState = ExecState::DONE; } break; } @@ -1408,9 +1417,16 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // If we do not have an output, we simply return a nullptr here. auto outputBlock = _outputItemRow != nullptr ? _outputItemRow->stealBlock() : SharedAqlItemBlockPtr{nullptr}; + // We are locally done with our output. + // Next time we need to check the client call again + _execState = ExecState::CHECKCALL; // This is not strictly necessary here, as we shouldn't be called again // after DONE. _outputItemRow.reset(); + + // We return skipped here, reset member + size_t skipped = _skipped; + _skipped = 0; if (_lastRange.hasDataRow() || _lastRange.hasShadowRow()) { // We have skipped or/and return data, otherwise we cannot return HASMORE TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 064b087574f3..16169b212d06 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -26,6 +26,7 @@ #ifndef ARANGOD_AQL_EXECUTION_BLOCK_IMPL_H #define ARANGOD_AQL_EXECUTION_BLOCK_IMPL_H 1 +#include "Aql/AqlCall.h" #include "Aql/ConstFetcher.h" #include "Aql/DependencyProxy.h" #include "Aql/ExecutionBlock.h" @@ -106,8 +107,30 @@ class ExecutionBlockImpl final : public ExecutionBlock { "allowsBlockPassthrough must imply preservesOrder, but does not!"); private: + // Used in getSome/skipSome implementation. deprecated enum class InternalState { FETCH_DATA, FETCH_SHADOWROWS, DONE }; + // Used in execute implmentation + // Defines the internal state this executor is in. + enum class ExecState { + // We need to check the client call to define the next state (inital state) + CHECKCALL, + // We are skipping rows in offset + SKIP, + // We are producing rows + PRODUCE, + // We are done producing (limit reached) and drop all rows that are unneeded + FASTFORWARD, + // We are done producing (limit reached), but we count all rows that could be used on higher limit + FULLCOUNT, + // We need more information from dependency + UPSTREAM, + // We are done with a subquery, we need to pass forward ShadowRows + SHADOWROWS, + // Locally done, ready to return, will set state to resetted + DONE + }; + public: /** * @brief Construct a new ExecutionBlock @@ -258,6 +281,10 @@ class ExecutionBlockImpl final : public ExecutionBlock { // Will as a side effect modify _outputItemRow void ensureOutputBlock(AqlCall&& call); + // Compute the next state based on the given call. 
+ // Can only be one of Skip/Produce/FullCount/FastForward/Done + [[nodiscard]] auto nextState(AqlCall const& call) const -> ExecState; + private: /** * @brief Used to allow the row Fetcher to access selected methods of this @@ -290,6 +317,12 @@ class ExecutionBlockImpl final : public ExecutionBlock { DataRange _lastRange; + ExecState _execState; + + AqlCall _upstreamRequest; + + AqlCall _clientRequest; + // Only used in passthrough variant. // We track if we have reference the range's block // into an output block. diff --git a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index fef41d9d983b..49bf8ad9dcf0 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -489,9 +489,11 @@ class SharedExecutionBlockImplTest { * @param outputRegisters highest output register index. RegisterPlan::MaxRegisterId (default) describes there is no output. call is allowed to write any inputRegisters < register <= outputRegisters. Invariant inputRegisters <= outputRegisters * @return LambdaExecutorInfos Infos to build the Executor. */ - LambdaSkipExecutorInfos makeSkipInfos(ProduceCall call, SkipCall skipCall, - RegisterId inputRegisters = RegisterPlan::MaxRegisterId, - RegisterId outputRegisters = RegisterPlan::MaxRegisterId) { + LambdaSkipExecutorInfos makeSkipInfos( + ProduceCall call, SkipCall skipCall, + RegisterId inputRegisters = RegisterPlan::MaxRegisterId, + RegisterId outputRegisters = RegisterPlan::MaxRegisterId, + ResetCall reset = []() -> void {}) { if (inputRegisters != RegisterPlan::MaxRegisterId) { EXPECT_LE(inputRegisters, outputRegisters); // We cannot have no output registers here. @@ -524,22 +526,7 @@ class SharedExecutionBlockImplTest { (outputRegisters == RegisterPlan::MaxRegisterId) ? 0 : outputRegisters + 1; return LambdaSkipExecutorInfos(readAble, writeAble, regsToRead, regsToWrite, {}, registersToKeep, std::move(call), - std::move(skipCall)); - } - /** - * @brief Create a Singleton ExecutionBlock. Just like the original one in the - * query. it is already initialized and ready to use. - * - * @return std::unique_ptr The singleton ExecutionBlock. - */ - std::unique_ptr createSingleton() { - auto res = std::make_unique>>( - fakedQuery->engine(), generateNodeDummy(), IdExecutorInfos{0, {}, {}}); - InputAqlItemRow inputRow{CreateInvalidInputRowHint{}}; - auto const [state, result] = res->initializeCursor(inputRow); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_TRUE(result.ok()); - return res; + std::move(skipCall), std::move(reset)); } /** @@ -679,6 +666,22 @@ class SharedExecutionBlockImplTest { class ExecutionBlockImplExecuteSpecificTest : public SharedExecutionBlockImplTest, public testing::TestWithParam { protected: + /** + * @brief Create a Singleton ExecutionBlock. Just like the original one in the + * query. it is already initialized and ready to use. + * + * @return std::unique_ptr The singleton ExecutionBlock. + */ + std::unique_ptr createSingleton() { + auto res = std::make_unique>>( + fakedQuery->engine(), generateNodeDummy(), IdExecutorInfos{0, {}, {}}); + InputAqlItemRow inputRow{CreateInvalidInputRowHint{}}; + auto const [state, result] = res->initializeCursor(inputRow); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_TRUE(result.ok()); + return res; + } + /** * @brief Generic test runner. 
Creates Lambda Executors, and returns ExecutionBlockImpl.execute(call), * @@ -704,6 +707,57 @@ class ExecutionBlockImplExecuteSpecificTest : public SharedExecutionBlockImplTes return testee.execute(stack); } } + + auto onceLinesProducer(ExecutionBlock* dependency, size_t numberLines) + -> std::unique_ptr { + RegisterId outReg = 0; + RegisterId inReg = RegisterPlan::MaxRegisterId; + SkipCall skipCall = generateNeverSkipCall(); + auto didProduce = std::make_shared(false); + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < numberLines; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + + ProduceCall prodCall = + [outReg, didProduce, + builder](AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) -> std::tuple { + if (!inputRange.hasDataRow()) { + // Initial call, we have not produced yet. + // Ask for more + AqlCall call{}; + return {inputRange.upstreamState(), NoStats{}, call}; + } + // We only need to get here exactly once + EXPECT_FALSE(*didProduce); + if (*didProduce) { + // Should never get here. Emergency exit. + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + *didProduce = true; + auto slice = builder->slice(); + // We need to ensure that the data fits into the given output. + EXPECT_GE(output.numRowsLeft(), slice.length()); + auto [state, input] = inputRange.nextDataRow(); + for (auto it : VPackArrayIterator(slice)) { + output.cloneValueInto(outReg, input, AqlValue(it)); + output.advanceRow(); + } + AqlCall call{}; + return {inputRange.upstreamState(), NoStats{}, call}; + }; + + std::unique_ptr res = + std::make_unique>(fakedQuery->engine(), + generateNodeDummy(), + makeSkipInfos(prodCall, skipCall, + inReg, outReg)); + res->addDependency(dependency); + return res; + } }; // Test a default call: no skip, no limits. @@ -824,6 +878,145 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_offset_only_call) { EXPECT_EQ(block, nullptr); } +TEST_P(ExecutionBlockImplExecuteSpecificTest, test_relevant_shadowrow_does_not_fit_in_output) { + if (GetParam()) { + // This test is only for non-passthrough variants + SUCCEED(); + return; + } + std::deque blockDeque; + { + SharedAqlItemBlockPtr block = + buildBlock<0>(fakedQuery->engine()->itemBlockManager(), {{}, {}}, {{1, 0}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + // Produce one full block. The shadowRow has no space left + auto testee = onceLinesProducer(singleton.get(), ExecutionBlock::DefaultBatchSize); + + AqlCall fullCall{}; + AqlCallStack stack{fullCall}; + { + // First call. Fetch all rows (data only) + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->size(), ExecutionBlock::DefaultBatchSize); + EXPECT_FALSE(block->hasShadowRows()); + } + { + // Second call. 
only a single shadowRow left + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->size(), 1); + EXPECT_TRUE(block->hasShadowRows()); + ASSERT_TRUE(block->isShadowRow(0)); + ShadowAqlItemRow shadow{block, 0}; + EXPECT_EQ(shadow.getDepth(), 0); + } +} + +TEST_P(ExecutionBlockImplExecuteSpecificTest, set_of_shadowrows_does_not_fit_in_output) { + if (GetParam()) { + // This test is only for non-passthrough variants + SUCCEED(); + return; + } + std::deque blockDeque; + { + SharedAqlItemBlockPtr block = buildBlock<0>(fakedQuery->engine()->itemBlockManager(), + {{}, {}, {}}, {{1, 0}, {2, 1}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + // Produce one full block. The shadowRow has no space left + auto testee = onceLinesProducer(singleton.get(), ExecutionBlock::DefaultBatchSize); + + AqlCall fullCall{}; + AqlCallStack stack{fullCall}; + { + // First call. Fetch all rows (data only) + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->size(), ExecutionBlock::DefaultBatchSize); + EXPECT_FALSE(block->hasShadowRows()); + } + { + // Second call. only the shadowRows are left + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + ASSERT_EQ(block->size(), 2); + EXPECT_TRUE(block->hasShadowRows()); + { + ASSERT_TRUE(block->isShadowRow(0)); + ShadowAqlItemRow shadow{block, 0}; + EXPECT_EQ(shadow.getDepth(), 0); + } + { + ASSERT_TRUE(block->isShadowRow(1)); + ShadowAqlItemRow shadow{block, 1}; + EXPECT_EQ(shadow.getDepth(), 1); + } + } +} + +TEST_P(ExecutionBlockImplExecuteSpecificTest, set_of_shadowrows_does_not_fit_fully_in_output) { + if (GetParam()) { + // This test is only for non-passthrough variants + SUCCEED(); + return; + } + std::deque blockDeque; + { + SharedAqlItemBlockPtr block = buildBlock<0>(fakedQuery->engine()->itemBlockManager(), + {{}, {}, {}}, {{1, 0}, {2, 1}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + // Produce one full block. The shadowRow has no space left + auto testee = onceLinesProducer(singleton.get(), ExecutionBlock::DefaultBatchSize - 1); + + AqlCall fullCall{}; + AqlCallStack stack{fullCall}; + { + // First call. Fetch all rows (data + relevant shadow row) + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->size(), ExecutionBlock::DefaultBatchSize); + EXPECT_TRUE(block->hasShadowRows()); + ASSERT_TRUE(block->isShadowRow(block->size() - 1)); + ShadowAqlItemRow shadow{block, block->size() - 1}; + EXPECT_EQ(shadow.getDepth(), 0); + } + { + // Second call. 
only the shadowRows are left + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->size(), 1); + EXPECT_TRUE(block->hasShadowRows()); + ASSERT_TRUE(block->isShadowRow(0)); + ShadowAqlItemRow shadow{block, 0}; + EXPECT_EQ(shadow.getDepth(), 1); + } +} + INSTANTIATE_TEST_CASE_P(ExecutionBlockImplExecuteTest, ExecutionBlockImplExecuteSpecificTest, ::testing::Bool()); @@ -847,6 +1040,9 @@ struct BaseCallAsserter { // The expected outer call, the machine needs to extract relevant parts AqlCall const expected; + // Initial state of this executor. Will return to this state on reset. + CallAsserterState initialState = CallAsserterState::DONE; + /** * @brief Construct a new Base Call Asserter object * @@ -855,6 +1051,17 @@ struct BaseCallAsserter { explicit BaseCallAsserter(AqlCall const& expectedCall) : expected{expectedCall} {} + virtual ~BaseCallAsserter() {} + + /** + * @brief Reset to 0 calls and to initialState + * + */ + auto virtual reset() -> void { + call = 0; + state = initialState; + } + /** * @brief Test if we need to expect a skip phase * @@ -876,6 +1083,24 @@ struct BaseCallAsserter { * @return false */ auto needsFullCount() const -> bool { return expected.needsFullCount(); } + + auto gotCalled(AqlCall const& got) -> void { + call++; + SCOPED_TRACE("In call " + std::to_string(call) + " of " + + std::to_string(maxCall) + " state " + + std::to_string( + static_cast::type>(state))); + gotCalledWithoutTrace(got); + EXPECT_LE(call, maxCall); + if (call > maxCall) { + // Security bailout to avoid infinite loops + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + } + + auto getNumberCalls() const -> size_t { return call; } + + virtual auto gotCalledWithoutTrace(AqlCall const& got) -> void = 0; }; /** @@ -894,18 +1119,18 @@ struct SkipCallAsserter : public BaseCallAsserter { // so skip needs to be last here if (needsFullCount()) { maxCall += 2; - state = CallAsserterState::COUNT; + initialState = CallAsserterState::COUNT; } if (hasSkip()) { maxCall += 2; - state = CallAsserterState::SKIP; + initialState = CallAsserterState::SKIP; } // It is possible that we actually have 0 calls. // if there is neither skip nor limit + state = initialState; } - auto gotCalled(AqlCall const& got) -> void { - call++; + auto gotCalledWithoutTrace(AqlCall const& got) -> void override { switch (state) { case CallAsserterState::SKIP: { EXPECT_EQ(got.getOffset(), expected.getOffset()); @@ -935,11 +1160,6 @@ struct SkipCallAsserter : public BaseCallAsserter { break; } } - EXPECT_LE(call, maxCall); - if (call > maxCall) { - // Security bailout to avoid infinite loops - THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); - } } }; @@ -954,13 +1174,15 @@ struct CallAsserter : public BaseCallAsserter { // Calculate number of calls if (hasLimit()) { maxCall += 2; - state = CallAsserterState::INITIAL; + initialState = CallAsserterState::INITIAL; } + // It is possible that we actually have 0 calls. 
+ // if there is neither skip nor limit + state = initialState; } - auto gotCalled(AqlCall const& got) -> void { + auto gotCalledWithoutTrace(AqlCall const& got) -> void override { EXPECT_EQ(got.getOffset(), 0); - call++; switch (state) { case CallAsserterState::INITIAL: { EXPECT_EQ(got.getLimit(), expected.getLimit()); @@ -980,19 +1202,14 @@ struct CallAsserter : public BaseCallAsserter { break; } } - EXPECT_LE(call, maxCall); - if (call > maxCall) { - // Security bailout to avoid infinite loops - THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); - } } }; /** * @brief Asserter used "above" an executor that implements * skip and produce, and transforms everything to produce. - * Expects to be called twice for each sitation (with and without input). - * Expect up to three situations: SKIP, GET and FULLCOUNT. + * Expects to be called twice for each sitation (with and without + * input). Expect up to three situations: SKIP, GET and FULLCOUNT. */ struct GetOnlyCallAsserter : public BaseCallAsserter { explicit GetOnlyCallAsserter(AqlCall const& expectedCall) @@ -1003,25 +1220,25 @@ struct GetOnlyCallAsserter : public BaseCallAsserter { // so skip needs to be last here if (needsFullCount()) { maxCall += 2; - state = CallAsserterState::COUNT; + initialState = CallAsserterState::COUNT; } if (hasLimit()) { maxCall += 2; - state = CallAsserterState::GET; + initialState = CallAsserterState::GET; } if (hasSkip()) { maxCall += 2; - state = CallAsserterState::SKIP; + initialState = CallAsserterState::SKIP; } + state = initialState; // Make sure setup worked EXPECT_GT(maxCall, 0); EXPECT_NE(state, CallAsserterState::DONE); } - auto gotCalled(AqlCall const& got) -> void { + auto gotCalledWithoutTrace(AqlCall const& got) -> void override { EXPECT_EQ(got.getOffset(), 0); EXPECT_FALSE(got.needsFullCount()); - call++; switch (state) { case CallAsserterState::SKIP: { @@ -1072,14 +1289,23 @@ struct GetOnlyCallAsserter : public BaseCallAsserter { break; } } - EXPECT_LE(call, maxCall); - if (call > maxCall) { - // Security bailout to avoid infinite loops - THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); - } } }; +/** + * @brief None asserter, does not assert any thing within a call. + * Only asserts that we are not called more than maxCalls times. + * + */ +struct NoneAsserter : public BaseCallAsserter { + explicit NoneAsserter(AqlCall const& expectedCall, size_t maxCalls) + : BaseCallAsserter{expectedCall} { + maxCall = maxCalls; + } + + auto gotCalledWithoutTrace(AqlCall const& got) -> void override {} +}; + /** * @brief Integration tests. * These test tests a chain of Executors. @@ -1089,11 +1315,85 @@ struct GetOnlyCallAsserter : public BaseCallAsserter { * Also asserts that "UPSTREAM" is called with the correct * forwarded call. * This is a parameterized testsuite that uses a set of pseudo-random AqlCalls of different formats. + * The second parameter is a boolean to flag if we use WAITING on singleton. */ class ExecutionBlockImplExecuteIntegrationTest : public SharedExecutionBlockImplTest, - public testing::TestWithParam { + public testing::TestWithParam> { protected: + /** + * @brief Get the Call object + * + * @return AqlCall used as test parameter + */ + AqlCall getCall() const { + auto const [call, waits] = GetParam(); + return call; + } + + /** + * @brief Get the combination if we are waiting or not. + * + * @return true We need waiting + * @return false We do not. 
+ */ + bool doesWaiting() const { + auto const [call, waits] = GetParam(); + return waits; + } + + /** + * @brief Assert that the given value is equal to the given number + * + * @param block The AqlItemBlock the value is stored in + * @param row The row number of the value + * @param reg The register number of the value + * @param expected the expected number + */ + auto AssertValueEquals(SharedAqlItemBlockPtr const& block, size_t row, + RegisterId reg, size_t expected) const -> void { + ASSERT_NE(block, nullptr); + ASSERT_GT(block->size(), row); + ASSERT_GE(block->getNrRegs(), reg); + auto const& value = block->getValueReference(row, reg); + ASSERT_TRUE(value.isNumber()); + EXPECT_EQ(static_cast(value.toInt64()), expected); + } + + /** + * @brief Assert that the given row in the block, is a shadow row of the expected depth + * + * @param block The AqlItemBlock the row is stored in + * @param row The shadow row number + * @param expected The expected depth + */ + auto AssertIsShadowRowOfDepth(SharedAqlItemBlockPtr const& block, size_t row, + size_t expected) -> void { + ASSERT_NE(block, nullptr); + ASSERT_GT(block->size(), row); + ASSERT_TRUE(block->isShadowRow(row)); + auto val = block->getShadowRowDepth(row); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(static_cast(val.toInt64()), expected); + } + + /** + * @brief Create a Singleton ExecutionBlock. Just like the original one in + * the query. it is already initialized and ready to use. + * + * @return std::unique_ptr The singleton ExecutionBlock. + */ + std::unique_ptr createSingleton() { + std::deque blockDeque; + SharedAqlItemBlockPtr block = + buildBlock<0>(fakedQuery->engine()->itemBlockManager(), {{}}); + blockDeque.push_back(std::move(block)); + return std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + } + /** * @brief Create a Producing ExecutionBlock * For every input row this block will write the array given in data @@ -1111,8 +1411,10 @@ class ExecutionBlockImplExecuteIntegrationTest TRI_ASSERT(dependency != nullptr); TRI_ASSERT(data != nullptr); TRI_ASSERT(data->slice().isArray()); + // We make this a shared ptr just to make sure someone retains the data. auto iterator = std::make_shared(data->slice()); + auto resetCall = [iterator]() -> void { iterator->reset(); }; auto writeData = [data, outReg, iterator](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) -> std::tuple { @@ -1167,10 +1469,10 @@ class ExecutionBlockImplExecuteIntegrationTest call.fullCount = false; return {inputRange.upstreamState(), skipped, call}; }; - auto infos = outReg == 0 - ? makeSkipInfos(std::move(writeData), skipData, - RegisterPlan::MaxRegisterId, outReg) - : makeSkipInfos(std::move(writeData), skipData, outReg - 1, outReg); + auto infos = outReg == 0 ? makeSkipInfos(std::move(writeData), skipData, + RegisterPlan::MaxRegisterId, outReg, resetCall) + : makeSkipInfos(std::move(writeData), skipData, + outReg - 1, outReg, resetCall); auto producer = std::make_unique>(fakedQuery->engine(), generateNodeDummy(), @@ -1183,14 +1485,18 @@ class ExecutionBlockImplExecuteIntegrationTest * @brief Create a simple row forwarding Block. 
* It simply takes one input row and copies it into the output * + * @param asserter A call asserter, that will invoke getCalled on every call * @param dependency The dependecy of this block (produces input) * @param maxReg The number of registers in input and output. (required for forwarding of data) * @return std::unique_ptr ready to use ForwardingBlock. */ - std::unique_ptr forwardBlock(ExecutionBlock* dependency, RegisterId maxReg) { + std::unique_ptr forwardBlock(BaseCallAsserter& asserter, + ExecutionBlock* dependency, + RegisterId maxReg) { TRI_ASSERT(dependency != nullptr); - auto forwardData = [](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + auto forwardData = [&asserter](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) -> std::tuple { + asserter.gotCalled(output.getClientCall()); while (inputRange.hasDataRow() && !output.isFull()) { auto const& [state, input] = inputRange.nextDataRow(); EXPECT_TRUE(input.isInitialized()); @@ -1206,6 +1512,77 @@ class ExecutionBlockImplExecuteIntegrationTest return producer; } + /** + * @brief Create a simple row forwarding Block. + * It simply takes one input row and copies it into the output. + * Implements Skip + * + * @param produceAsserter A call asserter, that will invoke getCalled on every produce call + * @param skipAsserter A call asserter, that will invoke getCalled on every skip call + * @param dependency The dependecy of this block (produces input) + * @param maxReg The number of registers in input and output. (required for forwarding of data) + * @return std::unique_ptr ready to use ForwardingBlock. + */ + std::unique_ptr forwardBlock(BaseCallAsserter& produceAsserter, + BaseCallAsserter& skipAsserter, + ExecutionBlock* dependency, + RegisterId maxReg) { + TRI_ASSERT(dependency != nullptr); + auto forwardData = [&produceAsserter](AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { + produceAsserter.gotCalled(output.getClientCall()); + while (inputRange.hasDataRow() && !output.isFull()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + output.copyRow(input); + output.advanceRow(); + } + return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; + }; + + auto skipData = [&skipAsserter](AqlItemBlockInputRange& inputRange, + AqlCall& call) -> std::tuple { + skipAsserter.gotCalled(call); + + size_t skipped = 0; + while (inputRange.hasDataRow() && call.shouldSkip()) { + auto const& [state, input] = inputRange.nextDataRow(); + EXPECT_TRUE(input.isInitialized()); + skipped++; + call.didSkip(1); + } + // Do forward a softLimit call only. + // Do not oeverfetch here. + AqlCall request; + if (call.getOffset() > 0) { + request.softLimit = call.getOffset(); + } // else fullCount case, simple get UNLIMITED from above + + return {inputRange.upstreamState(), skipped, request}; + }; + auto producer = std::make_unique>( + fakedQuery->engine(), generateNodeDummy(), + makeSkipInfos(std::move(forwardData), std::move(skipData), maxReg, maxReg)); + producer->addDependency(dependency); + return producer; + } + + void ValidateSkipMatches(AqlCall const& call, size_t dataLength, size_t actual) const { + size_t expected = 0; + // Skip Offset, but not more then available + expected += std::min(call.getOffset(), dataLength); + if (call.needsFullCount()) { + // We can only fullCount on hardlimit. If this fails check test code! + EXPECT_TRUE(call.hasHardLimit()); + // We consume either hardLimit + offset, or all data. 
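      // Illustrative example (values chosen for illustration only): with
      // offset = 1, hardLimit = 2, fullCount = true and dataLength = 10, the
      // expected value is min(1, 10) = 1 for the offset plus
      // 10 - min(2 + 1, 10) = 7 counted-but-dropped rows, i.e. the executor
      // must report 8 as skipped.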
+ size_t consumed = std::min(call.getLimit() + call.getOffset(), dataLength); + // consumed >= dataLength, if it is smaller we have a remainder for fullCount. + expected += dataLength - consumed; + } + EXPECT_EQ(expected, actual); + } + /** * @brief Helper method to validate the result * It will take into account the call used as Parameter @@ -1220,34 +1597,33 @@ class ExecutionBlockImplExecuteIntegrationTest * @param skipped The number of rows the executor reported as skipped * @param result The resulting data output * @param testReg The register to evaluate + * @param numShadowRows Number of preceeding shadowRows in result. */ void ValidateResult(std::shared_ptr data, size_t skipped, - SharedAqlItemBlockPtr result, RegisterId testReg) { - auto const& call = GetParam(); + SharedAqlItemBlockPtr result, RegisterId testReg, + size_t numShadowRows = 0) { + auto const& call = getCall(); TRI_ASSERT(data != nullptr); TRI_ASSERT(data->slice().isArray()); VPackSlice expected = data->slice(); + ValidateSkipMatches(call, static_cast(expected.length()), skipped); + VPackArrayIterator expectedIt{expected}; // Skip Part size_t offset = (std::min)(call.getOffset(), static_cast(expected.length())); - if (!call.needsFullCount()) { - // Otherweise skipped = offset + fullCount - EXPECT_EQ(offset, skipped); - } - for (size_t i = 0; i < offset; ++i) { // The first have been skipped expectedIt++; } size_t limit = (std::min)(call.getLimit(), static_cast(expected.length()) - offset); - if (result != nullptr) { + if (result != nullptr && result->size() > numShadowRows) { // GetSome part - EXPECT_EQ(limit, result->size()); + EXPECT_EQ(limit, result->size() - numShadowRows); for (size_t i = 0; i < limit; ++i) { // The next have to match auto got = result->getValueReference(i, testReg).slice(); @@ -1259,16 +1635,71 @@ class ExecutionBlockImplExecuteIntegrationTest } else { EXPECT_EQ(limit, 0); } + } - // Now test Fullcount - if (call.needsFullCount()) { - ASSERT_TRUE(expected.length() >= offset + limit); - size_t fullCount = expected.length() - offset - limit; - EXPECT_EQ(offset + fullCount, skipped); - } + /** + * @brief Test that there is a shadowrow at the given index of the given depth + * + * @param block The returned block + * @param rowIndex The row index to test + * @param depth The expected shadowRow depth + */ + void ValidateShadowRow(SharedAqlItemBlockPtr block, size_t rowIndex, size_t depth) { + ASSERT_TRUE(block != nullptr); + EXPECT_TRUE(block->hasShadowRows()); + ASSERT_TRUE(block->isShadowRow(rowIndex)); + ShadowAqlItemRow row{block, rowIndex}; + EXPECT_EQ(row.getDepth(), depth); } }; +// This test asserts that the mock we are using here is working as expected. +// If this does not work we will undefined follow up errors +TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_waiting_block_mock) { + std::deque blockDeque; + auto builder = std::make_shared(); + { + MatrixBuilder<1> matrix; + matrix.reserve(250); + builder->openArray(); + for (size_t i = 0; i < 250; ++i) { + builder->add(VPackValue(i)); + matrix.emplace_back(RowBuilder<1>{i}); + } + builder->close(); + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), std::move(matrix)); + blockDeque.push_back(std::move(block)); + } + + WaitingExecutionBlockMock testee{fakedQuery->engine(), generateNodeDummy(), + std::move(blockDeque), + doesWaiting() + ? 
WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER}; + + auto const& call = getCall(); + AqlCallStack stack{call}; + + auto [state, skipped, block] = testee.execute(stack); + if (doesWaiting()) { + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + std::tie(state, skipped, block) = testee.execute(stack); + } + if (call.getLimit() > builder->slice().length() || call.needsFullCount() || + call.hasHardLimit()) { + // We need to consume everything + EXPECT_EQ(state, ExecutionState::DONE); + } else { + // We cannot consume everything. + EXPECT_EQ(state, ExecutionState::HASMORE); + } + + ValidateResult(builder, skipped, block, 0); +} + // Test a simple produce block. that has is supposed to write 1000 rows. TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_only) { auto singleton = createSingleton(); @@ -1282,8 +1713,14 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_only) { RegisterId outReg = 0; auto producer = produceBlock(singleton.get(), builder, outReg); - auto const& call = GetParam(); + auto const& call = getCall(); AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = producer->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } auto const [state, skipped, block] = producer->execute(stack); if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { EXPECT_EQ(state, ExecutionState::HASMORE); @@ -1310,8 +1747,14 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_using_two) { RegisterId outRegSecond = 1; auto producerFirst = produceBlock(singleton.get(), builder, outRegFirst); auto producer = produceBlock(producerFirst.get(), builder, outRegSecond); - auto const& call = GetParam(); + auto const& call = getCall(); AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = producer->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } auto const [state, skipped, block] = producer->execute(stack); if (call.getLimit() < 100) { if (call.hasHardLimit()) { @@ -1360,43 +1803,22 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_passthroug RegisterId outReg = 0; auto producer = produceBlock(singleton.get(), builder, outReg); - CallAsserter upperState{GetParam()}; - CallAsserter lowerState{GetParam()}; - - auto testForwarding = - [&](AqlItemBlockInputRange& inputRange, - OutputAqlItemRow& output) -> std::tuple { - upperState.gotCalled(output.getClientCall()); - while (inputRange.hasDataRow() && !output.isFull()) { - auto const& [state, input] = inputRange.nextDataRow(); - EXPECT_TRUE(input.isInitialized()); - output.copyRow(input); - output.advanceRow(); - } - return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; - }; - auto forwardCall = [&](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) - -> std::tuple { - lowerState.gotCalled(output.getClientCall()); - while (inputRange.hasDataRow() && !output.isFull()) { - auto const& [state, input] = inputRange.nextDataRow(); - EXPECT_TRUE(input.isInitialized()); - output.copyRow(input); - output.advanceRow(); - } - return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; - }; - auto upper = std::make_unique>( - fakedQuery->engine(), generateNodeDummy(), - makeInfos(std::move(testForwarding), outReg, outReg)); - 
upper->addDependency(producer.get()); - auto lower = std::make_unique>( - fakedQuery->engine(), generateNodeDummy(), - makeInfos(std::move(forwardCall), outReg, outReg)); - lower->addDependency(upper.get()); + CallAsserter upperState{getCall()}; + auto upper = forwardBlock(upperState, producer.get(), outReg); + CallAsserter lowerState{getCall()}; + auto lower = forwardBlock(lowerState, upper.get(), outReg); - auto const& call = GetParam(); + auto const& call = getCall(); AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = lower->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + // Reset call counters + upperState.reset(); + lowerState.reset(); + } auto const [state, skipped, block] = lower->execute(stack); if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { EXPECT_EQ(state, ExecutionState::HASMORE); @@ -1423,22 +1845,13 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_ builder->close(); RegisterId outReg = 0; auto producer = produceBlock(singleton.get(), builder, outReg); - GetOnlyCallAsserter upperState{GetParam()}; - CallAsserter lowerState{GetParam()}; - SkipCallAsserter skipState{GetParam()}; - - auto testForwarding = - [&](AqlItemBlockInputRange& inputRange, - OutputAqlItemRow& output) -> std::tuple { - upperState.gotCalled(output.getClientCall()); - while (inputRange.hasDataRow() && !output.isFull()) { - auto const& [state, input] = inputRange.nextDataRow(); - EXPECT_TRUE(input.isInitialized()); - output.copyRow(input); - output.advanceRow(); - } - return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; - }; + + GetOnlyCallAsserter upperState{getCall()}; + auto upper = forwardBlock(upperState, producer.get(), outReg); + + CallAsserter lowerState{getCall()}; + SkipCallAsserter skipState{getCall()}; + auto forwardCall = [&](AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) -> std::tuple { lowerState.gotCalled(output.getClientCall()); @@ -1457,13 +1870,12 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_ AqlCall& call) -> std::tuple { skipState.gotCalled(call); size_t skipped = 0; - while (inputRange.hasDataRow() && - (call.getOffset() > 0 || (call.getLimit() == 0 && call.needsFullCount()))) { + while (inputRange.hasDataRow() && call.shouldSkip()) { auto const& [state, input] = inputRange.nextDataRow(); EXPECT_TRUE(input.isInitialized()); skipped++; + call.didSkip(1); } - call.didSkip(skipped); // Do forward a softLimit call only. // Do not oeverfetch here. 
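    // Illustrative sketch (not part of the original change, values assumed):
    //   downstream call {offset: 32, hardLimit: 51}            -> request {softLimit: 32}
    //   downstream call {offset: 0, limit: 0, fullCount: true} -> request {}  (unlimited)
    // The offset is translated into a soft limit for the upstream block; the
    // fullCount case falls through to an unbounded fetch.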
AqlCall request; @@ -1474,17 +1886,19 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_ return {inputRange.upstreamState(), skipped, request}; }; - auto upper = std::make_unique>( - fakedQuery->engine(), generateNodeDummy(), - makeInfos(std::move(testForwarding), outReg, outReg)); - upper->addDependency(producer.get()); auto lower = std::make_unique>( fakedQuery->engine(), generateNodeDummy(), makeSkipInfos(std::move(forwardCall), std::move(forwardSkipCall), outReg, outReg)); lower->addDependency(upper.get()); - auto const& call = GetParam(); + auto const& call = getCall(); AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = lower->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } auto const [state, skipped, block] = lower->execute(stack); if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { EXPECT_EQ(state, ExecutionState::HASMORE); @@ -1494,6 +1908,710 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_ ValidateResult(builder, skipped, block, outReg); } +// Simulate many upstream calls, the block upstream only returns a single +// line. This test forces the executor into internal loops and into keeping +// internal state due to doesWaiting variant +TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls) { + // The WAITING block mock can only stop returning after a full block. + // As the used calls have "random" sizes, we simply create 1 line blocks only. + // This is less then optimal, but we will have an easily predictable result, with a complex internal structure + std::deque blockDeque; + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < 1000; ++i) { + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), {{i}}); + blockDeque.push_back(std::move(block)); + builder->add(VPackValue(i)); + } + builder->close(); + + auto producer = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + NoneAsserter produceAsserter{getCall(), ExecutionBlock::DefaultBatchSize * 3}; + NoneAsserter skipAsserter{getCall(), ExecutionBlock::DefaultBatchSize * 3}; + RegisterId outReg = 0; + auto testee = forwardBlock(produceAsserter, skipAsserter, producer.get(), outReg); + auto const& call = getCall(); + AqlCallStack stack{call}; + auto [state, skipped, block] = testee->execute(stack); + size_t killSwitch = 0; + while (state == ExecutionState::WAITING) { + EXPECT_TRUE(doesWaiting()); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + std::tie(state, skipped, block) = testee->execute(stack); + // Kill switch to avoid endless loop in case of error. + // We should get this through with much fewer than two times Batchsize calls. + killSwitch++; + if (killSwitch >= ExecutionBlock::DefaultBatchSize * 2) { + ASSERT_TRUE(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + } + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + } else { + EXPECT_EQ(state, ExecutionState::DONE); + } + + ValidateResult(builder, skipped, block, outReg); +} + +// Simulate many upstream calls, the block upstream only returns a single +// line. 
This test forces the executor into internal loops and into keeping +// internal state due to doesWaiting variant. Using a passthrough executor. +TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls_passthrough) { + // The WAITING block mock can only stop returning after a full block. + // As the used calls have "random" sizes, we simply create 1 line blocks only. + // This is less then optimal, but we will have an easily predictable result, with a complex internal structure + std::deque blockDeque; + auto builder = std::make_shared(); + builder->openArray(); + for (size_t i = 0; i < 1000; ++i) { + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), {{i}}); + blockDeque.push_back(std::move(block)); + builder->add(VPackValue(i)); + } + builder->close(); + + auto producer = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + NoneAsserter produceAsserter{getCall(), ExecutionBlock::DefaultBatchSize * 3}; + RegisterId outReg = 0; + auto testee = forwardBlock(produceAsserter, producer.get(), outReg); + auto call = getCall(); + auto limit = call.getLimit(); + size_t offset = call.getOffset(); + bool fullCount = call.needsFullCount(); + + if (limit == 0) { + // we can bypass everything and get away with a single call + AqlCallStack stack{call}; + auto [state, skipped, block] = testee->execute(stack); + if (doesWaiting()) { + size_t waited = 0; + while (state == ExecutionState::WAITING && waited < 2 /* avoid endless waiting*/) { + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + waited++; + std::tie(state, skipped, block) = testee->execute(stack); + } + EXPECT_LT(waited, 2); + } + EXPECT_EQ(block, nullptr); + if (fullCount) { + // We skipped everything + EXPECT_EQ(skipped, 1000); + EXPECT_EQ(state, ExecutionState::DONE); + } else { + EXPECT_EQ(skipped, offset); + EXPECT_EQ(state, ExecutionState::HASMORE); + } + } else { + VPackArrayIterator it{builder->slice()}; + // Skip over offset + for (size_t i = 0; i < offset; ++i) { + ++it; + } + for (size_t i = 0; i < limit && it.valid(); ++i) { + AqlCallStack stack{call}; + auto [state, skipped, block] = testee->execute(stack); + if (doesWaiting()) { + size_t waited = 0; + while (state == ExecutionState::WAITING && waited < 3 /* avoid endless waiting*/) { + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + waited++; + std::tie(state, skipped, block) = testee->execute(stack); + } + if (offset > 0 && i == 0) { + // We wait some time before the first row is produced + EXPECT_LT(waited, 3); + } else { + // We wait once, then we we get a line. + EXPECT_EQ(1, waited); + } + } + + ASSERT_NE(block, nullptr); + ASSERT_EQ(block->size(), 1); + // Book-keeping for call. + // We need to request data from above with the correct call. 
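+      // Book-keeping sketch (illustrative, assumed values): with an initial
+      // call of {offset: 32, softLimit: 35} the first iteration sees
+      // skipped == 32, so call.didSkip(32) reduces the remaining offset to 0
+      // and call.didProduce(1) reduces the remaining limit to 34 before the
+      // loop issues the next execute() with the adjusted call.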
+ if (skipped > 0) { + call.didSkip(skipped); + } + call.didProduce(1); + auto got = block->getValueReference(0, outReg).slice(); + EXPECT_TRUE(basics::VelocyPackHelper::equal(got, *it, false)) + << "Expected: " << it.value().toJson() << " got: " << got.toJson() + << " in row " << i << " and register " << outReg; + if (i == 0) { + // The first data row includes skip + EXPECT_EQ(skipped, offset); + } else { + if (call.getLimit() == 0 && call.hasHardLimit() && call.needsFullCount()) { + // The last row, with fullCount needs to contain data. + EXPECT_EQ(skipped, 1000 - limit - offset); + } else { + // Do not skip on later data rows + // Except the last one on fullcount + EXPECT_EQ(skipped, 0); + } + } + // NOTE: We might want to get into this situation. + // Even if the output is full, we fulfill the fullCount request + // Might however trigger waiting instead. + /* + if (call.hasHardLimit() && !call.needsFullCount() && call.getLimit() == 0) { + EXPECT_EQ(state, ExecutionState::DONE); + } else { + EXPECT_EQ(state, ExecutionState::HASMORE); + } + */ + if ((it.isLast() && call.getLimit() > 0) || + (call.getLimit() == 0 && call.hasHardLimit())) { + // This is an unlimited test. + // We reached end of output, but still have limit left + EXPECT_EQ(state, ExecutionState::DONE); + } else { + EXPECT_EQ(state, ExecutionState::HASMORE); + } + + it++; + } + } +} // namespace aql + +// Test to simulate an empty Subquery +TEST_P(ExecutionBlockImplExecuteIntegrationTest, only_relevant_shadowRows) { + std::deque blockDeque; + VPackBuilder builder; + builder.openArray(); + for (size_t i = 0; i < 3; ++i) { + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), {{i}}, {{0, 0}}); + blockDeque.push_back(std::move(block)); + builder.add(VPackValue(0)); + } + builder.close(); + + // We have 3 consecutive shadowRows of Depth 0 + auto producer = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + RegisterId outReg = 0; + // We ask: + // Empty input + // On waiting: Empty Input + // input with shadow row only + size_t maxCalls = doesWaiting() ? 9 : 6; + NoneAsserter getAsserter{getCall(), maxCalls}; + NoneAsserter skipAsserter{getCall(), maxCalls}; + auto testee = forwardBlock(getAsserter, skipAsserter, producer.get(), outReg); + + for (size_t i = 0; i < 3; ++i) { + // We always take a new call. We do not want the call to be modified cross + // subqueries, this would not be done by Executors. + auto const& call = getCall(); + AqlCallStack stack{call}; + // We cannot group shadowRows within a single call. 
+ // So we end up with 3 results, each 1 shadowRow, no matter what the call is + auto [state, skipped, block] = testee->execute(stack); + if (doesWaiting()) { + // We wait between lines + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + std::tie(state, skipped, block) = testee->execute(stack); + } + if (i == 2) { + // Only the last one is done + EXPECT_EQ(state, ExecutionState::DONE); + } else { + EXPECT_EQ(state, ExecutionState::HASMORE); + } + // Cannot skip a shadowRow + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + ASSERT_EQ(block->size(), 1); + EXPECT_TRUE(block->hasShadowRows()); + EXPECT_TRUE(block->isShadowRow(0)); + auto rowIndex = block->getShadowRowDepth(0); + EXPECT_TRUE(basics::VelocyPackHelper::equal(rowIndex.slice(), + builder.slice().at(i), false)) + << "Expected: " << builder.slice().at(i).toJson() + << " got: " << rowIndex.slice().toJson(); + } +} + +// Test a classical input ending in a relevant shadowRow +TEST_P(ExecutionBlockImplExecuteIntegrationTest, input_and_relevant_shadowRow) { + std::deque blockDeque; + { + SharedAqlItemBlockPtr block = + buildBlock<0>(fakedQuery->engine()->itemBlockManager(), {{}, {}}, {{1, 0}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + auto builder = std::make_shared(); + { + builder->openArray(); + for (size_t i = 0; i < 999; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + } + + RegisterId outReg = 0; + auto producer = produceBlock(singleton.get(), builder, outReg); + + CallAsserter getAsserter{getCall()}; + SkipCallAsserter skipAsserter{getCall()}; + auto testee = forwardBlock(getAsserter, skipAsserter, producer.get(), outReg); + + auto const& call = getCall(); + AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + auto const [state, skipped, block] = testee->execute(stack); + + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + // Do not append shadowRow on softLimit + ValidateResult(builder, skipped, block, outReg, 0); + } else { + EXPECT_EQ(state, ExecutionState::DONE); + // Forward to shadowRow on hardLimit + ValidateResult(builder, skipped, block, outReg, 1); + ASSERT_TRUE(block != nullptr); + ValidateShadowRow(block, block->size() - 1, 0); + } +} + +// Test a classical input ending in a relevant shadowRow and a non-relevant shadow_row +TEST_P(ExecutionBlockImplExecuteIntegrationTest, input_and_non_relevant_shadowRow) { + std::deque blockDeque; + { + SharedAqlItemBlockPtr block = buildBlock<0>(fakedQuery->engine()->itemBlockManager(), + {{}, {}, {}}, {{1, 0}, {2, 1}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? 
WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + auto builder = std::make_shared(); + { + builder->openArray(); + for (size_t i = 0; i < 998; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + } + + RegisterId outReg = 0; + auto producer = produceBlock(singleton.get(), builder, outReg); + + CallAsserter getAsserter{getCall()}; + SkipCallAsserter skipAsserter{getCall()}; + auto testee = forwardBlock(getAsserter, skipAsserter, producer.get(), outReg); + + auto const& call = getCall(); + AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + auto const [state, skipped, block] = testee->execute(stack); + + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + // Do not append shadowRow on softLimit + ValidateResult(builder, skipped, block, outReg, 0); + } else { + EXPECT_EQ(state, ExecutionState::DONE); + // Forward to shadowRow on hardLimit + ValidateResult(builder, skipped, block, outReg, 2); + ASSERT_TRUE(block != nullptr); + // Include both shadow rows + ValidateShadowRow(block, block->size() - 2, 0); + ValidateShadowRow(block, block->size() - 1, 1); + } +} + +// Test multiple subqueries +TEST_P(ExecutionBlockImplExecuteIntegrationTest, multiple_subqueries) { + std::deque blockDeque; + { + // First subquery + SharedAqlItemBlockPtr block = buildBlock<1>(fakedQuery->engine()->itemBlockManager(), + {{1}, {3}, {4}}, {{1, 0}, {2, 1}}); + blockDeque.push_back(std::move(block)); + } + { + // Second subquery + SharedAqlItemBlockPtr block = buildBlock<1>(fakedQuery->engine()->itemBlockManager(), + {{2}, {5}, {6}}, {{1, 0}, {2, 1}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? 
WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + size_t dataRowCount = 250; + auto builder = std::make_shared(); + { + builder->openArray(); + for (size_t i = 0; i < dataRowCount; ++i) { + builder->add(VPackValue(i)); + } + builder->close(); + } + + RegisterId outReg = 1; + auto producer = produceBlock(singleton.get(), builder, outReg); + + CallAsserter getAsserter{getCall()}; + SkipCallAsserter skipAsserter{getCall()}; + auto testee = forwardBlock(getAsserter, skipAsserter, producer.get(), outReg); + for (size_t subqueryRun = 1; subqueryRun < 3; ++subqueryRun) { + getAsserter.reset(); + skipAsserter.reset(); + auto subqueryData = std::make_shared(); + subqueryData->openArray(); + for (size_t i = 0; i < dataRowCount; ++i) { + subqueryData->add(VPackValue(subqueryRun)); + } + subqueryData->close(); + auto const& call = getCall(); + AqlCallStack stack{call}; + if (doesWaiting()) { + auto const [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + auto const [state, skipped, block] = testee->execute(stack); + + if (std::holds_alternative(call.softLimit) && !call.hasHardLimit()) { + EXPECT_EQ(state, ExecutionState::HASMORE); + // Do not append shadowRow on softLimit + ValidateResult(builder, skipped, block, outReg, 0); + ValidateResult(subqueryData, skipped, block, 0, 0); + if (subqueryRun == 1) { + getAsserter.reset(); + skipAsserter.reset(); + // Now trigger fast-forward to move to next subquery + AqlCall forwardCall{}; + forwardCall.hardLimit = 0; + forwardCall.fullCount = false; + AqlCallStack forwardStack{forwardCall}; + auto const [forwardState, forwardSkipped, forwardBlock] = + testee->execute(forwardStack); + // We do not care for any data left + EXPECT_EQ(forwardState, ExecutionState::HASMORE); + EXPECT_EQ(forwardSkipped, 0); + // However there need to be two shadow rows + ASSERT_NE(forwardBlock, nullptr); + ASSERT_EQ(forwardBlock->size(), 2); + ValidateShadowRow(forwardBlock, 0, 0); + ValidateShadowRow(forwardBlock, 1, 1); + } + + } else { + if (subqueryRun == 1) { + // In the first run, we actually have more after fullCount + EXPECT_EQ(state, ExecutionState::HASMORE); + } else { + // In the second run we do not have more after fullCount, we have returned everything + EXPECT_EQ(state, ExecutionState::DONE); + } + + // Forward to shadowRow on hardLimit + ValidateResult(builder, skipped, block, outReg, 2); + ValidateResult(subqueryData, skipped, block, 0, 2); + ASSERT_NE(block, nullptr); + // Include both shadow rows + ValidateShadowRow(block, block->size() - 2, 0); + ValidateShadowRow(block, block->size() - 1, 1); + } + } +} + +// Test empty subquery. +// We cannot do a passthrough test here, as the UpstreamBlock does not +// support shadow rows and would create errors, if an offset is forwarded to it. +TEST_P(ExecutionBlockImplExecuteIntegrationTest, empty_subquery) { + std::deque blockDeque; + { + // Here we prepare the following: + // 1 query with 1 row + 2 ShadowRows (depth 0, depth 1) + // 1 query with 0 row + 1 ShadowRows (depth 0) + // 1 query with 0 row + 2 ShadowRow (depth 0, depth 1) + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), + {{1}, {2}, {3}, {4}, {5}, {6}}, + {{1, 0}, {2, 1}, {3, 0}, {4, 0}, {5, 1}}); + blockDeque.push_back(std::move(block)); + } + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? 
WaitingExecutionBlockMock::WaitingBehaviour::ONCE + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + RegisterId outReg = 0; + CallAsserter getAsserter{getCall()}; + SkipCallAsserter skipAsserter{getCall()}; + auto testee = forwardBlock(getAsserter, skipAsserter, singleton.get(), outReg); + + if (doesWaiting()) { + AqlCallStack stack{getCall()}; + // we only wait exactly once, only one block upstream that is not sliced. + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + auto call = getCall(); + bool skip = call.getOffset() > 0 || (call.getLimit() == 0 && call.needsFullCount()); + { + // First subquery + AqlCallStack stack{getCall()}; + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + ASSERT_NE(block, nullptr); + if (skip) { + EXPECT_EQ(skipped, 1); + EXPECT_EQ(block->size(), 2); + } else { + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block->size(), 3); + } + size_t row = 0; + if (!skip) { + ASSERT_FALSE(block->isShadowRow(row)); + AssertValueEquals(block, row, outReg, 1); + row++; + } + AssertIsShadowRowOfDepth(block, row, 0); + AssertValueEquals(block, row, outReg, 2); + row++; + AssertIsShadowRowOfDepth(block, row, 1); + AssertValueEquals(block, row, outReg, 3); + if (skip) { + // first empty input, then we skip input + EXPECT_EQ(skipAsserter.getNumberCalls(), 2); + // we need to call getSome never + EXPECT_EQ(getAsserter.getNumberCalls(), 0); + } else { + // we do not skip + EXPECT_EQ(skipAsserter.getNumberCalls(), 0); + // first empty input, then we produce input + EXPECT_EQ(getAsserter.getNumberCalls(), 2); + } + getAsserter.reset(); + skipAsserter.reset(); + } + + { + // Second subquery + AqlCallStack stack{getCall()}; + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + ASSERT_NE(block, nullptr); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block->size(), 1); + size_t row = 0; + AssertIsShadowRowOfDepth(block, row, 0); + AssertValueEquals(block, row, outReg, 4); + if (skip) { + // wo do not have empty input, we can skip + EXPECT_EQ(skipAsserter.getNumberCalls(), 1); + // we need to call getSome never + EXPECT_EQ(getAsserter.getNumberCalls(), 0); + } else { + // we do not skip + EXPECT_EQ(skipAsserter.getNumberCalls(), 0); + // wo do not have empty input, we can produce + EXPECT_EQ(getAsserter.getNumberCalls(), 1); + } + getAsserter.reset(); + skipAsserter.reset(); + } + + { + // Third subquery + AqlCallStack stack{getCall()}; + auto const& [state, skipped, block] = testee->execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + ASSERT_NE(block, nullptr); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block->size(), 2); + size_t row = 0; + AssertIsShadowRowOfDepth(block, row, 0); + AssertValueEquals(block, row, outReg, 5); + row++; + AssertIsShadowRowOfDepth(block, row, 1); + AssertValueEquals(block, row, outReg, 6); + if (skip) { + // wo do not have empty input, we can skip + EXPECT_EQ(skipAsserter.getNumberCalls(), 1); + // we need to call getSome never + EXPECT_EQ(getAsserter.getNumberCalls(), 0); + } else { + // we do not skip + EXPECT_EQ(skipAsserter.getNumberCalls(), 0); + // wo do not have empty input, we can produce + EXPECT_EQ(getAsserter.getNumberCalls(), 1); + } + + getAsserter.reset(); + skipAsserter.reset(); + } +} + +// Test forward outer queries. 
+// The executors should not be called if there is no relevant call on the Stack +// Block shall be returned unmodified. +TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_outer_subquery_forwarding_passthrough) { + std::deque blockDeque; + auto builder = std::make_shared(); + { + MatrixBuilder<1> matrix; + matrix.reserve(250); + builder->openArray(); + for (size_t i = 0; i < 250; ++i) { + builder->add(VPackValue(i)); + matrix.emplace_back(RowBuilder<1>{i}); + } + builder->close(); + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), std::move(matrix)); + blockDeque.push_back(std::move(block)); + } + + // Note: WaitingExecutionBlockMock does not use the ExecutionBlockImpl logic + // and will React to any call on the spec, if it is relevant or not. + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + auto const& call = getCall(); + AqlCallStack stack{call}; + ASSERT_TRUE(stack.isRelevant()); + stack.increaseSubqueryDepth(); + EXPECT_FALSE(stack.isRelevant()); + + ProduceCall prodCall = generateNeverProduceCall(); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateNodeDummy(), + makeInfos(prodCall)}; + + testee.addDependency(singleton.get()); + + auto [state, skipped, block] = testee.execute(stack); + if (doesWaiting()) { + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + std::tie(state, skipped, block) = testee.execute(stack); + } + + if (call.getLimit() > builder->slice().length() || call.needsFullCount() || + call.hasHardLimit()) { + // We need to consume everything + EXPECT_EQ(state, ExecutionState::DONE); + } else { + // We cannot consume everything. + EXPECT_EQ(state, ExecutionState::HASMORE); + } + ValidateResult(builder, skipped, block, 0); +} + +// Test forward outer queries. +// The executors should not be called if there is no relevant call on the Stack +// Block shall be returned unmodified. +TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_outer_subquery_forwarding) { + std::deque blockDeque; + auto builder = std::make_shared(); + { + MatrixBuilder<1> matrix; + matrix.reserve(250); + builder->openArray(); + for (size_t i = 0; i < 250; ++i) { + builder->add(VPackValue(i)); + matrix.emplace_back(RowBuilder<1>{i}); + } + builder->close(); + SharedAqlItemBlockPtr block = + buildBlock<1>(fakedQuery->engine()->itemBlockManager(), std::move(matrix)); + blockDeque.push_back(std::move(block)); + } + + // Note: WaitingExecutionBlockMock does not use the ExecutionBlockImpl logic + // and will React to any call on the spec, if it is relevant or not. + auto singleton = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + doesWaiting() ? 
WaitingExecutionBlockMock::WaitingBehaviour::ALWAYS + : WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + + auto const& call = getCall(); + AqlCallStack stack{call}; + ASSERT_TRUE(stack.isRelevant()); + stack.increaseSubqueryDepth(); + EXPECT_FALSE(stack.isRelevant()); + + ProduceCall prodCall = generateNeverProduceCall(); + SkipCall skipCall = generateNeverSkipCall(); + ExecutionBlockImpl testee{fakedQuery->engine(), generateNodeDummy(), + makeSkipInfos(prodCall, skipCall)}; + + testee.addDependency(singleton.get()); + + auto [state, skipped, block] = testee.execute(stack); + if (doesWaiting()) { + EXPECT_EQ(state, ExecutionState::WAITING); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + std::tie(state, skipped, block) = testee.execute(stack); + } + + if (call.getLimit() > builder->slice().length() || call.needsFullCount() || + call.hasHardLimit()) { + // We need to consume everything + EXPECT_EQ(state, ExecutionState::DONE); + } else { + // We cannot consume everything. + EXPECT_EQ(state, ExecutionState::HASMORE); + } + ValidateResult(builder, skipped, block, 0); +} + // The numbers here are random, but all of them are below 1000 which is the default batch size static constexpr auto defaultCall = []() -> const AqlCall { return AqlCall{}; }; @@ -1532,7 +2650,7 @@ static constexpr auto skipAndSoftLimit = []() -> const AqlCall { static constexpr auto skipAndHardLimit = []() -> const AqlCall { AqlCall res{}; res.offset = 32; - res.hardLimit = 71; + res.hardLimit = 51; return res; }; static constexpr auto skipAndHardLimitAndFullCount = []() -> const AqlCall { @@ -1556,12 +2674,13 @@ static constexpr auto onlySkipAndCount = []() -> const AqlCall { return res; }; -INSTANTIATE_TEST_CASE_P(ExecutionBlockExecuteIntegration, ExecutionBlockImplExecuteIntegrationTest, - ::testing::Values(defaultCall(), skipCall(), - softLimit(), hardLimit(), fullCount(), - skipAndSoftLimit(), skipAndHardLimit(), - skipAndHardLimitAndFullCount(), - onlyFullCount(), onlySkipAndCount())); +INSTANTIATE_TEST_CASE_P( + ExecutionBlockExecuteIntegration, ExecutionBlockImplExecuteIntegrationTest, + ::testing::Combine(::testing::Values(defaultCall(), skipCall(), softLimit(), + hardLimit(), fullCount(), skipAndSoftLimit(), + skipAndHardLimit(), skipAndHardLimitAndFullCount(), + onlyFullCount(), onlySkipAndCount()), + ::testing::Bool())); } // namespace aql } // namespace tests diff --git a/tests/Aql/TestLambdaExecutor.cpp b/tests/Aql/TestLambdaExecutor.cpp index 4fe9c2812cc2..9f7cdb99e417 100644 --- a/tests/Aql/TestLambdaExecutor.cpp +++ b/tests/Aql/TestLambdaExecutor.cpp @@ -34,25 +34,30 @@ LambdaExecutorInfos::LambdaExecutorInfos( std::shared_ptr> writeableOutputRegisters, RegisterId nrInputRegisters, RegisterId nrOutputRegisters, std::unordered_set registersToClear, - std::unordered_set registersToKeep, ProduceCall lambda) + std::unordered_set registersToKeep, ProduceCall lambda, ResetCall reset) : ExecutorInfos(readableInputRegisters, writeableOutputRegisters, nrInputRegisters, nrOutputRegisters, registersToClear, registersToKeep), - _produceLambda(lambda) {} + _produceLambda(lambda), + _resetLambda(reset) {} auto LambdaExecutorInfos::getProduceLambda() const -> ProduceCall const& { return _produceLambda; } +auto LambdaExecutorInfos::reset() -> void { _resetLambda(); } + LambdaSkipExecutorInfos::LambdaSkipExecutorInfos( std::shared_ptr> readableInputRegisters, std::shared_ptr> writeableOutputRegisters, RegisterId nrInputRegisters, RegisterId nrOutputRegisters, std::unordered_set registersToClear, - 
std::unordered_set registersToKeep, ProduceCall lambda, SkipCall skipLambda) + std::unordered_set registersToKeep, ProduceCall lambda, + SkipCall skipLambda, ResetCall reset) : ExecutorInfos(readableInputRegisters, writeableOutputRegisters, nrInputRegisters, nrOutputRegisters, registersToClear, registersToKeep), _produceLambda(lambda), - _skipLambda(skipLambda) {} + _skipLambda(skipLambda), + _resetLambda(reset) {} auto LambdaSkipExecutorInfos::getProduceLambda() const -> ProduceCall const& { return _produceLambda; @@ -62,8 +67,11 @@ auto LambdaSkipExecutorInfos::getSkipLambda() const -> SkipCall const& { return _skipLambda; } -TestLambdaExecutor::TestLambdaExecutor(Fetcher&, Infos& infos) - : _infos(infos) {} +auto LambdaSkipExecutorInfos::reset() -> void { _resetLambda(); } + +TestLambdaExecutor::TestLambdaExecutor(Fetcher&, Infos& infos) : _infos(infos) { + _infos.reset(); +} TestLambdaExecutor::~TestLambdaExecutor() {} @@ -85,7 +93,9 @@ auto TestLambdaExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlIte } TestLambdaSkipExecutor::TestLambdaSkipExecutor(Fetcher&, Infos& infos) - : _infos(infos) {} + : _infos(infos) { + _infos.reset(); +} TestLambdaSkipExecutor::~TestLambdaSkipExecutor() {} diff --git a/tests/Aql/TestLambdaExecutor.h b/tests/Aql/TestLambdaExecutor.h index 4905ea56f915..4ec0c995e956 100644 --- a/tests/Aql/TestLambdaExecutor.h +++ b/tests/Aql/TestLambdaExecutor.h @@ -52,6 +52,11 @@ using ProduceCall = using SkipCall = std::function(AqlItemBlockInputRange& input, AqlCall& call)>; +/** + * @brief This is a shorthand for the reset state signature + */ +using ResetCall = std::function; + /** * @brief Executorinfos for the lambda executors. * Contains basice RegisterPlanning information, and a ProduceCall. @@ -59,11 +64,13 @@ using SkipCall = */ class LambdaExecutorInfos : public ExecutorInfos { public: - LambdaExecutorInfos(std::shared_ptr> readableInputRegisters, - std::shared_ptr> writeableOutputRegisters, - RegisterId nrInputRegisters, RegisterId nrOutputRegisters, - std::unordered_set registersToClear, - std::unordered_set registersToKeep, ProduceCall lambda); + LambdaExecutorInfos( + std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, ProduceCall lambda, + ResetCall reset = []() -> void {}); LambdaExecutorInfos() = delete; LambdaExecutorInfos(LambdaExecutorInfos&&) = default; @@ -71,9 +78,11 @@ class LambdaExecutorInfos : public ExecutorInfos { ~LambdaExecutorInfos() = default; auto getProduceLambda() const -> ProduceCall const&; + auto reset() -> void; private: ProduceCall _produceLambda; + ResetCall _resetLambda; }; /** @@ -84,12 +93,13 @@ class LambdaExecutorInfos : public ExecutorInfos { */ class LambdaSkipExecutorInfos : public ExecutorInfos { public: - LambdaSkipExecutorInfos(std::shared_ptr> readableInputRegisters, - std::shared_ptr> writeableOutputRegisters, - RegisterId nrInputRegisters, RegisterId nrOutputRegisters, - std::unordered_set registersToClear, - std::unordered_set registersToKeep, - ProduceCall lambda, SkipCall skipLambda); + LambdaSkipExecutorInfos( + std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, ProduceCall lambda, + SkipCall skipLambda, ResetCall reset = []() -> void {}); 
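+  // Usage sketch (illustrative, not part of the original change): the
+  // ResetCall gives tests a hook to rewind captured state whenever the
+  // lambda executor is (re)constructed, e.g.
+  //   auto iterator = std::make_shared<VPackArrayIterator>(data->slice());
+  //   ResetCall reset = [iterator]() -> void { iterator->reset(); };
+  //   makeSkipInfos(produceLambda, skipLambda, inReg, outReg, reset);
+  // The defaulted argument keeps existing call sites compiling unchanged.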
LambdaSkipExecutorInfos() = delete; LambdaSkipExecutorInfos(LambdaSkipExecutorInfos&&) = default; @@ -98,10 +108,12 @@ class LambdaSkipExecutorInfos : public ExecutorInfos { auto getProduceLambda() const -> ProduceCall const&; auto getSkipLambda() const -> SkipCall const&; + auto reset() -> void; private: ProduceCall _produceLambda; SkipCall _skipLambda; + ResetCall _resetLambda; }; /** diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp index 593efb9060a3..00e56e4a07d7 100644 --- a/tests/Aql/WaitingExecutionBlockMock.cpp +++ b/tests/Aql/WaitingExecutionBlockMock.cpp @@ -37,12 +37,14 @@ using namespace arangodb::tests::aql; WaitingExecutionBlockMock::WaitingExecutionBlockMock(ExecutionEngine* engine, ExecutionNode const* node, - std::deque&& data) + std::deque&& data, + WaitingBehaviour variant) : ExecutionBlock(engine, node), _data(std::move(data)), _resourceMonitor(), _inflight(0), - _hasWaited(false) {} + _hasWaited(false), + _variant{variant} {} std::pair WaitingExecutionBlockMock::initializeCursor( arangodb::aql::InputAqlItemRow const& input) { @@ -109,8 +111,87 @@ std::pair WaitingExecutionBlockMock::skip } } +// NOTE: Does not care for shadowrows! std::tuple WaitingExecutionBlockMock::execute(AqlCallStack stack) { - // TODO implement! - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + while (!stack.isRelevant()) { + stack.pop(); + } + auto myCall = stack.popCall(); + if (_variant != WaitingBehaviour::NEVER && !_hasWaited) { + // If we orderd waiting check on _hasWaited and wait if not + _hasWaited = true; + return {ExecutionState::WAITING, 0, nullptr}; + } + if (_variant == WaitingBehaviour::ALWAYS) { + // If we allways wait, reset. + _hasWaited = false; + } + size_t skipped = 0; + SharedAqlItemBlockPtr result = nullptr; + while (!_data.empty()) { + if (_data.front()->size() <= _inflight) { + dropBlock(); + continue; + } + TRI_ASSERT(_data.front()->size() > _inflight); + // Drop while skip + if (myCall.getOffset() > 0) { + size_t canSkip = (std::min)(_data.front()->size() - _inflight, myCall.getOffset()); + _inflight += canSkip; + myCall.didSkip(canSkip); + skipped += canSkip; + continue; + } else if (myCall.getLimit() > 0) { + size_t canReturn = _data.front()->size() - _inflight; + + if (canReturn <= myCall.getLimit()) { + if (result != nullptr) { + // Sorry we can only return one block. + // This means we have prepared the first block. + // But still need more data. + return {ExecutionState::HASMORE, skipped, result}; + } + // We can return the remainder of this block + if (_inflight == 0) { + // use full block + result = std::move(_data.front()); + } else { + // Slice out the last part + result = _data.front()->slice(_inflight, _data.front()->size()); + } + dropBlock(); + } else { + // Slice out limit many rows starting at _inflight + result = _data.front()->slice(_inflight, _inflight + myCall.getLimit()); + // adjust _inflight to the fist non-returned row. 
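+        // Illustrative example (assumed values): with a 1000 row block,
+        // _inflight == 0 and myCall.getLimit() == 51 the slice above returns
+        // rows [0, 51) and the increment below moves _inflight to 51, so the
+        // next execute() continues at row 51 of the same block.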
+ _inflight += myCall.getLimit(); + } + TRI_ASSERT(result != nullptr); + myCall.didProduce(result->size()); + } else if (myCall.needsFullCount()) { + size_t counts = _data.front()->size() - _inflight; + dropBlock(); + myCall.didSkip(counts); + skipped += counts; + } else { + if (myCall.getLimit() == 0 && !myCall.needsFullCount() && myCall.hasHardLimit()) { + while (!_data.empty()) { + // Drop data we are in fastForward phase + dropBlock(); + } + } + if (_data.empty()) { + return {ExecutionState::DONE, skipped, result}; + } else { + return {ExecutionState::HASMORE, skipped, result}; + } + } + } + return {ExecutionState::DONE, skipped, result}; +} + +void WaitingExecutionBlockMock::dropBlock() { + TRI_ASSERT(!_data.empty()); + _data.pop_front(); + _inflight = 0; } \ No newline at end of file diff --git a/tests/Aql/WaitingExecutionBlockMock.h b/tests/Aql/WaitingExecutionBlockMock.h index f644b37ccc0f..3234060d7936 100644 --- a/tests/Aql/WaitingExecutionBlockMock.h +++ b/tests/Aql/WaitingExecutionBlockMock.h @@ -45,16 +45,27 @@ namespace aql { */ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { public: + /** + * @brief Define how often this Block should return "WAITING" + */ + enum WaitingBehaviour { + NEVER, // Never return WAITING + ONCE, // Return WAITING on the first execute call, afterwards return all blocks + ALWAYS // Return Waiting once for every execute Call. + }; + /** * @brief Create a WAITING ExecutionBlockMock * * @param engine Required by API. * @param node Required by API. * @param data Must be a shared_ptr to an VPackArray. + * @param variant The waiting behaviour of this block (default ONCE), see WaitingBehaviour */ WaitingExecutionBlockMock(arangodb::aql::ExecutionEngine* engine, arangodb::aql::ExecutionNode const* node, - std::deque&& data); + std::deque&& data, + WaitingBehaviour variant = WaitingBehaviour::ALWAYS); virtual std::pair shutdown(int errorCode) override; @@ -95,16 +106,19 @@ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { */ std::pair skipSome(size_t atMost) override; - // TODO: Document and implement me! std::tuple execute( arangodb::aql::AqlCallStack stack) override; + private: + void dropBlock(); + private: std::deque _data; arangodb::aql::ResourceMonitor _resourceMonitor; size_t _inflight; bool _returnedDone = false; bool _hasWaited; + WaitingBehaviour _variant; }; } // namespace aql From 7222d3ebb4e845cad271586c5158958099df23d2 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Fri, 31 Jan 2020 09:48:21 +0100 Subject: [PATCH 057/122] Feature/aql subquery execution block impl execute implementation sorted collect (#11020) * SortedCollect executor and some tests. * More tests. * More const and skip implemented. * Fixed some bugs found by tests. * Refactored skip. * Broken state with test framework. * Extracted testing framework into separate file and adjusted it to use WaitingBlockMock. However it is not yet fully implemented * Added another SortedExecutor test without fullcount. This passes. Also assert on skip and State * Removed obsolete assertion in skip, added another nonFullcount test. * More tests. Fixed some bugs in the test framework. * More cleanup. * Remove version control marker. * Removed wrong assertion. * Apply suggestions from code review Co-Authored-By: Michael Hackstein * Remove dead code. * Removed fetcher. 
Co-authored-by: Michael Hackstein --- arangod/Aql/AqlCall.h | 13 +- arangod/Aql/AqlItemBlockInputRange.cpp | 2 +- arangod/Aql/ExecutionBlockImpl.cpp | 57 +- arangod/Aql/ExecutionBlockImpl.h | 2 +- arangod/Aql/OutputAqlItemRow.cpp | 17 +- arangod/Aql/OutputAqlItemRow.h | 2 +- arangod/Aql/SortedCollectExecutor.cpp | 231 +++-- arangod/Aql/SortedCollectExecutor.h | 33 +- tests/Aql/ExecutorTestHelper.h | 190 ++++ tests/Aql/SortedCollectExecutorTest.cpp | 1151 +++++++++++++++-------- 10 files changed, 1171 insertions(+), 527 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 231c11402141..72acbb351c85 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -70,6 +70,7 @@ struct AqlCall { Limit softLimit{Infinity{}}; Limit hardLimit{Infinity{}}; bool fullCount{false}; + std::size_t skippedRows{0}; std::size_t getOffset() const { return offset; } @@ -89,9 +90,8 @@ struct AqlCall { return limit; } - void didSkip(std::size_t n) { + void didSkip(std::size_t n) noexcept { if (n <= offset) { - // TRI_ASSERT(n <= offset); offset -= n; } else { TRI_ASSERT(fullCount); @@ -99,6 +99,15 @@ struct AqlCall { // in a single call here. offset = 0; } + skippedRows += n; + } + + [[nodiscard]] std::size_t getSkipCount() const noexcept { + return skippedRows; + } + + [[nodiscard]] bool needSkipMore() const noexcept { + return (0 < getOffset()) || (getLimit() == 0 && needsFullCount()); } void didProduce(std::size_t n) { diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index f8e9cef032b6..fa171c5a7243 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -133,7 +133,7 @@ ExecutorState AqlItemBlockInputRange::nextState() const noexcept { } return ExecutorState::DONE; } else { - TRI_ASSERT(RowType::SHADOW == type); + static_assert(RowType::SHADOW == type); // We Return HASMORE, if the next shadow row is NOT relevant. // So we can directly fetch the next shadow row without informing // the executor about an empty subquery. diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index b8e170e6c649..a624a575029d 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -133,11 +133,6 @@ CREATE_HAS_MEMBER_CHECK(fetchBlockForPassthrough, hasFetchBlockForPassthrough); CREATE_HAS_MEMBER_CHECK(expectedNumberOfRows, hasExpectedNumberOfRows); CREATE_HAS_MEMBER_CHECK(skipRowsRange, hasSkipRowsRange); -/* - * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction - * TODO: This should be removed once all executors and fetchers are ported to the new style. - */ - #ifdef ARANGODB_USE_GOOGLE_TESTS // Forward declaration of Test Executors. // only used as long as isNewStyleExecutor is required. @@ -150,21 +145,26 @@ class TestLambdaSkipExecutor; } // namespace arangodb #endif -template -static bool constexpr isNewStyleExecutor() { - return +template +constexpr bool is_one_of_v = (std::is_same_v || ...); + +/* + * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction + * TODO: This should be removed once all executors and fetchers are ported to the new style. 
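+ *
+ * Illustrative usage (not from the patch itself; assumes the usual
+ * is_one_of_v<T, Ts...> parameter order of the helper above):
+ *   static_assert(is_one_of_v<int, char, int, double>);
+ *   static_assert(!is_one_of_v<float, char, int, double>);
+ * so isNewStyleExecutor<Executor> is simply a membership test of Executor
+ * against the listed executor types.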
+ */ + +template +constexpr bool isNewStyleExecutor = + is_one_of_v || - std::is_same_v || + TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - std::is_same_v || - std::is_same_v; -} + ShortestPathExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, ExecutionNode const* node, - typename Executor::Infos&& infos) + typename Executor::Infos infos) : ExecutionBlock(engine, node), _dependencyProxy(_dependencies, engine->itemBlockManager(), infos.getInputRegisters(), @@ -191,7 +191,7 @@ ExecutionBlockImpl::~ExecutionBlockImpl() = default; template std::pair ExecutionBlockImpl::getSome(size_t atMost) { - if constexpr (isNewStyleExecutor()) { + if constexpr (isNewStyleExecutor) { AqlCallStack stack{AqlCall::SimulateGetSome(atMost)}; auto const [state, skipped, block] = execute(stack); return {state, block}; @@ -355,13 +355,12 @@ std::unique_ptr ExecutionBlockImpl::createOutputRow( if /* constexpr */ (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) { return std::make_unique(newBlock, infos().getOutputRegisters(), infos().registersToKeep(), - infos().registersToClear(), std::move(call), + infos().registersToClear(), call, OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows); } else { return std::make_unique(newBlock, infos().getOutputRegisters(), infos().registersToKeep(), - infos().registersToClear(), - std::move(call)); + infos().registersToClear(), call); } } @@ -489,7 +488,7 @@ static SkipVariants constexpr skipType() { template std::pair ExecutionBlockImpl::skipSome(size_t const atMost) { - if constexpr (isNewStyleExecutor()) { + if constexpr (isNewStyleExecutor) { AqlCallStack stack{AqlCall::SimulateSkipSome(atMost)}; auto const [state, skipped, block] = execute(stack); @@ -625,7 +624,7 @@ template std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { // TODO remove this IF // These are new style executors - if constexpr (isNewStyleExecutor()) { + if constexpr (isNewStyleExecutor) { // Only this executor is fully implemented traceExecuteBegin(stack); auto res = executeWithoutTrace(stack); @@ -982,7 +981,7 @@ struct RequestWrappedBlock { template std::pair ExecutionBlockImpl::requestWrappedBlock( size_t nrItems, RegisterCount nrRegs) { - if constexpr (!isNewStyleExecutor()) { + if constexpr (!isNewStyleExecutor) { static_assert(Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Disable || !Executor::Properties::inputSizeRestrictsOutputSize, "At most one of Properties::allowsBlockPassthrough or " @@ -1113,20 +1112,19 @@ static SkipRowsRangeVariant constexpr skipRowsType() { // ConstFetcher and SingleRowFetcher can skip, but // it may not be done for modification subqueries. static_assert(useFetcher == - (std::is_same::value || - (std::is_same>::value && + (std::is_same_v || + (std::is_same_v> && !std::is_same>::value)), "Unexpected fetcher for SkipVariants::FETCHER"); static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert(useExecutor == ( + static_assert(useExecutor == (is_one_of_v) || + TestLambdaSkipExecutor, #endif - std::is_same_v || - std::is_same_v), + SortedCollectExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! 
@@ -1151,7 +1149,8 @@ struct dependent_false : std::false_type {}; template std::tuple ExecutionBlockImpl::executeSkipRowsRange( AqlItemBlockInputRange& inputRange, AqlCall& call) { - if constexpr (isNewStyleExecutor()) { + if constexpr (isNewStyleExecutor) { + call.skippedRows = 0; if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { // If the executor has a method skipRowsRange, to skip outputs. // Every non-passthrough executor needs to implement this. @@ -1222,7 +1221,7 @@ std::tuple ExecutionBlockImpl::execute template std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { - if constexpr (isNewStyleExecutor()) { + if constexpr (isNewStyleExecutor) { if (!stack.isRelevant()) { // We are bypassing subqueries. // This executor is not allowed to perform actions diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 16169b212d06..5044e8f279b9 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -142,7 +142,7 @@ class ExecutionBlockImpl final : public ExecutionBlock { * @param node The Node used to create this ExecutionBlock */ ExecutionBlockImpl(ExecutionEngine* engine, ExecutionNode const* node, - typename Executor::Infos&&); + typename Executor::Infos); ~ExecutionBlockImpl() override; diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index a94ab5f72ee4..330948f49a80 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -50,14 +50,14 @@ OutputAqlItemRow::OutputAqlItemRow( std::shared_ptr const> outputRegisters, std::shared_ptr const> registersToKeep, std::shared_ptr const> registersToClear, - AqlCall&& clientCall, CopyRowBehavior copyRowBehavior) + AqlCall clientCall, CopyRowBehavior copyRowBehavior) : _block(std::move(block)), _baseIndex(0), _lastBaseIndex(0), _inputRowCopied(false), _lastSourceRow{CreateInvalidInputRowHint{}}, _numValuesWritten(0), - _call(std::move(clientCall)), + _call(clientCall), _doNotCopyInputRow(copyRowBehavior == CopyRowBehavior::DoNotCopyInputRows), _outputRegisters(std::move(outputRegisters)), _registersToKeep(std::move(registersToKeep)), @@ -66,6 +66,19 @@ OutputAqlItemRow::OutputAqlItemRow( _setBaseIndexNotUsed(true), #endif _allowSourceRowUninitialized(false) { +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + if (_block != nullptr) { + for (auto const& reg : *_outputRegisters) { + TRI_ASSERT(reg < _block->getNrRegs()); + } + for (auto const& reg : *_registersToKeep) { + TRI_ASSERT(reg < _block->getNrRegs()); + } + for (auto const& reg : *_registersToClear) { + TRI_ASSERT(reg < _block->getNrRegs()); + } + } +#endif } bool OutputAqlItemRow::isInitialized() const noexcept { diff --git a/arangod/Aql/OutputAqlItemRow.h b/arangod/Aql/OutputAqlItemRow.h index 74173815ac32..29b9e26017bf 100644 --- a/arangod/Aql/OutputAqlItemRow.h +++ b/arangod/Aql/OutputAqlItemRow.h @@ -55,7 +55,7 @@ class OutputAqlItemRow { std::shared_ptr const> outputRegisters, std::shared_ptr const> registersToKeep, std::shared_ptr const> registersToClear, - AqlCall&& clientCall = AqlCall{}, + AqlCall clientCall = AqlCall{}, CopyRowBehavior = CopyRowBehavior::CopyInputRows); ~OutputAqlItemRow() = default; diff --git a/arangod/Aql/SortedCollectExecutor.cpp b/arangod/Aql/SortedCollectExecutor.cpp index 341023815cf0..3967bf6d2faa 100644 --- a/arangod/Aql/SortedCollectExecutor.cpp +++ b/arangod/Aql/SortedCollectExecutor.cpp @@ -17,6 +17,7 @@ /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// +/// @author Lars Maier /// @author 
Tobias Goedderz /// @author Michael Hackstein /// @author Heiko Kernbach @@ -35,8 +36,13 @@ #include #include +#include #include +// Set this to true to activate devel logging +#define LOG_DEVEL_SORTED_COLLECT_ENABLED false +#define LOG_DEVEL_SC LOG_DEVEL_IF(LOG_DEVEL_SORTED_COLLECT_ENABLED) + using namespace arangodb; using namespace arangodb::aql; @@ -79,7 +85,7 @@ void SortedCollectExecutor::CollectGroup::initialize(size_t capacity) { } } -void SortedCollectExecutor::CollectGroup::reset(InputAqlItemRow& input) { +void SortedCollectExecutor::CollectGroup::reset(InputAqlItemRow const& input) { _shouldDeleteBuilderBuffer = true; ConditionalDeleter> deleter(_shouldDeleteBuilderBuffer); std::shared_ptr> buffer(new VPackBuffer, deleter); @@ -143,11 +149,9 @@ SortedCollectExecutorInfos::SortedCollectExecutorInfos( _count(count), _trxPtr(trxPtr) {} -SortedCollectExecutor::SortedCollectExecutor(Fetcher& fetcher, Infos& infos) +SortedCollectExecutor::SortedCollectExecutor(Fetcher&, Infos& infos) : _infos(infos), - _fetcher(fetcher), - _currentGroup(infos.getCount(), infos), - _fetcherDone(false) { + _currentGroup(infos.getCount(), infos) { // reserve space for the current row _currentGroup.initialize(_infos.getGroupRegisters().size()); // reset and recreate new group @@ -156,7 +160,7 @@ SortedCollectExecutor::SortedCollectExecutor(Fetcher& fetcher, Infos& infos) _currentGroup.reset(emptyInput); }; -void SortedCollectExecutor::CollectGroup::addLine(InputAqlItemRow& input) { +void SortedCollectExecutor::CollectGroup::addLine(InputAqlItemRow const& input) { // remember the last valid row we had _lastInputRow = input; @@ -203,7 +207,7 @@ void SortedCollectExecutor::CollectGroup::addLine(InputAqlItemRow& input) { } } -bool SortedCollectExecutor::CollectGroup::isSameGroup(InputAqlItemRow& input) { +bool SortedCollectExecutor::CollectGroup::isSameGroup(InputAqlItemRow const& input) const { // if we do not have valid input, return false if (!input.isInitialized()) { return false; @@ -239,7 +243,7 @@ void SortedCollectExecutor::CollectGroup::groupValuesToArray(VPackBuilder& build } void SortedCollectExecutor::CollectGroup::writeToOutput(OutputAqlItemRow& output, - InputAqlItemRow& input) { + InputAqlItemRow const& input) { // Thanks to the edge case that we have to emit a row even if we have no // input We cannot assert here that the input row is valid ;( @@ -284,27 +288,55 @@ void SortedCollectExecutor::CollectGroup::writeToOutput(OutputAqlItemRow& output output.moveValueInto(infos.getCollectRegister(), _lastInputRow, guard); } } + + output.advanceRow(); } std::pair SortedCollectExecutor::produceRows(OutputAqlItemRow& output) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +std::pair SortedCollectExecutor::expectedNumberOfRows(size_t atMost) const { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +auto SortedCollectExecutor::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { TRI_IF_FAILURE("SortedCollectExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - ExecutionState state; - InputAqlItemRow input{CreateInvalidInputRowHint{}}; + AqlCall clientCall = output.getClientCall(); + TRI_ASSERT(clientCall.offset == 0); - while (true) { - if (_fetcherDone) { - if (_currentGroup.isValid()) { - _currentGroup.writeToOutput(output, input); - InputAqlItemRow input{CreateInvalidInputRowHint{}}; - _currentGroup.reset(input); - TRI_ASSERT(!_currentGroup.isValid()); - return {ExecutionState::DONE, {}}; + size_t rowsProduces = 0; + bool 
pendingGroup = false; + + while (!output.isFull()) { + auto [state, input] = inputRange.peekDataRow(); + + LOG_DEVEL_SC << "SortedCollectExecutor::produceRows " << state << " " + << input.isInitialized(); + + if (state == ExecutorState::DONE && !(_haveSeenData || input.isInitialized())) { + // we have never been called with data + LOG_DEVEL_SC << "never called with data"; + if (_infos.getGroupRegisters().empty()) { + // by definition we need to emit one collect row + _currentGroup.writeToOutput(output, InputAqlItemRow{CreateInvalidInputRowHint{}}); + rowsProduces += 1; } - return {ExecutionState::DONE, {}}; + break; } + + // either state != DONE or we have an input row + TRI_ASSERT(state == ExecutorState::HASMORE || state == ExecutorState::DONE); + if (!input.isInitialized() && state != ExecutorState::DONE) { + LOG_DEVEL_SC << "need more input rows"; + break; + } + TRI_IF_FAILURE("SortedCollectBlock::getOrSkipSomeOuter") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -313,73 +345,128 @@ std::pair SortedCollectExecutor::produceRows(OutputAqlI THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - std::tie(state, input) = _fetcher.fetchRow(); + if (input.isInitialized()) { + _haveSeenData = true; - if (state == ExecutionState::WAITING) { - return {state, {}}; - } - - if (state == ExecutionState::DONE) { - _fetcherDone = true; - } - - // if we are in the same group, we need to add lines to the current group - if (_currentGroup.isSameGroup(input)) { - _currentGroup.addLine(input); + // if we are in the same group, we need to add lines to the current group + if (_currentGroup.isSameGroup(input)) { + LOG_DEVEL_SC << "input is same group"; + _currentGroup.addLine(input); - if (state == ExecutionState::DONE) { - TRI_ASSERT(!output.produced()); - _currentGroup.writeToOutput(output, input); - // Invalidate group - input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - _currentGroup.reset(input); - return {ExecutionState::DONE, {}}; - } - } else { - if (_currentGroup.isValid()) { + } else if (_currentGroup.isValid()) { + LOG_DEVEL_SC << "input is new group, writing old group"; // Write the current group. // Start a new group from input + rowsProduces += 1; _currentGroup.writeToOutput(output, input); - TRI_ASSERT(output.produced()); - _currentGroup.reset(input); // reset and recreate new group - if (input.isInitialized()) { - return {ExecutionState::HASMORE, {}}; + + if (output.isFull()) { + LOG_DEVEL_SC << "now output is full, exit early"; + pendingGroup = true; + _currentGroup.reset(InputAqlItemRow{CreateInvalidInputRowHint{}}); + break; } - TRI_ASSERT(state == ExecutionState::DONE); - return {ExecutionState::DONE, {}}; + _currentGroup.reset(input); // reset and recreate new group + } else { - if (!input.isInitialized()) { - if (_infos.getGroupRegisters().empty()) { - // we got exactly 0 rows as input. 
- // by definition we need to emit one collect row - _currentGroup.writeToOutput(output, input); - TRI_ASSERT(output.produced()); - } - TRI_ASSERT(state == ExecutionState::DONE); - return {ExecutionState::DONE, {}}; - } + LOG_DEVEL_SC << "generating new group"; // old group was not valid, do not write it _currentGroup.reset(input); // reset and recreate new group } } + + inputRange.nextDataRow(); + + bool produceMore = !output.isFull(); + if (!produceMore) { + pendingGroup = true; + break; + } + + if (state == ExecutorState::DONE) { + rowsProduces += 1; + _currentGroup.writeToOutput(output, input); + _currentGroup.reset(InputAqlItemRow{CreateInvalidInputRowHint{}}); + break; + } } + + auto newState = pendingGroup ? ExecutorState::HASMORE : inputRange.upstreamState(); + + LOG_DEVEL_SC << "reporting state: " << newState; + return {newState, Stats{}, AqlCall{}}; } -std::pair SortedCollectExecutor::expectedNumberOfRows(size_t atMost) const { - if (!_fetcherDone) { - ExecutionState state; - size_t expectedRows; - std::tie(state, expectedRows) = _fetcher.preFetchNumberOfRows(atMost); - if (state == ExecutionState::WAITING) { - TRI_ASSERT(expectedRows == 0); - return {state, 0}; - } - return {ExecutionState::HASMORE, expectedRows + 1}; +auto SortedCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + -> std::tuple { + TRI_IF_FAILURE("SortedCollectExecutor::skipRowsRange") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - // The fetcher will NOT send anything any more - // We will at most return the current oepn group - if (_currentGroup.isValid()) { - return {ExecutionState::HASMORE, 1}; + + TRI_ASSERT(clientCall.needSkipMore()); + while (clientCall.needSkipMore()) { + LOG_DEVEL_SC << "clientCall.getSkipCount() == " << clientCall.getSkipCount(); + LOG_DEVEL_SC << "clientCall.needSkipMore() == " << clientCall.needSkipMore(); + + { + auto [state, input] = inputRange.peekDataRow(); + + LOG_DEVEL_SC << "SortedCollectExecutor::skipRowsRange " << state << " " + << std::boolalpha << input.isInitialized(); + + if (input.isInitialized()) { + // we received data + _haveSeenData = true; + + // if we are in the same group, we can skip this line + if (_currentGroup.isSameGroup(input)) { + LOG_DEVEL_SC << "input is same group"; + std::ignore = inputRange.nextDataRow(); + /* do nothing */ + } else { + if (_currentGroup.isValid()) { + LOG_DEVEL_SC << "input is new group, skipping current group"; + // The current group is completed, skip it and create a new one + clientCall.didSkip(1); + _currentGroup.reset(InputAqlItemRow{CreateInvalidInputRowHint{}}); + continue; + } + + LOG_DEVEL_SC << "group is invalid, creating new group"; + _currentGroup.reset(input); + std::ignore = inputRange.nextDataRow(); + } + } + + if (!clientCall.needSkipMore()) { + LOG_DEVEL_SC << "stop skipping early, there could be a pending group"; + break; + } + + if (state == ExecutorState::DONE) { + if (!_haveSeenData) { + // we have never been called with data + LOG_DEVEL_SC << "never called with data"; + if (_infos.getGroupRegisters().empty()) { + // by definition we need to emit one collect row + clientCall.didSkip(1); + } + } else { + LOG_DEVEL_SC << "skipping final group"; + clientCall.didSkip(1); + _currentGroup.reset(InputAqlItemRow{CreateInvalidInputRowHint{}}); + } + break; + } else if (!input.isInitialized()) { + TRI_ASSERT(state == ExecutorState::HASMORE); + LOG_DEVEL_SC << "waiting for more data to skip"; + break; + } + } } - return {ExecutionState::DONE, 0}; + + LOG_DEVEL_SC << " skipped rows: " 
<< clientCall.getSkipCount(); + LOG_DEVEL_SC << "reporting state: " << inputRange.upstreamState(); + + return {inputRange.upstreamState(), clientCall.getSkipCount(), AqlCall{}}; } diff --git a/arangod/Aql/SortedCollectExecutor.h b/arangod/Aql/SortedCollectExecutor.h index 0c0ff60f2c0b..94eaa59a38d6 100644 --- a/arangod/Aql/SortedCollectExecutor.h +++ b/arangod/Aql/SortedCollectExecutor.h @@ -153,14 +153,14 @@ class SortedCollectExecutor { ~CollectGroup(); void initialize(size_t capacity); - void reset(InputAqlItemRow& input); + void reset(InputAqlItemRow const& input); bool isValid() const { return _lastInputRow.isInitialized(); } - void addLine(InputAqlItemRow& input); - bool isSameGroup(InputAqlItemRow& input); + void addLine(InputAqlItemRow const& input); + bool isSameGroup(InputAqlItemRow const& input) const; void groupValuesToArray(velocypack::Builder& builder); - void writeToOutput(OutputAqlItemRow& output, InputAqlItemRow& input); + void writeToOutput(OutputAqlItemRow& output, InputAqlItemRow const& input); }; public: @@ -183,14 +183,31 @@ class SortedCollectExecutor { * * @return ExecutionState, and if successful exactly one new Row of AqlItems. */ - std::pair produceRows(OutputAqlItemRow& output); + auto produceRows(OutputAqlItemRow& output) -> std::pair; + + /** + * @brief produce the next Rows of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + /** + * @brief skip the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call); /** * This executor has no chance to estimate how many rows * it will produce exactly. It can however only * overestimate never underestimate. 
*/ - std::pair expectedNumberOfRows(size_t atMost) const; + [[nodiscard]] auto expectedNumberOfRows(size_t atMost) const + -> std::pair; private: Infos const& infos() const noexcept { return _infos; }; @@ -198,12 +215,10 @@ class SortedCollectExecutor { private: Infos const& _infos; - Fetcher& _fetcher; - /// @brief details about the current group CollectGroup _currentGroup; - bool _fetcherDone; // Flag if fetcher is done + bool _haveSeenData = false; }; } // namespace aql diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 3e6a05ec141e..f90e3d7bf98d 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -25,10 +25,17 @@ #include "gtest/gtest.h" +#include "AqlItemBlockHelper.h" +#include "WaitingExecutionBlockMock.h" + +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" #include "Aql/ExecutionBlock.h" +#include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutionStats.h" #include "Aql/OutputAqlItemRow.h" +#include "Aql/Query.h" #include "Aql/SharedAqlItemBlockPtr.h" #include @@ -37,6 +44,189 @@ namespace arangodb { namespace tests { namespace aql { +template +struct ExecutorTestHelper { + using SplitType = std::variant, std::size_t, std::monostate>; + + ExecutorTestHelper(ExecutorTestHelper const&) = delete; + ExecutorTestHelper(ExecutorTestHelper&&) = delete; + explicit ExecutorTestHelper(arangodb::aql::Query& query) + : _expectedSkip{0}, + _expectedState{ExecutionState::HASMORE}, + _query(query), + _dummyNode{std::make_unique(_query.plan(), 42)} {} + + auto setCall(AqlCall c) -> ExecutorTestHelper& { + _call = c; + return *this; + } + + auto setInputValue(MatrixBuilder in) -> ExecutorTestHelper& { + _input = std::move(in); + return *this; + } + + template + auto setInputValueList(Ts&&... ts) -> ExecutorTestHelper& { + _input = MatrixBuilder{{ts}...}; + return *this; + } + + auto setInputSplit(std::vector const& list) -> ExecutorTestHelper& { + _inputSplit = list; + return *this; + } + + auto setInputSplitStep(std::size_t step) -> ExecutorTestHelper& { + _inputSplit = step; + return *this; + } + + auto setInputSplitType(SplitType split) -> ExecutorTestHelper& { + _inputSplit = split; + return *this; + } + + template + auto setOutputSplit(T&& list) -> ExecutorTestHelper& { + ASSERT_FALSE(true); + _outputSplit = std::forward(list); + return *this; + } + + auto expectOutput(std::array const& regs, + MatrixBuilder const& out) -> ExecutorTestHelper& { + _outputRegisters = regs; + _output = out; + return *this; + } + + template + auto expectOutputValueList(Ts&&... 
ts) -> ExecutorTestHelper& { + static_assert(outputColumns == 1); + _outputRegisters[0] = 1; + _output = MatrixBuilder{{ts}...}; + return *this; + } + + auto expectSkipped(std::size_t skip) -> ExecutorTestHelper& { + _expectedSkip = skip; + return *this; + } + + auto expectedState(ExecutionState state) -> ExecutorTestHelper& { + _expectedState = state; + return *this; + } + + auto run(typename E::Infos infos) -> void { + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); + + auto inputBlock = generateInputRanges(itemBlockManager); + + auto testeeNode = std::make_unique(_query.plan(), 1); + + ExecutionBlockImpl testee{_query.engine(), testeeNode.get(), std::move(infos)}; + testee.addDependency(inputBlock.get()); + + AqlCallStack stack{_call}; + auto const [state, skipped, result] = testee.execute(stack); + EXPECT_EQ(skipped, _expectedSkip); + + EXPECT_EQ(state, _expectedState); + + SharedAqlItemBlockPtr expectedOutputBlock = + buildBlock(itemBlockManager, std::move(_output)); + testOutputBlock(result, expectedOutputBlock); + + // ToDo: + // Test stats + }; + + private: + void testOutputBlock(SharedAqlItemBlockPtr const& outputBlock, + SharedAqlItemBlockPtr const& expectedOutputBlock) { + velocypack::Options vpackOptions; + + EXPECT_EQ(outputBlock->size(), expectedOutputBlock->size()); + for (size_t i = 0; i < outputBlock->size(); i++) { + for (size_t j = 0; j < outputColumns; j++) { + AqlValue const& x = outputBlock->getValueReference(i, _outputRegisters[j]); + AqlValue const& y = expectedOutputBlock->getValueReference(i, j); + + EXPECT_TRUE(AqlValue::Compare(&vpackOptions, x, y, true) == 0) + << "Row " << i << " Column " << j << " (Reg " << _outputRegisters[j] + << ") do not agree"; + } + } + } + + auto generateInputRanges(AqlItemBlockManager& itemBlockManager) + -> std::unique_ptr { + using VectorSizeT = std::vector; + + MatrixBuilder matrix; + + std::deque blockDeque; + + std::optional iter, end; + + if (std::holds_alternative(_inputSplit)) { + iter = std::get(_inputSplit).begin(); + end = std::get(_inputSplit).end(); + } + + for (auto const& value : _input) { + matrix.push_back(value); + + bool openNewBlock = + std::visit(overload{[&](VectorSizeT& list) { + if (*iter != *end && matrix.size() == **iter) { + iter->operator++(); + return true; + } + + return false; + }, + [&](std::size_t size) { + return matrix.size() == size; + }, + [](auto) { return false; }}, + _inputSplit); + if (openNewBlock) { + SharedAqlItemBlockPtr inputBlock = + buildBlock(itemBlockManager, std::move(matrix)); + blockDeque.emplace_back(inputBlock); + matrix.clear(); + } + } + + if (!matrix.empty()) { + SharedAqlItemBlockPtr inputBlock = + buildBlock(itemBlockManager, std::move(matrix)); + blockDeque.emplace_back(inputBlock); + } + + return std::make_unique( + _query.engine(), _dummyNode.get(), std::move(blockDeque), + WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + } + + AqlCall _call; + MatrixBuilder _input; + MatrixBuilder _output; + std::array _outputRegisters; + size_t _expectedSkip; + ExecutionState _expectedState; + + SplitType _inputSplit = {std::monostate()}; + SplitType _outputSplit = {std::monostate()}; + + arangodb::aql::Query& _query; + std::unique_ptr _dummyNode; +}; + enum class ExecutorCall { SKIP_ROWS, PRODUCE_ROWS, diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index 5a0ad7dfe8a5..c267ab80c092 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ 
b/tests/Aql/SortedCollectExecutorTest.cpp @@ -21,10 +21,12 @@ /// @author Michael Hackstein /// @author Heiko Kernbach /// @author Jan Christoph Uhde +/// @author Lars Maier //////////////////////////////////////////////////////////////////////////////// #include "gtest/gtest.h" +#include "ExecutorTestHelper.h" #include "RowFetcherHelper.h" #include "Aql/AqlItemBlock.h" @@ -37,6 +39,7 @@ #include "Transaction/Context.h" #include "Transaction/Methods.h" +#include "AqlItemBlockHelper.h" #include "Mocks/Servers.h" #include @@ -46,9 +49,7 @@ using namespace arangodb; using namespace arangodb::aql; -namespace arangodb { -namespace tests { -namespace aql { +namespace arangodb::tests::aql { class SortedCollectExecutorTestNoRowsUpstream : public ::testing::Test { protected: @@ -103,30 +104,40 @@ class SortedCollectExecutorTestNoRowsUpstream : public ::testing::Test { block(new AqlItemBlock(itemBlockManager, 1000, 2)) {} }; -TEST_F(SortedCollectExecutorTestNoRowsUpstream, producer_doesnt_wait) { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - SortedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} - -TEST_F(SortedCollectExecutorTestNoRowsUpstream, producer_waits) { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); +TEST_F(SortedCollectExecutorTestNoRowsUpstream, producer_gets_empty_input) { + auto input = VPackParser::fromJson("[ [1], [2] ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); + AqlCall clientCall; + + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE); + + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(1, infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); + + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } } class SortedCollectExecutorTestRowsUpstream : public ::testing::Test { @@ -185,209 +196,291 @@ class SortedCollectExecutorTestRowsUpstream : public ::testing::Test { block(new 
AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)) {} }; -TEST_F(SortedCollectExecutorTestRowsUpstream, producer_doesnt_wait) { +TEST_F(SortedCollectExecutorTestRowsUpstream, producer_1) { auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - - // check for groups in this executor they are guaranteed to be ordered - - // First group - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 1); - // check for collect - x = block->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 2); + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}}); + AqlCall clientCall; + + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(2, infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); + + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(2, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + AqlValue x = outputBlock->getValue(0, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 1); + x = outputBlock->getValue(1, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 2); + } } -TEST_F(SortedCollectExecutorTestRowsUpstream, producer_doesnt_wait_2) { +TEST_F(SortedCollectExecutorTestRowsUpstream, producer_2) { auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, 
ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - - // check for collects - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 1); - - x = block->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 2); - - x = block->getValue(2, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 3); + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}, {3}}); + AqlCall clientCall; + + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); + + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(inputBlock->size(), result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + AqlValue x = outputBlock->getValue(0, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 1); + x = outputBlock->getValue(1, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 2); + x = outputBlock->getValue(2, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 3); + } } -TEST_F(SortedCollectExecutorTestRowsUpstream, producer_doesnt_wait_3) { +TEST_F(SortedCollectExecutorTestRowsUpstream, producer_3) { // Input order needs to be guaranteed auto input = VPackParser::fromJson("[ [1], [1], [2], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - 
result.advanceRow(); - - // After done return done - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 1); - - x = block->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 2); - - x = block->getValue(2, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 3); + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{1}, {1}, {2}, {2}, {3}}); + AqlCall clientCall; + + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); + + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(3, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + AqlValue x = outputBlock->getValue(0, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 1); + x = outputBlock->getValue(1, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 2); + x = outputBlock->getValue(2, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 3); + } } -TEST_F(SortedCollectExecutorTestRowsUpstream, producer_doesnt_wait_4) { +TEST_F(SortedCollectExecutorTestRowsUpstream, producer_4) { auto input = VPackParser::fromJson("[ [1], [1], [2], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - // After DONE return DONE - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{1}, {1}, {2}, {2}}); + AqlCall clientCall; + + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), 
infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); + + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(2, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + AqlValue x = outputBlock->getValue(0, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 1); + x = outputBlock->getValue(1, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 2); + } +} - auto block = result.stealBlock(); +TEST(SortedCollectExecutorTestRowsUpstreamCount, test) { + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 1); + mocks::MockAqlServer server{}; + std::unique_ptr fakedQuery = server.createFakeQuery(); + arangodb::transaction::Methods* trx = fakedQuery->trx(); - x = block->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 2); -} + std::unordered_set regToClear = {}; + std::unordered_set regToKeep = {}; + std::vector> groupRegisters = {{1, 0}}; -TEST_F(SortedCollectExecutorTestRowsUpstream, producer_waits) { - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - SortedCollectExecutor testee(fetcher, infos); + std::unordered_set readableInputRegisters = {0}; + std::unordered_set writeableOutputRegisters = {1, 2}; + RegisterId nrOutputRegister = 3; - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); + std::vector> aggregateRegisters; + aggregateRegisters.emplace_back(std::make_pair(2, 0)); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); + std::vector aggregateTypes = {"SUM"}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); + RegisterId collectRegister = RegisterPlan::MaxRegisterId; + RegisterId expressionRegister = RegisterPlan::MaxRegisterId; + Variable const* expressionVariable = nullptr; + std::vector> variables; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); + SortedCollectExecutorInfos infos(1, nrOutputRegister, std::move(regToClear), + std::move(regToKeep), std::move(readableInputRegisters), + std::move(writeableOutputRegisters), + std::move(groupRegisters), collectRegister, + expressionRegister, expressionVariable, + std::move(aggregateTypes), std::move(variables), + std::move(aggregateRegisters), trx, false); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); 
- result.advanceRow(); + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}}); + AqlCall clientCall; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); - auto block = result.stealBlock(); + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 1); + auto input = VPackParser::fromJson("[ [1], [2] ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); + SortedCollectExecutor testee(fetcher, infos); - x = block->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.slice().getInt(), 2); + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(2, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + AqlValue x = outputBlock->getValue(0, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 1); + x = outputBlock->getValue(0, 2); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getDouble(), 1); + + x = outputBlock->getValue(1, 1); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getInt(), 2); + x = outputBlock->getValue(1, 2); + ASSERT_TRUE(x.isNumber()); + ASSERT_EQ(x.slice().getDouble(), 2); + } } -TEST(SortedCollectExecutorTestRowsUpstreamCount, test) { - ExecutionState state; +TEST(SortedCollectExecutorTestRowsUpstreamCountStrings, test) { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; @@ -410,17 +503,15 @@ TEST(SortedCollectExecutorTestRowsUpstreamCount, test) { std::vector> aggregateRegisters; aggregateRegisters.emplace_back(std::make_pair(2, 0)); - writeableOutputRegisters.insert(2); std::vector aggregateTypes; - aggregateTypes.emplace_back("SUM"); + aggregateTypes.emplace_back("LENGTH"); - // if count = true, then we need to set a valid countRegister - bool count = true; RegisterId collectRegister = RegisterPlan::MaxRegisterId; RegisterId expressionRegister = RegisterPlan::MaxRegisterId; Variable const* expressionVariable = nullptr; std::vector> variables; + writeableOutputRegisters.insert(2); SortedCollectExecutorInfos infos(1, nrOutputRegister, regToClear, regToKeep, std::move(readableInputRegisters), @@ -428,269 +519,509 @@ TEST(SortedCollectExecutorTestRowsUpstreamCount, test) { std::move(groupRegisters), collectRegister, expressionRegister, expressionVariable, std::move(aggregateTypes), std::move(variables), - std::move(aggregateRegisters), trx, count); + std::move(aggregateRegisters), trx, false); 
SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; - NoStats stats{}; - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + auto input = VPackParser::fromJson("[ [\"a\"], [\"aa\"], [\"aaa\"] ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto newBlock = result.stealBlock(); - - // check for types - AqlValue x = newBlock->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - EXPECT_EQ(x.slice().getInt(), 1); - - // Check the SUM register - AqlValue counter = newBlock->getValue(0, 2); - ASSERT_TRUE(counter.isNumber()); - EXPECT_EQ(counter.slice().getDouble(), 1); - - // check for types - x = newBlock->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - EXPECT_EQ(x.slice().getInt(), 2); - - // Check the SUM register - counter = newBlock->getValue(1, 2); - ASSERT_TRUE(counter.isNumber()); - EXPECT_EQ(counter.slice().getDouble(), 2); + SharedAqlItemBlockPtr inputBlock = + buildBlock<1>(itemBlockManager, {{"\"a\""}, {"\"aa\""}, {"\"aaa\""}}); + AqlCall clientCall; + + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), {}); + + { + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRange, result); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(0, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(3, result.numRowsWritten()); + ASSERT_FALSE(result.produced()); + } + + { + AqlValue x = outputBlock->getValue(0, 1); + ASSERT_TRUE(x.isString()); + ASSERT_TRUE(x.slice().isEqualString("a")); + + AqlValue c = outputBlock->getValue(0, 2); + ASSERT_TRUE(c.isNumber()); + EXPECT_EQ(c.slice().getInt(), 1); + } + + { + AqlValue x = outputBlock->getValue(1, 1); + ASSERT_TRUE(x.isString()); + ASSERT_TRUE(x.slice().isEqualString("aa")); + + AqlValue c = outputBlock->getValue(1, 2); + ASSERT_TRUE(c.isNumber()); + EXPECT_EQ(c.slice().getInt(), 1); + } + + { + AqlValue x = outputBlock->getValue(2, 1); + ASSERT_TRUE(x.isString()); + ASSERT_TRUE(x.slice().isEqualString("aaa")); + + AqlValue c = 
outputBlock->getValue(2, 2); + ASSERT_TRUE(c.isNumber()); + EXPECT_EQ(c.slice().getInt(), 1); + } } -TEST(SortedCollectExecutorTestRowsUpstreamCountNumbers, test) { - ExecutionState state; +class SortedCollectExecutorTestSkip : public ::testing::Test { + protected: + // ExecutionState state; ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; + AqlItemBlockManager itemBlockManager; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - arangodb::transaction::Methods* trx = fakedQuery->trx(); + mocks::MockAqlServer server; + std::unique_ptr fakedQuery; + arangodb::transaction::Methods* trx; std::unordered_set regToClear; std::unordered_set regToKeep; std::vector> groupRegisters; - groupRegisters.emplace_back(std::make_pair(1, 0)); std::unordered_set readableInputRegisters; - readableInputRegisters.insert(0); + + RegisterId collectRegister; std::unordered_set writeableOutputRegisters; - writeableOutputRegisters.insert(1); - RegisterId nrOutputRegister = 3; + RegisterId nrOutputRegister; std::vector> aggregateRegisters; - aggregateRegisters.emplace_back(std::make_pair(2, 0)); - std::vector aggregateTypes; - aggregateTypes.emplace_back("LENGTH"); // if count = true, then we need to set a valid countRegister - bool count = true; - RegisterId collectRegister = RegisterPlan::MaxRegisterId; - RegisterId expressionRegister = RegisterPlan::MaxRegisterId; - Variable const* expressionVariable = nullptr; + RegisterId expressionRegister; + Variable const* expressionVariable; std::vector> variables; - writeableOutputRegisters.insert(2); + bool count; - SortedCollectExecutorInfos infos(1, nrOutputRegister, regToClear, regToKeep, - std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), collectRegister, - expressionRegister, expressionVariable, - std::move(aggregateTypes), std::move(variables), - std::move(aggregateRegisters), trx, count); + SortedCollectExecutorInfos infos; - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; - NoStats stats{}; + SharedAqlItemBlockPtr block; + NoStats stats; + + SortedCollectExecutorTestSkip() + : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), + fakedQuery(server.createFakeQuery()), + trx(fakedQuery->trx()), + groupRegisters{std::make_pair(1, 0)}, + readableInputRegisters({0}), + collectRegister(2), + writeableOutputRegisters({1, 2}), + nrOutputRegister(3), + expressionRegister(RegisterPlan::MaxRegisterId), + expressionVariable(nullptr), + count(false), + infos(1, nrOutputRegister, regToClear, regToKeep, + std::move(readableInputRegisters), std::move(writeableOutputRegisters), + std::move(groupRegisters), collectRegister, expressionRegister, + expressionVariable, std::move(aggregateTypes), + std::move(variables), std::move(aggregateRegisters), trx, count), + block(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)) {} +}; + +TEST_F(SortedCollectExecutorTestSkip, skip_1) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, std::make_shared>(), false); + + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}}); + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); - auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> 
fetcher(itemBlockManager, input->steal(), false); SortedCollectExecutor testee(fetcher, infos); - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto newBlock = result.stealBlock(); - - // check for types - AqlValue x = newBlock->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - EXPECT_EQ(x.slice().getInt(), 1); - - // Check the LENGTH register - AqlValue xx = newBlock->getValue(0, 2); - ASSERT_TRUE(xx.isNumber()); - EXPECT_EQ(xx.slice().getInt(), 1); - - // check for types - x = newBlock->getValue(1, 1); - ASSERT_TRUE(x.isNumber()); - EXPECT_EQ(x.slice().getInt(), 2); - - // Check the LENGTH register - xx = newBlock->getValue(1, 2); - ASSERT_TRUE(xx.isNumber()); - EXPECT_EQ(xx.slice().getInt(), 1); - - // check for types - x = newBlock->getValue(2, 1); - ASSERT_TRUE(x.isNumber()); - EXPECT_EQ(x.slice().getInt(), 3); - - // Check the LENGTH register - xx = newBlock->getValue(2, 2); - ASSERT_TRUE(xx.isNumber()); - EXPECT_EQ(xx.slice().getInt(), 1); + AqlCall clientCall; + clientCall.offset = 2; + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + ASSERT_EQ(ExecutorState::HASMORE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(skipped, 0); + } + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + ASSERT_EQ(ExecutorState::DONE, state); + ASSERT_FALSE(upstreamCall.hasHardLimit()); + ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + ASSERT_EQ(0, upstreamCall.offset); + ASSERT_EQ(clientCall.fullCount, upstreamCall.fullCount); + ASSERT_EQ(skipped, 2); + } } -TEST(SortedCollectExecutorTestRowsUpstreamCountStrings, test) { - ExecutionState state; +TEST_F(SortedCollectExecutorTestSkip, skip_2) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, std::make_shared>(), false); + + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}}); + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SortedCollectExecutor testee(fetcher, infos); + + AqlCall clientCall; + clientCall.offset = 1; + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + EXPECT_EQ(ExecutorState::HASMORE, state); + EXPECT_FALSE(upstreamCall.hasHardLimit()); + EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 0); + } + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + EXPECT_EQ(state, 
ExecutorState::HASMORE); + EXPECT_FALSE(upstreamCall.hasHardLimit()); + EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 1); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::HASMORE); + } + + { + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), clientCall); + + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + EXPECT_EQ(ExecutorState::DONE, state); + EXPECT_FALSE(upstreamCall.hasHardLimit()); + EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(1, result.numRowsWritten()); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::DONE); + + { + AqlValue x = outputBlock->getValue(0, 1); + EXPECT_TRUE(x.isNumber()); + EXPECT_EQ(x.slice().getInt(), 2); + } + } +} + +TEST_F(SortedCollectExecutorTestSkip, skip_3) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, std::make_shared>(), false); + + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {1}}); + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::HASMORE, inputBlock, 0, + inputBlock->size()); + AqlItemBlockInputRange emptyInputRangeDone(ExecutorState::DONE); + + SortedCollectExecutor testee(fetcher, infos); + + AqlCall clientCall; + clientCall.offset = 1; + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + EXPECT_EQ(ExecutorState::HASMORE, state); + EXPECT_FALSE(upstreamCall.hasHardLimit()); + EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 0); + } + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::HASMORE); + } + + { + auto [state, skipped, upstreamCall] = + testee.skipRowsRange(emptyInputRangeDone, clientCall); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(skipped, 1); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::HASMORE); + } +} + +TEST_F(SortedCollectExecutorTestSkip, skip_4) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, std::make_shared>(), false); + + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {1}}); + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::HASMORE, inputBlock, 0, + inputBlock->size()); + + SharedAqlItemBlockPtr inputBlock2 = buildBlock<1>(itemBlockManager, {{2}}); + AqlItemBlockInputRange inputRange2(ExecutorState::HASMORE, inputBlock2, 0, + inputBlock2->size()); + AqlItemBlockInputRange emptyInputRangeDone(ExecutorState::DONE); + + SortedCollectExecutor testee(fetcher, infos); + + AqlCall clientCall; + clientCall.offset = 1; + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + 
EXPECT_EQ(ExecutorState::HASMORE, state); + EXPECT_FALSE(upstreamCall.hasHardLimit()); + EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 0); + } + + { + // 1, 1 + auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::HASMORE); + } + + { + // 2 + auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange2, clientCall); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(skipped, 1); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::HASMORE); + } + + { + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), clientCall); + + auto [state, stats, upstreamCall] = testee.produceRows(inputRange2, result); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(result.numRowsWritten(), 0); + EXPECT_FALSE(result.produced()); + } + + { + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), clientCall); + + auto [state, stats, upstreamCall] = testee.produceRows(emptyInputRangeDone, result); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(1, result.numRowsWritten()); + + { + AqlValue x = outputBlock->getValue(0, 1); + EXPECT_TRUE(x.isNumber()); + EXPECT_EQ(x.slice().getInt(), 2); + } + } +} + +TEST_F(SortedCollectExecutorTestSkip, skip_5) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, std::make_shared>(), false); + + SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {1}, {2}}); + AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, + inputBlock->size()); + + SortedCollectExecutor testee(fetcher, infos); + + AqlCall clientCall; + clientCall.offset = 1; + + { + auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + EXPECT_EQ(ExecutorState::HASMORE, state); + EXPECT_FALSE(upstreamCall.hasHardLimit()); + EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); + EXPECT_EQ(0, upstreamCall.offset); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 0); + } + + { + // 1, 1, 2 + auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(skipped, 1); + EXPECT_EQ(inputRange.upstreamState(), ExecutorState::HASMORE); + } + + { + SharedAqlItemBlockPtr outputBlock = + itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); + OutputAqlItemRow result(outputBlock, infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear(), clientCall); + + auto [state, stats, upstreamCall] = testee.produceRows(inputRange, result); + EXPECT_EQ(ExecutorState::DONE, state); + 
EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); + EXPECT_EQ(1, result.numRowsWritten()); + + { + AqlValue x = outputBlock->getValue(0, 1); + EXPECT_TRUE(x.isNumber()); + EXPECT_EQ(x.slice().getInt(), 2); + } + } +} + +using SortedCollectTestHelper = ExecutorTestHelper; +using SortedCollectSplitType = SortedCollectTestHelper::SplitType; + +class SortedCollectExecutorTestSplit + : public ::testing::TestWithParam> { + protected: + // ExecutionState state; ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; + AqlItemBlockManager itemBlockManager; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - arangodb::transaction::Methods* trx = fakedQuery->trx(); + mocks::MockAqlServer server; + std::unique_ptr fakedQuery; + arangodb::transaction::Methods* trx; std::unordered_set regToClear; std::unordered_set regToKeep; std::vector> groupRegisters; - groupRegisters.emplace_back(std::make_pair(1, 0)); std::unordered_set readableInputRegisters; - readableInputRegisters.insert(0); + + RegisterId collectRegister; std::unordered_set writeableOutputRegisters; - writeableOutputRegisters.insert(1); - RegisterId nrOutputRegister = 3; + RegisterId nrOutputRegister; std::vector> aggregateRegisters; - aggregateRegisters.emplace_back(std::make_pair(2, 0)); - std::vector aggregateTypes; - aggregateTypes.emplace_back("LENGTH"); // if count = true, then we need to set a valid countRegister - bool count = true; - RegisterId collectRegister = RegisterPlan::MaxRegisterId; - RegisterId expressionRegister = RegisterPlan::MaxRegisterId; - Variable const* expressionVariable = nullptr; + RegisterId expressionRegister; + Variable const* expressionVariable; std::vector> variables; - writeableOutputRegisters.insert(2); + bool count; - SortedCollectExecutorInfos infos(1, nrOutputRegister, regToClear, regToKeep, - std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), collectRegister, - expressionRegister, expressionVariable, - std::move(aggregateTypes), std::move(variables), - std::move(aggregateRegisters), trx, count); + SortedCollectExecutorInfos infos; - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; - NoStats stats{}; + SharedAqlItemBlockPtr block; + NoStats stats; - auto input = VPackParser::fromJson("[ [\"a\"], [\"aa\"], [\"aaa\"] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - SortedCollectExecutor testee(fetcher, infos); + SortedCollectExecutorTestSplit() + : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), + fakedQuery(server.createFakeQuery()), + trx(fakedQuery->trx()), + groupRegisters{std::make_pair(1, 0)}, + readableInputRegisters({0}), + collectRegister(2), + writeableOutputRegisters({1, 2}), + nrOutputRegister(3), + expressionRegister(RegisterPlan::MaxRegisterId), + expressionVariable(nullptr), + count(false), + infos(1, nrOutputRegister, regToClear, regToKeep, + std::move(readableInputRegisters), std::move(writeableOutputRegisters), + std::move(groupRegisters), collectRegister, expressionRegister, + expressionVariable, std::move(aggregateTypes), + std::move(variables), std::move(aggregateRegisters), trx, count), + block(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)) { + auto engine = + std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); + fakedQuery->setEngine(engine.release()); + } +}; - OutputAqlItemRow 
result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myStrings; - std::vector myCountNumbers; - auto newBlock = result.stealBlock(); - - // check for types - AqlValue x = newBlock->getValue(0, 1); - ASSERT_TRUE(x.isString()); - EXPECT_EQ(x.slice().copyString(), "a"); - - // Check the count register - AqlValue c = newBlock->getValue(0, 2); - ASSERT_TRUE(c.isNumber()); - EXPECT_EQ(c.slice().getInt(), 1); - - // check for types - x = newBlock->getValue(1, 1); - ASSERT_TRUE(x.isString()); - EXPECT_EQ(x.slice().copyString(), "aa"); - - // Check the count register - c = newBlock->getValue(1, 2); - ASSERT_TRUE(c.isNumber()); - EXPECT_EQ(c.slice().getInt(), 1); - - // check for types - x = newBlock->getValue(2, 1); - ASSERT_TRUE(x.isString()); - EXPECT_EQ(x.slice().copyString(), "aaa"); - - // Check the count register - c = newBlock->getValue(2, 2); - ASSERT_TRUE(c.isNumber()); - EXPECT_EQ(c.slice().getInt(), 1); +TEST_P(SortedCollectExecutorTestSplit, split_1) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) + .setInputSplitType(split) + .setCall(AqlCall{2, AqlCall::Infinity{}, 2, true}) + .expectOutputValueList(3, 4) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(std::move(infos)); } -} // namespace aql -} // namespace tests -} // namespace arangodb +TEST_P(SortedCollectExecutorTestSplit, split_2) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) + .setInputSplitType(split) + .setCall(AqlCall{2, 2, AqlCall::Infinity{}, false}) + .expectOutputValueList(3, 4) + .expectSkipped(2) + .expectedState(ExecutionState::HASMORE) + .run(std::move(infos)); +} + +TEST_P(SortedCollectExecutorTestSplit, split_3) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 2, 3, 4, 5) + .setInputSplitType(split) + .setCall(AqlCall{1, AqlCall::Infinity{}, 10, true}) + .expectOutputValueList(2, 3, 4, 5) + .expectSkipped(1) + .expectedState(ExecutionState::DONE) + .run(std::move(infos)); +} + +template +const SortedCollectSplitType splitIntoBlocks = SortedCollectSplitType{std::vector{vs...}}; +template +const SortedCollectSplitType splitStep = SortedCollectSplitType{step}; + +INSTANTIATE_TEST_CASE_P(SortedCollectExecutor, SortedCollectExecutorTestSplit, + ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>, splitStep<2>)); + +} // namespace arangodb::tests::aql From 30d9f2775c4c7495897b6b3f8c7c6f49e41954a1 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Tue, 11 Feb 2020 09:41:25 +0100 Subject: [PATCH 058/122] ReturnExecutor New style (#10831) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ReturnExecutor implemented. Tests misssing. 
* Apply suggestions from code review Co-Authored-By: Tobias Gödderz * Implemented skipRows in Return. Also fixed ExecutionBlock references in static asserts * Implemented first new style test for Return. * Moved the traceExecute calls into baseclass. * Added tracing around execute in WaitingExecutionBlockMock. Fixed a test bug, if the AqlCall covers all of the first block, but only a part of the Second block. You would only get the part of the second block. * Added more tests for standard behaviour of ReturnExecutor * Added an assertion. * Simplified call to ReturnExecutor * Prepare tests for doCount. Also added test for different input register. * Added accessor for _stats in Engine. * Included count tests in ReturnExecutorTest. * Removed old obsolete test. * Removed obsolete code-path * Fixed unrelated bug in FilterExecutor. In some cases it did lie on skipRows that it has more, even if it does not have and the upstream does not have as well. Thank you jenkins for hitting into this race :+1: Co-authored-by: Tobias Gödderz Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlock.cpp | 68 ++++++ arangod/Aql/ExecutionBlock.h | 7 + arangod/Aql/ExecutionBlockImpl.cpp | 97 +-------- arangod/Aql/ExecutionBlockImpl.h | 6 - arangod/Aql/ExecutionEngine.cpp | 4 + arangod/Aql/ExecutionEngine.h | 2 + arangod/Aql/ExecutionNode.cpp | 130 +++++++----- arangod/Aql/FilterExecutor.cpp | 6 +- arangod/Aql/ReturnExecutor.cpp | 76 ++++++- arangod/Aql/ReturnExecutor.h | 52 ++--- tests/Aql/ExecutorTestHelper.h | 16 +- tests/Aql/ReturnExecutorTest.cpp | 270 +++++++++++++----------- tests/Aql/WaitingExecutionBlockMock.cpp | 23 +- tests/Aql/WaitingExecutionBlockMock.h | 4 + 14 files changed, 439 insertions(+), 322 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index 0f8fa6173734..1d7f97402f85 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -25,6 +25,7 @@ #include "ExecutionBlock.h" +#include "Aql/AqlCallStack.h" #include "Aql/Ast.h" #include "Aql/BlockCollector.h" #include "Aql/ExecutionEngine.h" @@ -292,3 +293,70 @@ void ExecutionBlock::addDependency(ExecutionBlock* ep) { bool ExecutionBlock::isInSplicedSubquery() const noexcept { return _isInSplicedSubquery; } + +void ExecutionBlock::traceExecuteBegin(AqlCallStack const& stack) { + if (_profile >= PROFILE_LEVEL_BLOCKS) { + if (_getSomeBegin <= 0.0) { + _getSomeBegin = TRI_microtime(); + } + if (_profile >= PROFILE_LEVEL_TRACE_1) { + auto const node = getPlanNode(); + auto const queryId = this->_engine->getQuery()->id(); + // TODO make sure this works also if stack is non relevant, e.g. passed through by outer subquery. + auto const& call = stack.peek(); + LOG_TOPIC("1e717", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute type=" << node->getTypeString() << " call= " << call + << " this=" << (uintptr_t)this << " id=" << node->id(); + } + } +} + +void ExecutionBlock::traceExecuteEnd( + std::tuple const& result) { + if (_profile >= PROFILE_LEVEL_BLOCKS) { + auto const& [state, skipped, block] = result; + auto const items = block != nullptr ? 
block->size() : 0; + ExecutionNode const* en = getPlanNode(); + ExecutionStats::Node stats; + stats.calls = 1; + stats.items = skipped + items; + if (state != ExecutionState::WAITING) { + stats.runtime = TRI_microtime() - _getSomeBegin; + _getSomeBegin = 0.0; + } + + auto it = _engine->_stats.nodes.find(en->id()); + if (it != _engine->_stats.nodes.end()) { + it->second += stats; + } else { + _engine->_stats.nodes.emplace(en->id(), stats); + } + + if (_profile >= PROFILE_LEVEL_TRACE_1) { + ExecutionNode const* node = getPlanNode(); + auto const queryId = this->_engine->getQuery()->id(); + LOG_TOPIC("60bbc", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute done type=" << node->getTypeString() << " this=" << (uintptr_t)this + << " id=" << node->id() << " state=" << stateToString(state) + << " skipped=" << skipped << " produced=" << items; + + if (_profile >= PROFILE_LEVEL_TRACE_2) { + if (block == nullptr) { + LOG_TOPIC("9b3f4", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute type=" << node->getTypeString() << " result: nullptr"; + } else { + VPackBuilder builder; + auto const options = trxVpackOptions(); + block->toSimpleVPack(options, builder); + LOG_TOPIC("f12f9", INFO, Logger::QUERIES) + << "[query#" << queryId << "] " + << "execute type=" << node->getTypeString() + << " result: " << VPackDumper::toString(builder.slice(), options); + } + } + } + } +} \ No newline at end of file diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index 5088608bc945..f6e7370647e8 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -150,6 +150,13 @@ class ExecutionBlock { [[nodiscard]] bool isInSplicedSubquery() const noexcept; + protected: + // Trace the start of a execute call + void traceExecuteBegin(AqlCallStack const& stack); + + // Trace the end of a execute call, potentially with result + void traceExecuteEnd(std::tuple const& result); + protected: /// @brief the execution engine ExecutionEngine* _engine; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index a624a575029d..2c587ed15dad 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -81,29 +81,6 @@ using namespace arangodb; using namespace arangodb::aql; -namespace { - -std::string const doneString = "DONE"; -std::string const hasDataRowString = "HASMORE"; -std::string const waitingString = "WAITING"; -std::string const unknownString = "UNKNOWN"; - -std::string const& stateToString(aql::ExecutionState state) { - switch (state) { - case aql::ExecutionState::DONE: - return doneString; - case aql::ExecutionState::HASMORE: - return hasDataRowString; - case aql::ExecutionState::WAITING: - return waitingString; - default: - // just to suppress a warning .. - return unknownString; - } -} - -} // namespace - /* * Creates a metafunction `checkName` that tests whether a class has a method * named `methodName`, used like this: @@ -152,10 +129,9 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction * TODO: This should be removed once all executors and fetchers are ported to the new style. 
*/ - template constexpr bool isNewStyleExecutor = - is_one_of_v ExecutionBlockImpl -void ExecutionBlockImpl::traceExecuteBegin(AqlCallStack const& stack) { - if (_profile >= PROFILE_LEVEL_BLOCKS) { - if (_getSomeBegin <= 0.0) { - _getSomeBegin = TRI_microtime(); - } - if (_profile >= PROFILE_LEVEL_TRACE_1) { - auto const node = getPlanNode(); - auto const queryId = this->_engine->getQuery()->id(); - // TODO make sure this works also if stack is non relevant, e.g. passed through by outer subquery. - auto const& call = stack.peek(); - LOG_TOPIC("1e717", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " - << "execute type=" << node->getTypeString() << " call= " << call - << " this=" << (uintptr_t)this << " id=" << node->id(); - } - } -} - -template -void ExecutionBlockImpl::traceExecuteEnd( - std::tuple const& result) { - if (_profile >= PROFILE_LEVEL_BLOCKS) { - auto const& [state, skipped, block] = result; - auto const items = block != nullptr ? block->size() : 0; - ExecutionNode const* en = getPlanNode(); - ExecutionStats::Node stats; - stats.calls = 1; - stats.items = skipped + items; - if (state != ExecutionState::WAITING) { - stats.runtime = TRI_microtime() - _getSomeBegin; - _getSomeBegin = 0.0; - } - - auto it = _engine->_stats.nodes.find(en->id()); - if (it != _engine->_stats.nodes.end()) { - it->second += stats; - } else { - _engine->_stats.nodes.emplace(en->id(), stats); - } - - if (_profile >= PROFILE_LEVEL_TRACE_1) { - ExecutionNode const* node = getPlanNode(); - auto const queryId = this->_engine->getQuery()->id(); - LOG_TOPIC("60bbc", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " - << "execute done type=" << node->getTypeString() << " this=" << (uintptr_t)this - << " id=" << node->id() << " state=" << stateToString(state) - << " skipped=" << skipped << " produced=" << items; - - if (_profile >= PROFILE_LEVEL_TRACE_2) { - if (block == nullptr) { - LOG_TOPIC("9b3f4", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " - << "execute type=" << node->getTypeString() << " result: nullptr"; - } else { - VPackBuilder builder; - auto const options = trxVpackOptions(); - block->toSimpleVPack(options, builder); - LOG_TOPIC("f12f9", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " - << "execute type=" << node->getTypeString() - << " result: " << VPackDumper::toString(builder.slice(), options); - } - } - } - } -} - // Work around GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480 // Without the namespaces it fails with // error: specialization of 'template std::pair arangodb::aql::ExecutionBlockImpl::initializeCursor(arangodb::aql::AqlItemBlock*, size_t)' in different namespace @@ -1120,7 +1027,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert(useExecutor == (is_one_of_v const& result); - // Allocate an output block and install a call in it [[nodiscard]] auto allocateOutputBlock(AqlCall&& call) -> std::unique_ptr; diff --git a/arangod/Aql/ExecutionEngine.cpp b/arangod/Aql/ExecutionEngine.cpp index 5e162cfcfaed..77c0b16d3f83 100644 --- a/arangod/Aql/ExecutionEngine.cpp +++ b/arangod/Aql/ExecutionEngine.cpp @@ -760,3 +760,7 @@ RegisterId ExecutionEngine::resultRegister() const { return _resultRegister; } AqlItemBlockManager& ExecutionEngine::itemBlockManager() { return _itemBlockManager; } + +auto ExecutionEngine::getStats() const noexcept -> ExecutionStats const& { + return _stats; +} diff --git 
a/arangod/Aql/ExecutionEngine.h b/arangod/Aql/ExecutionEngine.h index 92fb7ebc9e3a..d56feed72c41 100644 --- a/arangod/Aql/ExecutionEngine.h +++ b/arangod/Aql/ExecutionEngine.h @@ -118,6 +118,8 @@ class ExecutionEngine { /// @brief accessor to the memory recyler for AqlItemBlocks TEST_VIRTUAL AqlItemBlockManager& itemBlockManager(); + auto getStats() const noexcept -> ExecutionStats const&; + public: /// @brief execution statistics for the query /// note that the statistics are modification by execution blocks diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index e29f3e5a7ca3..2ef0d5475fdd 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -45,6 +45,7 @@ #include "Aql/IndexNode.h" #include "Aql/KShortestPathsNode.h" #include "Aql/LimitExecutor.h" +#include "Aql/MaterializeExecutor.h" #include "Aql/ModificationNodes.h" #include "Aql/NoResultsExecutor.h" #include "Aql/NodeFinder.h" @@ -60,7 +61,6 @@ #include "Aql/SubqueryStartExecutionNode.h" #include "Aql/TraversalNode.h" #include "Aql/WalkerWorker.h" -#include "Aql/MaterializeExecutor.h" #include "Basics/VelocyPackHelper.h" #include "Basics/system-compiler.h" #include "Cluster/ServerState.h" @@ -115,8 +115,7 @@ std::unordered_map const typeNames{ {static_cast(ExecutionNode::SUBQUERY_END), "SubqueryEndNode"}, {static_cast(ExecutionNode::DISTRIBUTE_CONSUMER), "DistributeConsumer"}, - {static_cast(ExecutionNode::MATERIALIZE), - "MaterializeNode"}}; + {static_cast(ExecutionNode::MATERIALIZE), "MaterializeNode"}}; } // namespace /// @brief resolve nodeType to a string. @@ -460,7 +459,8 @@ ExecutionNode::ExecutionNode(ExecutionPlan* plan, VPackSlice const& slice) _varsValid.insert(oneVariable); } - _isInSplicedSubquery = VelocyPackHelper::getBooleanValue(slice, "isInSplicedSubquery", false); + _isInSplicedSubquery = + VelocyPackHelper::getBooleanValue(slice, "isInSplicedSubquery", false); } /// @brief toVelocyPack, export an ExecutionNode to VelocyPack @@ -909,7 +909,6 @@ RegisterId ExecutionNode::varToRegUnchecked(Variable const& var) const { return reg; } - bool ExecutionNode::isInSplicedSubquery() const noexcept { return _isInSplicedSubquery; } @@ -1105,7 +1104,7 @@ std::vector ExecutionNode::getParents() const { } bool ExecutionNode::hasParent() const { return (_parents.size() == 1); } - + /// @brief whether or not the node has any ancestor (parent at any distance) /// of this type bool ExecutionNode::hasParentOfType(ExecutionNode::NodeType type) const { @@ -1350,8 +1349,10 @@ std::unique_ptr EnumerateCollectionNode::createBlock( variableToRegisterId(_outVariable), getRegisterPlan()->nrRegs[previousNode->getDepth()], getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), calcRegsToKeep(), - &engine, this->_collection, _outVariable, (this->isVarUsedLater(_outVariable) || this->_filter != nullptr), - this->_filter.get(), this->projections(), EngineSelectorFeature::ENGINE->useRawDocumentPointers(), this->_random); + &engine, this->_collection, _outVariable, + (this->isVarUsedLater(_outVariable) || this->_filter != nullptr), + this->_filter.get(), this->projections(), + EngineSelectorFeature::ENGINE->useRawDocumentPointers(), this->_random); return std::make_unique>(&engine, this, std::move(infos)); } @@ -1401,7 +1402,7 @@ CostEstimate EnumerateCollectionNode::estimateCost() const { estimate.estimatedCost += estimate.estimatedNrItems * (_random ? 
1.005 : 1.0) + 1.0; return estimate; } - + EnumerateListNode::EnumerateListNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base) : ExecutionNode(plan, base), @@ -1737,9 +1738,7 @@ CostEstimate CalculationNode::estimateCost() const { CalculationNode::CalculationNode(ExecutionPlan* plan, size_t id, std::unique_ptr expr, Variable const* outVariable) - : ExecutionNode(plan, id), - _outVariable(outVariable), - _expression(std::move(expr)) { + : ExecutionNode(plan, id), _outVariable(outVariable), _expression(std::move(expr)) { TRI_ASSERT(_expression != nullptr); TRI_ASSERT(_outVariable != nullptr); } @@ -2164,14 +2163,14 @@ std::unique_ptr ReturnNode::createBlock( // one register that is stored within the DOCVEC. RegisterId const numberInputRegisters = getRegisterPlan()->nrRegs[previousNode->getDepth()]; - RegisterId const numberOutputRegisters = - returnInheritedResults ? getRegisterPlan()->nrRegs[getDepth()] : 1; if (returnInheritedResults) { return std::make_unique>>(&engine, this, inputRegister, _count); } else { TRI_ASSERT(!returnInheritedResults); + // The Return Executor only writes to register 0. + RegisterId const numberOutputRegisters = 1; ReturnExecutorInfos infos(inputRegister, numberInputRegisters, numberOutputRegisters, _count); @@ -2339,9 +2338,10 @@ namespace { const char* MATERIALIZE_NODE_IN_NM_COL_PARAM = "inNmColPtr"; const char* MATERIALIZE_NODE_IN_NM_DOC_PARAM = "inNmDocId"; const char* MATERIALIZE_NODE_OUT_VARIABLE_PARAM = "outVariable"; -} +} // namespace -MaterializeNode* materialize::createMaterializeNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base) { +MaterializeNode* materialize::createMaterializeNode(ExecutionPlan* plan, + arangodb::velocypack::Slice const& base) { if (base.hasKey(MATERIALIZE_NODE_IN_NM_COL_PARAM)) { return new MaterializeMultiNode(plan, base); } @@ -2349,14 +2349,17 @@ MaterializeNode* materialize::createMaterializeNode(ExecutionPlan* plan, arangod } MaterializeNode::MaterializeNode(ExecutionPlan* plan, size_t id, - aql::Variable const& inDocId, - aql::Variable const& outVariable) - : ExecutionNode(plan, id), _inNonMaterializedDocId(&inDocId), _outVariable(&outVariable) {} + aql::Variable const& inDocId, aql::Variable const& outVariable) + : ExecutionNode(plan, id), + _inNonMaterializedDocId(&inDocId), + _outVariable(&outVariable) {} MaterializeNode::MaterializeNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base) - : ExecutionNode(plan, base), - _inNonMaterializedDocId(aql::Variable::varFromVPack(plan->getAst(), base, MATERIALIZE_NODE_IN_NM_DOC_PARAM, true)), - _outVariable(aql::Variable::varFromVPack(plan->getAst(), base, MATERIALIZE_NODE_OUT_VARIABLE_PARAM)) {} + : ExecutionNode(plan, base), + _inNonMaterializedDocId(aql::Variable::varFromVPack(plan->getAst(), base, MATERIALIZE_NODE_IN_NM_DOC_PARAM, + true)), + _outVariable(aql::Variable::varFromVPack(plan->getAst(), base, + MATERIALIZE_NODE_OUT_VARIABLE_PARAM)) {} void MaterializeNode::toVelocyPackHelper(arangodb::velocypack::Builder& nodes, unsigned flags, std::unordered_set& seen) const { @@ -2394,14 +2397,17 @@ MaterializeMultiNode::MaterializeMultiNode(ExecutionPlan* plan, size_t id, aql::Variable const& inColPtr, aql::Variable const& inDocId, aql::Variable const& outVariable) - : MaterializeNode(plan, id, inDocId, outVariable), - _inNonMaterializedColPtr(&inColPtr) {} + : MaterializeNode(plan, id, inDocId, outVariable), + _inNonMaterializedColPtr(&inColPtr) {} -MaterializeMultiNode::MaterializeMultiNode(ExecutionPlan* plan, 
arangodb::velocypack::Slice const& base) - : MaterializeNode(plan, base), - _inNonMaterializedColPtr(aql::Variable::varFromVPack(plan->getAst(), base, MATERIALIZE_NODE_IN_NM_COL_PARAM, true)) {} +MaterializeMultiNode::MaterializeMultiNode(ExecutionPlan* plan, + arangodb::velocypack::Slice const& base) + : MaterializeNode(plan, base), + _inNonMaterializedColPtr(aql::Variable::varFromVPack(plan->getAst(), base, MATERIALIZE_NODE_IN_NM_COL_PARAM, + true)) {} -void MaterializeMultiNode::toVelocyPackHelper(arangodb::velocypack::Builder& nodes, unsigned flags, +void MaterializeMultiNode::toVelocyPackHelper(arangodb::velocypack::Builder& nodes, + unsigned flags, std::unordered_set& seen) const { // call base class method MaterializeNode::toVelocyPackHelper(nodes, flags, seen); @@ -2414,7 +2420,6 @@ void MaterializeMultiNode::toVelocyPackHelper(arangodb::velocypack::Builder& nod std::unique_ptr MaterializeMultiNode::createBlock( ExecutionEngine& engine, std::unordered_map const&) const { - ExecutionNode const* previousNode = getFirstDependency(); TRI_ASSERT(previousNode != nullptr); @@ -2438,14 +2443,16 @@ std::unique_ptr MaterializeMultiNode::createBlock( } TRI_ASSERT(engine.getQuery()); - return std::make_unique>>(&engine, this, - MaterializerExecutorInfos(getRegisterPlan()->nrRegs[previousNode->getDepth()], - getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), - calcRegsToKeep(), inNmColPtrRegId, inNmDocIdRegId, - outDocumentRegId, engine.getQuery()->trx())); + return std::make_unique>>( + &engine, this, + MaterializerExecutorInfos(getRegisterPlan()->nrRegs[previousNode->getDepth()], + getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), + calcRegsToKeep(), inNmColPtrRegId, inNmDocIdRegId, + outDocumentRegId, engine.getQuery()->trx())); } -ExecutionNode* MaterializeMultiNode::clone(ExecutionPlan* plan, bool withDependencies, bool withProperties) const { +ExecutionNode* MaterializeMultiNode::clone(ExecutionPlan* plan, bool withDependencies, + bool withProperties) const { TRI_ASSERT(plan); auto* outVariable = _outVariable; @@ -2454,30 +2461,39 @@ ExecutionNode* MaterializeMultiNode::clone(ExecutionPlan* plan, bool withDepende if (withProperties) { outVariable = plan->getAst()->variables()->createVariable(outVariable); - inNonMaterializedDocId = plan->getAst()->variables()->createVariable(inNonMaterializedDocId); - inNonMaterializedColId = plan->getAst()->variables()->createVariable(inNonMaterializedColId); + inNonMaterializedDocId = + plan->getAst()->variables()->createVariable(inNonMaterializedDocId); + inNonMaterializedColId = + plan->getAst()->variables()->createVariable(inNonMaterializedColId); } - auto c = std::make_unique(plan, _id, *inNonMaterializedColId, *inNonMaterializedDocId, *outVariable); + auto c = std::make_unique(plan, _id, *inNonMaterializedColId, + *inNonMaterializedDocId, *outVariable); return cloneHelper(std::move(c), withDependencies, withProperties); } -void MaterializeMultiNode::getVariablesUsedHere(::arangodb::containers::HashSet& vars) const { +void MaterializeMultiNode::getVariablesUsedHere( + ::arangodb::containers::HashSet& vars) const { // call base class method MaterializeNode::getVariablesUsedHere(vars); vars.emplace(_inNonMaterializedColPtr); } -MaterializeSingleNode::MaterializeSingleNode(ExecutionPlan* plan, size_t id, aql::Collection const* collection, - aql::Variable const& inDocId, aql::Variable const& outVariable) - : MaterializeNode(plan, id, inDocId, outVariable), CollectionAccessingNode(collection) {} 
+MaterializeSingleNode::MaterializeSingleNode(ExecutionPlan* plan, size_t id, + aql::Collection const* collection, + aql::Variable const& inDocId, + aql::Variable const& outVariable) + : MaterializeNode(plan, id, inDocId, outVariable), + CollectionAccessingNode(collection) {} -MaterializeSingleNode::MaterializeSingleNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base) - : MaterializeNode(plan, base), CollectionAccessingNode(plan, base) {} +MaterializeSingleNode::MaterializeSingleNode(ExecutionPlan* plan, + arangodb::velocypack::Slice const& base) + : MaterializeNode(plan, base), CollectionAccessingNode(plan, base) {} -void MaterializeSingleNode::toVelocyPackHelper(arangodb::velocypack::Builder& nodes, unsigned flags, - std::unordered_set& seen) const { +void MaterializeSingleNode::toVelocyPackHelper(arangodb::velocypack::Builder& nodes, + unsigned flags, + std::unordered_set& seen) const { // call base class method MaterializeNode::toVelocyPackHelper(nodes, flags, seen); @@ -2489,7 +2505,6 @@ void MaterializeSingleNode::toVelocyPackHelper(arangodb::velocypack::Builder& no std::unique_ptr MaterializeSingleNode::createBlock( ExecutionEngine& engine, std::unordered_map const&) const { - ExecutionNode const* previousNode = getFirstDependency(); TRI_ASSERT(previousNode != nullptr); RegisterId inNmDocIdRegId; @@ -2507,14 +2522,17 @@ std::unique_ptr MaterializeSingleNode::createBlock( TRI_ASSERT(engine.getQuery()); auto const& name = _collection->name(); - return std::make_unique>>(&engine, this, - MaterializerExecutorInfos(getRegisterPlan()->nrRegs[previousNode->getDepth()], - getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), - calcRegsToKeep(), _collection->name(), inNmDocIdRegId, - outDocumentRegId, engine.getQuery()->trx())); + return std::make_unique>>( + &engine, this, + MaterializerExecutorInfos( + getRegisterPlan()->nrRegs[previousNode->getDepth()], + getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), + calcRegsToKeep(), _collection->name(), inNmDocIdRegId, + outDocumentRegId, engine.getQuery()->trx())); } -ExecutionNode* MaterializeSingleNode::clone(ExecutionPlan * plan, bool withDependencies, bool withProperties) const { +ExecutionNode* MaterializeSingleNode::clone(ExecutionPlan* plan, bool withDependencies, + bool withProperties) const { TRI_ASSERT(plan); auto* outVariable = _outVariable; @@ -2522,10 +2540,12 @@ ExecutionNode* MaterializeSingleNode::clone(ExecutionPlan * plan, bool withDepen if (withProperties) { outVariable = plan->getAst()->variables()->createVariable(outVariable); - inNonMaterializedDocId = plan->getAst()->variables()->createVariable(inNonMaterializedDocId); + inNonMaterializedDocId = + plan->getAst()->variables()->createVariable(inNonMaterializedDocId); } - auto c = std::make_unique(plan, _id, _collection, *inNonMaterializedDocId, *outVariable); + auto c = std::make_unique(plan, _id, _collection, + *inNonMaterializedDocId, *outVariable); CollectionAccessingNode::cloneInto(*c); return cloneHelper(std::move(c), withDependencies, withProperties); } diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index bbe10b193e2e..ecb318423211 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -104,11 +104,9 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at // TODO Remove me, we are using the getSome skip variant here. 
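+// Skips at most call.getOffset() rows from the current input range and reports
+// the range's upstream state, so this executor no longer claims HASMORE when
+// neither it nor its upstream has any rows left (see the adjusted return below).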
std::tuple FilterExecutor::skipRowsRange( AqlItemBlockInputRange& inputRange, AqlCall& call) { - ExecutorState state = ExecutorState::HASMORE; - InputAqlItemRow input{CreateInvalidInputRowHint{}}; size_t skipped = 0; while (inputRange.hasDataRow() && skipped < call.getOffset()) { - std::tie(state, input) = inputRange.nextDataRow(); + auto const [unused, input] = inputRange.nextDataRow(); if (!input) { TRI_ASSERT(!inputRange.hasDataRow()); break; @@ -121,7 +119,7 @@ std::tuple FilterExecutor::skipRowsRange( AqlCall upstreamCall{}; upstreamCall.softLimit = call.getOffset(); - return {state, skipped, upstreamCall}; + return {inputRange.upstreamState(), skipped, upstreamCall}; } std::tuple FilterExecutor::produceRows( diff --git a/arangod/Aql/ReturnExecutor.cpp b/arangod/Aql/ReturnExecutor.cpp index 4a03e876a74f..02c3dbd0ff0e 100644 --- a/arangod/Aql/ReturnExecutor.cpp +++ b/arangod/Aql/ReturnExecutor.cpp @@ -39,9 +39,83 @@ ReturnExecutorInfos::ReturnExecutorInfos(RegisterId inputRegister, RegisterId nr std::unordered_set{} /*to keep*/ ), _inputRegisterId(inputRegister), - _doCount(doCount) {} + _doCount(doCount) { + // For the time beeing return will only write to register 0. + // It is defined that it can only have exactly 1 output register. + // We can easily replace this by a different register, if we + // modify the caller within the ExecutionEngine to ask for the + // output register from outside. + TRI_ASSERT(nrOutputRegisters == 1); +} ReturnExecutor::ReturnExecutor(Fetcher& fetcher, ReturnExecutorInfos& infos) : _infos(infos), _fetcher(fetcher) {} ReturnExecutor::~ReturnExecutor() = default; + +// TODO: @deprecated remove +std::pair ReturnExecutor::expectedNumberOfRows(size_t atMost) const { + return _fetcher.preFetchNumberOfRows(atMost); +} + +// TODO: @deprecated remove +auto ReturnExecutor::produceRows(OutputAqlItemRow& output) + -> std::pair { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +auto ReturnExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + TRI_IF_FAILURE("ReturnExecutor::produceRows") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + while (inputRange.hasDataRow() && call.needSkipMore()) { + // I do not think that this is actually called. + // It will be called first to get the upstream-Call + // but this executor will always delegate the skipping + // to upstream. + TRI_ASSERT(false); + auto [state, input] = inputRange.nextDataRow(); + TRI_ASSERT(input.isInitialized()); + TRI_IF_FAILURE("ReturnBlock::getSome") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + call.didSkip(1); + // TODO: do we need to include counted here? + /* + if (_infos.doCount()) { + stats.incrCounted(); + } + */ + } + return {inputRange.upstreamState(), call.getSkipCount(), call}; +} + +auto ReturnExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { + TRI_IF_FAILURE("ReturnExecutor::produceRows") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + Stats stats{}; + + while (inputRange.hasDataRow() && !output.isFull()) { + auto [state, input] = inputRange.nextDataRow(); + TRI_ASSERT(input.isInitialized()); + // REMARK: it is called `getInputRegisterId` here but FilterExecutor calls it `getInputRegister`. 
+ AqlValue val = input.stealValue(_infos.getInputRegisterId()); + AqlValueGuard guard(val, true); + TRI_IF_FAILURE("ReturnBlock::getSome") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + output.moveValueInto(_infos.getOutputRegisterId(), input, guard); + output.advanceRow(); + if (_infos.doCount()) { + stats.incrCounted(); + } + } + + return {inputRange.upstreamState(), stats, output.getClientCall()}; +} diff --git a/arangod/Aql/ReturnExecutor.h b/arangod/Aql/ReturnExecutor.h index a0841dd9f7a5..ec54f26898d0 100644 --- a/arangod/Aql/ReturnExecutor.h +++ b/arangod/Aql/ReturnExecutor.h @@ -94,38 +94,26 @@ class ReturnExecutor { * @return ExecutionState, * if something was written output.hasValue() == true */ - inline std::pair produceRows(OutputAqlItemRow& output) { - ExecutionState state; - ReturnExecutor::Stats stats; - InputAqlItemRow inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; - std::tie(state, inputRow) = _fetcher.fetchRow(); - - if (state == ExecutionState::WAITING) { - TRI_ASSERT(!inputRow); - return {state, stats}; - } - - if (!inputRow) { - TRI_ASSERT(state == ExecutionState::DONE); - return {state, stats}; - } - - AqlValue val = inputRow.stealValue(_infos.getInputRegisterId()); - AqlValueGuard guard(val, true); - TRI_IF_FAILURE("ReturnBlock::getSome") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - output.moveValueInto(_infos.getOutputRegisterId(), inputRow, guard); - - if (_infos.doCount()) { - stats.incrCounted(); - } - return {state, stats}; - } - - inline std::pair expectedNumberOfRows(size_t atMost) const { - return _fetcher.preFetchNumberOfRows(atMost); - } + auto produceRows(OutputAqlItemRow& output) -> std::pair; + + /** + * @brief skip the next Rows of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; + + /** + * @brief produce the next Rows of Aql Values. 
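+   *        Values are moved from the configured input register into output
+   *        register 0 until either the input range or the output block is exhausted.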
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + [[nodiscard]] auto expectedNumberOfRows(size_t atMost) const + -> std::pair; private: ReturnExecutorInfos& _infos; diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index f90e3d7bf98d..0d5ffbb5f08c 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -53,6 +53,7 @@ struct ExecutorTestHelper { explicit ExecutorTestHelper(arangodb::aql::Query& query) : _expectedSkip{0}, _expectedState{ExecutionState::HASMORE}, + _testStats{false}, _query(query), _dummyNode{std::make_unique(_query.plan(), 42)} {} @@ -119,6 +120,12 @@ struct ExecutorTestHelper { return *this; } + auto expectedStats(ExecutionStats stats) -> ExecutorTestHelper& { + _expectedStats = stats; + _testStats = true; + return *this; + }; + auto run(typename E::Infos infos) -> void { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); @@ -139,9 +146,10 @@ struct ExecutorTestHelper { SharedAqlItemBlockPtr expectedOutputBlock = buildBlock(itemBlockManager, std::move(_output)); testOutputBlock(result, expectedOutputBlock); - - // ToDo: - // Test stats + if (_testStats) { + auto actualStats = _query.engine()->getStats(); + EXPECT_EQ(actualStats, _expectedStats); + } }; private: @@ -219,6 +227,8 @@ struct ExecutorTestHelper { std::array _outputRegisters; size_t _expectedSkip; ExecutionState _expectedState; + ExecutionStats _expectedStats; + bool _testStats; SplitType _inputSplit = {std::monostate()}; SplitType _outputSplit = {std::monostate()}; diff --git a/tests/Aql/ReturnExecutorTest.cpp b/tests/Aql/ReturnExecutorTest.cpp index 72ccfd962f9b..bf8dc3608fa3 100644 --- a/tests/Aql/ReturnExecutorTest.cpp +++ b/tests/Aql/ReturnExecutorTest.cpp @@ -20,11 +20,16 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// -#include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "ExecutorTestHelper.h" +#include "RowFetcherHelper.h" + +#include "Mocks/Servers.h" + #include "Aql/AqlItemBlock.h" #include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutionEngine.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" #include "Aql/ResourceUsage.h" @@ -41,139 +46,166 @@ namespace arangodb { namespace tests { namespace aql { -class ReturnExecutorTest : public ::testing::Test { +// This is only to get a split-type. 
The Type is independent of actual template parameters +using ReturnExecutorTestHelper = ExecutorTestHelper; +using ReturnExecutorSplitType = ReturnExecutorTestHelper::SplitType; +using ReturnExecutorParamType = std::tuple; + +class ReturnExecutorTest : public ::testing::TestWithParam { protected: - ExecutionState state; - ResourceMonitor monitor; + // ExecutionState state; + ResourceMonitor monitor{}; + mocks::MockAqlServer server{}; AqlItemBlockManager itemBlockManager; - SharedAqlItemBlockPtr block; - std::shared_ptr> registersToKeep; - RegisterId inputRegister; + + std::unique_ptr fakedQuery; ReturnExecutorTest() : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - block(new AqlItemBlock(itemBlockManager, 1000, 1)), - registersToKeep(make_shared_unordered_set()), - inputRegister(0) {} + fakedQuery(server.createFakeQuery()) { + auto engine = + std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); + fakedQuery->setEngine(engine.release()); + } + + auto getSplit() -> ReturnExecutorSplitType { + auto [split, unused] = GetParam(); + return split; + } + + auto doCount() -> bool { + auto [unused, doCount] = GetParam(); + return doCount; + } + + auto getCountStats(size_t nr) -> ExecutionStats { + ExecutionStats stats; + if (doCount()) { + stats.count = nr; + } + return stats; + } }; -TEST_F(ReturnExecutorTest, NoRowsUpstreamProducerDoesNotWait) { - ReturnExecutorInfos infos(inputRegister, 1 /*nr in*/, 1 /*nr out*/, true /*do count*/); - auto const& outputRegisters = infos.getOutputRegisters(); - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - ReturnExecutor testee(fetcher, infos); - CountStats stats{}; - - OutputAqlItemRow result(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +template +const ReturnExecutorSplitType splitIntoBlocks = + ReturnExecutorSplitType{std::vector{vs...}}; +template +const ReturnExecutorSplitType splitStep = ReturnExecutorSplitType{step}; + +INSTANTIATE_TEST_CASE_P(ReturnExecutor, ReturnExecutorTest, + ::testing::Combine(::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, + splitStep<1>, splitStep<2>), + ::testing::Bool())); + +/******* + * Start test suite + ******/ + +/** + * @brief Test the most basic query. + * We have an unlimited produce call + * And the data is in register 0 => we expect it to + * be passed through. 
+ */ + +TEST_P(ReturnExecutorTest, returns_all_from_upstream) { + ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); + AqlCall call{}; // unlimited produce + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .expectedStats(getCountStats(8)) + .run(std::move(infos)); } -TEST_F(ReturnExecutorTest, NoRowsUpstreamProducerWaits) { - ReturnExecutorInfos infos(inputRegister, 1 /*nr in*/, 1 /*nr out*/, true /*do count*/); - auto const& outputRegisters = infos.getOutputRegisters(); - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); - ReturnExecutor testee(fetcher, infos); - CountStats stats{}; - - OutputAqlItemRow result(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +TEST_P(ReturnExecutorTest, handle_soft_limit) { + ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); + AqlCall call{}; + call.softLimit = 3; + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}}) + .expectSkipped(0) + .expectedState(ExecutionState::HASMORE) + .expectedStats(getCountStats(3)) + .run(std::move(infos)); } -TEST_F(ReturnExecutorTest, RowsUpstreamProducerDoesNotWait) { - ReturnExecutorInfos infos(inputRegister, 1 /*nr in*/, 1 /*nr out*/, true /*do count*/); - auto const& outputRegisters = infos.getOutputRegisters(); - auto input = VPackParser::fromJson("[ [true], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->buffer(), false); - ReturnExecutor testee(fetcher, infos); - CountStats stats{}; - - OutputAqlItemRow row(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(row.produced()); - - // verify result - AqlValue value; - auto block = row.stealBlock(); - for (std::size_t index = 0; index < 3; index++) { - value = block->getValue(index, 0); - ASSERT_TRUE(value.isBoolean()); - ASSERT_EQ(value.toBoolean(), input->slice().at(index).at(0).getBool()); - } +TEST_P(ReturnExecutorTest, handle_hard_limit) { + ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); + AqlCall call{}; + call.hardLimit = 5; + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}}) + 
.expectSkipped(0) + .expectedState(ExecutionState::DONE) + .expectedStats(getCountStats(5)) + .run(std::move(infos)); +} + +TEST_P(ReturnExecutorTest, handle_offset) { + ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); + AqlCall call{}; + call.offset = 4; + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {5}, {7}, {1}}) + .expectSkipped(4) + .expectedState(ExecutionState::DONE) + .expectedStats(getCountStats(4)) + .run(std::move(infos)); } -TEST_F(ReturnExecutorTest, RowsUpstreamProducerWaits) { - ReturnExecutorInfos infos(inputRegister, 1 /*nr in*/, 1 /*nr out*/, true /*do count*/); - auto const& outputRegisters = infos.getOutputRegisters(); - auto input = VPackParser::fromJson("[ [true], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - ReturnExecutor testee(fetcher, infos); - CountStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(row.produced()); +TEST_P(ReturnExecutorTest, handle_fullcount) { + ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); + AqlCall call{}; + call.hardLimit = 2; + call.fullCount = true; + ExecutorTestHelper(*fakedQuery) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}}) + .expectSkipped(6) + .expectedState(ExecutionState::DONE) + .expectedStats(getCountStats(2)) + .run(std::move(infos)); } +TEST_P(ReturnExecutorTest, handle_other_inputRegister) { + ReturnExecutorInfos infos(1 /*input register*/, 2 /*nr in*/, 1 /*nr out*/, doCount()); + AqlCall call{}; + call.hardLimit = 5; + ExecutorTestHelper(*fakedQuery) + .setInputValue({{R"("invalid")", 1}, + {R"("invalid")", 2}, + {R"("invalid")", 5}, + {R"("invalid")", 2}, + {R"("invalid")", 1}, + {R"("invalid")", 5}, + {R"("invalid")", 7}, + {R"("invalid")", 1}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .expectedStats(getCountStats(5)) + .run(std::move(infos)); +} } // namespace aql } // namespace tests } // namespace arangodb diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp index 00e56e4a07d7..4e1f9233ca7a 100644 --- a/tests/Aql/WaitingExecutionBlockMock.cpp +++ b/tests/Aql/WaitingExecutionBlockMock.cpp @@ -111,8 
+111,16 @@ std::pair WaitingExecutionBlockMock::skip } } -// NOTE: Does not care for shadowrows! std::tuple WaitingExecutionBlockMock::execute(AqlCallStack stack) { + traceExecuteBegin(stack); + auto res = executeWithoutTrace(stack); + traceExecuteEnd(res); + return res; +} + +// NOTE: Does not care for shadowrows! +std::tuple WaitingExecutionBlockMock::executeWithoutTrace( + AqlCallStack stack) { while (!stack.isRelevant()) { stack.pop(); } @@ -142,15 +150,16 @@ std::tuple WaitingExecutionBlockM skipped += canSkip; continue; } else if (myCall.getLimit() > 0) { + if (result != nullptr) { + // Sorry we can only return one block. + // This means we have prepared the first block. + // But still need more data. + return {ExecutionState::HASMORE, skipped, result}; + } + size_t canReturn = _data.front()->size() - _inflight; if (canReturn <= myCall.getLimit()) { - if (result != nullptr) { - // Sorry we can only return one block. - // This means we have prepared the first block. - // But still need more data. - return {ExecutionState::HASMORE, skipped, result}; - } // We can return the remainder of this block if (_inflight == 0) { // use full block diff --git a/tests/Aql/WaitingExecutionBlockMock.h b/tests/Aql/WaitingExecutionBlockMock.h index 3234060d7936..a66c3a913b4e 100644 --- a/tests/Aql/WaitingExecutionBlockMock.h +++ b/tests/Aql/WaitingExecutionBlockMock.h @@ -112,6 +112,10 @@ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { private: void dropBlock(); + // Implementation of execute + std::tuple executeWithoutTrace( + arangodb::aql::AqlCallStack stack); + private: std::deque _data; arangodb::aql::ResourceMonitor _resourceMonitor; From 6359f0c926cad30b4efd7ce263fb3b64563621b8 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Wed, 12 Feb 2020 14:40:20 +0000 Subject: [PATCH 059/122] Tweak ExecutorTestHelper to test pipelines --- tests/Aql/ExecutorTestHelper.h | 29 ++++++++++++++++------ tests/Aql/ReturnExecutorTest.cpp | 32 +++++++++++++++---------- tests/Aql/SortedCollectExecutorTest.cpp | 13 ++++++---- 3 files changed, 49 insertions(+), 25 deletions(-) diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 0d5ffbb5f08c..64d7c0554f6b 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -44,7 +44,14 @@ namespace arangodb { namespace tests { namespace aql { -template +using ExecBlock = std::shared_ptr; + +// TODO: this could also just be a pair of exec blocks (front and back of the pipeline) +// because the deque is defined by the dependencies (and this way we can depend on +// more than one ExecBlock) +using Pipeline = std::deque; + +template struct ExecutorTestHelper { using SplitType = std::variant, std::size_t, std::monostate>; @@ -124,9 +131,17 @@ struct ExecutorTestHelper { _expectedStats = stats; _testStats = true; return *this; - }; + } + + template + auto setExecBlock(typename E::Infos infos) -> ExecutorTestHelper& { + auto testeeNode = std::make_unique(_query.plan(), 1); + _testee = std::make_unique>(_query.engine(), + testeeNode.get(), std::move(infos)); + return *this; + } - auto run(typename E::Infos infos) -> void { + auto run() -> void { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); @@ -134,11 +149,10 @@ struct ExecutorTestHelper { auto testeeNode = std::make_unique(_query.plan(), 1); - ExecutionBlockImpl testee{_query.engine(), testeeNode.get(), std::move(infos)}; - testee.addDependency(inputBlock.get()); + 
_testee->addDependency(inputBlock.get()); AqlCallStack stack{_call}; - auto const [state, skipped, result] = testee.execute(stack); + auto const [state, skipped, result] = _testee->execute(stack); EXPECT_EQ(skipped, _expectedSkip); EXPECT_EQ(state, _expectedState); @@ -235,6 +249,7 @@ struct ExecutorTestHelper { arangodb::aql::Query& _query; std::unique_ptr _dummyNode; + ExecBlock _testee; }; enum class ExecutorCall { @@ -256,7 +271,7 @@ using ExecutorStepResult = std::tuple +template std::tuple, arangodb::aql::ExecutionStats> runExecutor(arangodb::aql::AqlItemBlockManager& manager, Executor& executor, arangodb::aql::OutputAqlItemRow& outputRow, size_t const numSkip, diff --git a/tests/Aql/ReturnExecutorTest.cpp b/tests/Aql/ReturnExecutorTest.cpp index bf8dc3608fa3..7fb106efd822 100644 --- a/tests/Aql/ReturnExecutorTest.cpp +++ b/tests/Aql/ReturnExecutorTest.cpp @@ -47,7 +47,7 @@ namespace tests { namespace aql { // This is only to get a split-type. The Type is independent of actual template parameters -using ReturnExecutorTestHelper = ExecutorTestHelper; +using ReturnExecutorTestHelper = ExecutorTestHelper<1, 1>; using ReturnExecutorSplitType = ReturnExecutorTestHelper::SplitType; using ReturnExecutorParamType = std::tuple; @@ -113,7 +113,8 @@ INSTANTIATE_TEST_CASE_P(ReturnExecutor, ReturnExecutorTest, TEST_P(ReturnExecutorTest, returns_all_from_upstream) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; // unlimited produce - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -121,14 +122,15 @@ TEST_P(ReturnExecutorTest, returns_all_from_upstream) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(8)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_soft_limit) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.softLimit = 3; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -136,14 +138,15 @@ TEST_P(ReturnExecutorTest, handle_soft_limit) { .expectSkipped(0) .expectedState(ExecutionState::HASMORE) .expectedStats(getCountStats(3)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_hard_limit) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.hardLimit = 5; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -151,14 +154,15 @@ TEST_P(ReturnExecutorTest, handle_hard_limit) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(5)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_offset) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.offset = 4; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -166,7 +170,7 @@ TEST_P(ReturnExecutorTest, handle_offset) { .expectSkipped(4) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(4)) - .run(std::move(infos)); + 
.run(); } TEST_P(ReturnExecutorTest, handle_fullcount) { @@ -174,7 +178,8 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { AqlCall call{}; call.hardLimit = 2; call.fullCount = true; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -182,14 +187,15 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { .expectSkipped(6) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(2)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_other_inputRegister) { ReturnExecutorInfos infos(1 /*input register*/, 2 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.hardLimit = 5; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<2, 1>(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{R"("invalid")", 1}, {R"("invalid")", 2}, {R"("invalid")", 5}, @@ -204,7 +210,7 @@ TEST_P(ReturnExecutorTest, handle_other_inputRegister) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(5)) - .run(std::move(infos)); + .run(); } } // namespace aql } // namespace tests diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index c267ab80c092..d5c91548df29 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -913,7 +913,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_5) { } } -using SortedCollectTestHelper = ExecutorTestHelper; +using SortedCollectTestHelper = ExecutorTestHelper<1, 1>; using SortedCollectSplitType = SortedCollectTestHelper::SplitType; class SortedCollectExecutorTestSplit @@ -980,40 +980,43 @@ class SortedCollectExecutorTestSplit TEST_P(SortedCollectExecutorTestSplit, split_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) .setInputSplitType(split) .setCall(AqlCall{2, AqlCall::Infinity{}, 2, true}) .expectOutputValueList(3, 4) .expectSkipped(3) .expectedState(ExecutionState::DONE) - .run(std::move(infos)); + .run(); } TEST_P(SortedCollectExecutorTestSplit, split_2) { auto [split] = GetParam(); ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) .setInputSplitType(split) .setCall(AqlCall{2, 2, AqlCall::Infinity{}, false}) .expectOutputValueList(3, 4) .expectSkipped(2) .expectedState(ExecutionState::HASMORE) - .run(std::move(infos)); + .run(); } TEST_P(SortedCollectExecutorTestSplit, split_3) { auto [split] = GetParam(); ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 3, 4, 5) .setInputSplitType(split) .setCall(AqlCall{1, AqlCall::Infinity{}, 10, true}) .expectOutputValueList(2, 3, 4, 5) .expectSkipped(1) .expectedState(ExecutionState::DONE) - .run(std::move(infos)); + .run(); } template From c509e18ce62dca27778ac969f5c8c67ec711d697 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Wed, 12 Feb 2020 14:44:54 +0000 Subject: [PATCH 060/122] Revert "Tweak ExecutorTestHelper to test pipelines" This reverts commit 6359f0c926cad30b4efd7ce263fb3b64563621b8. 
--- tests/Aql/ExecutorTestHelper.h | 29 ++++++---------------- tests/Aql/ReturnExecutorTest.cpp | 32 ++++++++++--------------- tests/Aql/SortedCollectExecutorTest.cpp | 13 ++++------ 3 files changed, 25 insertions(+), 49 deletions(-) diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 64d7c0554f6b..0d5ffbb5f08c 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -44,14 +44,7 @@ namespace arangodb { namespace tests { namespace aql { -using ExecBlock = std::shared_ptr; - -// TODO: this could also just be a pair of exec blocks (front and back of the pipeline) -// because the deque is defined by the dependencies (and this way we can depend on -// more than one ExecBlock) -using Pipeline = std::deque; - -template +template struct ExecutorTestHelper { using SplitType = std::variant, std::size_t, std::monostate>; @@ -131,17 +124,9 @@ struct ExecutorTestHelper { _expectedStats = stats; _testStats = true; return *this; - } - - template - auto setExecBlock(typename E::Infos infos) -> ExecutorTestHelper& { - auto testeeNode = std::make_unique(_query.plan(), 1); - _testee = std::make_unique>(_query.engine(), - testeeNode.get(), std::move(infos)); - return *this; - } + }; - auto run() -> void { + auto run(typename E::Infos infos) -> void { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); @@ -149,10 +134,11 @@ struct ExecutorTestHelper { auto testeeNode = std::make_unique(_query.plan(), 1); - _testee->addDependency(inputBlock.get()); + ExecutionBlockImpl testee{_query.engine(), testeeNode.get(), std::move(infos)}; + testee.addDependency(inputBlock.get()); AqlCallStack stack{_call}; - auto const [state, skipped, result] = _testee->execute(stack); + auto const [state, skipped, result] = testee.execute(stack); EXPECT_EQ(skipped, _expectedSkip); EXPECT_EQ(state, _expectedState); @@ -249,7 +235,6 @@ struct ExecutorTestHelper { arangodb::aql::Query& _query; std::unique_ptr _dummyNode; - ExecBlock _testee; }; enum class ExecutorCall { @@ -271,7 +256,7 @@ using ExecutorStepResult = std::tuple +template std::tuple, arangodb::aql::ExecutionStats> runExecutor(arangodb::aql::AqlItemBlockManager& manager, Executor& executor, arangodb::aql::OutputAqlItemRow& outputRow, size_t const numSkip, diff --git a/tests/Aql/ReturnExecutorTest.cpp b/tests/Aql/ReturnExecutorTest.cpp index 7fb106efd822..bf8dc3608fa3 100644 --- a/tests/Aql/ReturnExecutorTest.cpp +++ b/tests/Aql/ReturnExecutorTest.cpp @@ -47,7 +47,7 @@ namespace tests { namespace aql { // This is only to get a split-type. 
The Type is independent of actual template parameters -using ReturnExecutorTestHelper = ExecutorTestHelper<1, 1>; +using ReturnExecutorTestHelper = ExecutorTestHelper; using ReturnExecutorSplitType = ReturnExecutorTestHelper::SplitType; using ReturnExecutorParamType = std::tuple; @@ -113,8 +113,7 @@ INSTANTIATE_TEST_CASE_P(ReturnExecutor, ReturnExecutorTest, TEST_P(ReturnExecutorTest, returns_all_from_upstream) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; // unlimited produce - ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -122,15 +121,14 @@ TEST_P(ReturnExecutorTest, returns_all_from_upstream) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(8)) - .run(); + .run(std::move(infos)); } TEST_P(ReturnExecutorTest, handle_soft_limit) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.softLimit = 3; - ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -138,15 +136,14 @@ TEST_P(ReturnExecutorTest, handle_soft_limit) { .expectSkipped(0) .expectedState(ExecutionState::HASMORE) .expectedStats(getCountStats(3)) - .run(); + .run(std::move(infos)); } TEST_P(ReturnExecutorTest, handle_hard_limit) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.hardLimit = 5; - ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -154,15 +151,14 @@ TEST_P(ReturnExecutorTest, handle_hard_limit) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(5)) - .run(); + .run(std::move(infos)); } TEST_P(ReturnExecutorTest, handle_offset) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.offset = 4; - ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -170,7 +166,7 @@ TEST_P(ReturnExecutorTest, handle_offset) { .expectSkipped(4) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(4)) - .run(); + .run(std::move(infos)); } TEST_P(ReturnExecutorTest, handle_fullcount) { @@ -178,8 +174,7 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { AqlCall call{}; call.hardLimit = 2; call.fullCount = true; - ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -187,15 +182,14 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { .expectSkipped(6) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(2)) - .run(); + .run(std::move(infos)); } TEST_P(ReturnExecutorTest, handle_other_inputRegister) { ReturnExecutorInfos infos(1 /*input register*/, 2 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.hardLimit = 5; - ExecutorTestHelper<2, 1>(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValue({{R"("invalid")", 1}, {R"("invalid")", 2}, {R"("invalid")", 5}, @@ -210,7 +204,7 @@ 
TEST_P(ReturnExecutorTest, handle_other_inputRegister) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(5)) - .run(); + .run(std::move(infos)); } } // namespace aql } // namespace tests diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index d5c91548df29..c267ab80c092 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -913,7 +913,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_5) { } } -using SortedCollectTestHelper = ExecutorTestHelper<1, 1>; +using SortedCollectTestHelper = ExecutorTestHelper; using SortedCollectSplitType = SortedCollectTestHelper::SplitType; class SortedCollectExecutorTestSplit @@ -980,43 +980,40 @@ class SortedCollectExecutorTestSplit TEST_P(SortedCollectExecutorTestSplit, split_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + ExecutorTestHelper(*fakedQuery) .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) .setInputSplitType(split) .setCall(AqlCall{2, AqlCall::Infinity{}, 2, true}) .expectOutputValueList(3, 4) .expectSkipped(3) .expectedState(ExecutionState::DONE) - .run(); + .run(std::move(infos)); } TEST_P(SortedCollectExecutorTestSplit, split_2) { auto [split] = GetParam(); ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) .setInputSplitType(split) .setCall(AqlCall{2, 2, AqlCall::Infinity{}, false}) .expectOutputValueList(3, 4) .expectSkipped(2) .expectedState(ExecutionState::HASMORE) - .run(); + .run(std::move(infos)); } TEST_P(SortedCollectExecutorTestSplit, split_3) { auto [split] = GetParam(); ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 3, 4, 5) .setInputSplitType(split) .setCall(AqlCall{1, AqlCall::Infinity{}, 10, true}) .expectOutputValueList(2, 3, 4, 5) .expectSkipped(1) .expectedState(ExecutionState::DONE) - .run(); + .run(std::move(infos)); } template From 94da545713ef5c851e76655cbcbcaff74ec1beaf Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 13 Feb 2020 08:12:13 +0100 Subject: [PATCH 061/122] Added Stats return value to skipRowsRange (#11081) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added Stats return value to skipRowsRange * Fixed catch test. 
Somehow i merged the incorrect part Co-authored-by: Tobias Gödderz --- arangod/Aql/ExecutionBlockImpl.cpp | 26 +++++++++----- arangod/Aql/ExecutionBlockImpl.h | 4 +-- arangod/Aql/FilterExecutor.cpp | 13 ++++--- arangod/Aql/FilterExecutor.h | 4 +-- arangod/Aql/LimitExecutor.cpp | 11 +++--- arangod/Aql/ReturnExecutor.cpp | 9 ++--- arangod/Aql/ReturnExecutor.h | 2 +- arangod/Aql/ShortestPathExecutor.cpp | 6 ++-- arangod/Aql/ShortestPathExecutor.h | 2 +- arangod/Aql/SortedCollectExecutor.cpp | 7 ++-- arangod/Aql/SortedCollectExecutor.h | 4 +-- tests/Aql/ExecutionBlockImplTest.cpp | 40 +++++++++++----------- tests/Aql/FilterExecutorTest.cpp | 6 ++-- tests/Aql/ShortestPathExecutorTest.cpp | 6 ++-- tests/Aql/SortedCollectExecutorTest.cpp | 45 ++++++++++++++++--------- tests/Aql/TestLambdaExecutor.cpp | 2 +- tests/Aql/TestLambdaExecutor.h | 4 +-- 17 files changed, 109 insertions(+), 82 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 2c587ed15dad..c0eef936f3e6 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1054,8 +1054,9 @@ template struct dependent_false : std::false_type {}; template -std::tuple ExecutionBlockImpl::executeSkipRowsRange( - AqlItemBlockInputRange& inputRange, AqlCall& call) { +std::tuple +ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& inputRange, + AqlCall& call) { if constexpr (isNewStyleExecutor) { call.skippedRows = 0; if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { @@ -1067,19 +1068,22 @@ std::tuple ExecutionBlockImpl::execute // is a property of the executor), then we can just let the fetcher skip // the number of rows that we would like to skip. // Returning this will trigger to end in upstream state now, with the - // call that was handed it - return {inputRange.upstreamState(), 0, call}; + // call that was handed it. + static_assert( + std::is_same_v, + "Executors with custom statistics must implement skipRowsRange."); + return {inputRange.upstreamState(), NoStats{}, 0, call}; } else { static_assert(dependent_false::value, "This value of SkipRowsRangeVariant is not supported"); - return std::make_tuple(ExecutorState::DONE, 0, call); + return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); } } else { TRI_ASSERT(false); - return std::make_tuple(ExecutorState::DONE, 0, call); + return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); } // Compiler is unhappy without this. - return std::make_tuple(ExecutorState::DONE, 0, call); + return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); } /** @@ -1165,8 +1169,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::SKIP: { - auto [state, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); + auto [state, stats, skippedLocal, call] = + executeSkipRowsRange(_lastRange, clientCall); _skipped += skippedLocal; + _engine->_stats += stats; // The execute might have modified the client call. 
if (state == ExecutorState::DONE) { _execState = ExecState::SHADOWROWS; @@ -1232,8 +1238,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::FULLCOUNT: { - auto [state, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); + auto [state, stats, skippedLocal, call] = + executeSkipRowsRange(_lastRange, clientCall); _skipped += skippedLocal; + _engine->_stats += stats; if (state == ExecutorState::DONE) { _execState = ExecState::SHADOWROWS; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 78f8f7392bcb..a50634f58bbb 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -224,8 +224,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { std::tuple executeWithoutTrace(AqlCallStack stack); // execute a skipRowsRange call - std::tuple executeSkipRowsRange(AqlItemBlockInputRange& input, - AqlCall& call); + std::tuple executeSkipRowsRange( + AqlItemBlockInputRange& input, AqlCall& call); /** * @brief Inner getSome() part, without the tracing calls. diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index ecb318423211..8c2d66f09032 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -102,8 +102,9 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at } // TODO Remove me, we are using the getSome skip variant here. -std::tuple FilterExecutor::skipRowsRange( - AqlItemBlockInputRange& inputRange, AqlCall& call) { +auto FilterExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + FilterStats stats{}; size_t skipped = 0; while (inputRange.hasDataRow() && skipped < call.getOffset()) { auto const [unused, input] = inputRange.nextDataRow(); @@ -113,17 +114,19 @@ std::tuple FilterExecutor::skipRowsRange( } if (input.getValue(_infos.getInputRegister()).toBoolean()) { skipped++; + } else { + stats.incrFiltered(); } } call.didSkip(skipped); AqlCall upstreamCall{}; upstreamCall.softLimit = call.getOffset(); - return {inputRange.upstreamState(), skipped, upstreamCall}; + return {inputRange.upstreamState(), stats, skipped, upstreamCall}; } -std::tuple FilterExecutor::produceRows( - AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { +auto FilterExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { TRI_IF_FAILURE("FilterExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } diff --git a/arangod/Aql/FilterExecutor.h b/arangod/Aql/FilterExecutor.h index 0b5237e08652..4b7350088180 100644 --- a/arangod/Aql/FilterExecutor.h +++ b/arangod/Aql/FilterExecutor.h @@ -103,7 +103,7 @@ class FilterExecutor { * * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ - [[nodiscard]] std::tuple skipRowsRange( + [[nodiscard]] std::tuple skipRowsRange( AqlItemBlockInputRange& inputRange, AqlCall& call); [[nodiscard]] std::pair expectedNumberOfRows(size_t atMost) const; @@ -115,4 +115,4 @@ class FilterExecutor { } // namespace arangodb::aql -#endif \ No newline at end of file +#endif diff --git a/arangod/Aql/LimitExecutor.cpp b/arangod/Aql/LimitExecutor.cpp index c3760732443b..42a6befce2a9 100644 --- a/arangod/Aql/LimitExecutor.cpp +++ b/arangod/Aql/LimitExecutor.cpp @@ -88,8 +88,8 @@ std::pair LimitExecutor::skipRestForFullCount() { return {state, stats}; } - // We must not update _counter here. It is only used to count until offset+limit - // is reached. + // We must not update _counter here. 
It is only used to count until + // offset+limit is reached. if (infos().isFullCountEnabled()) { stats.incrFullCountBy(skipped); @@ -153,7 +153,7 @@ std::pair LimitExecutor::produceRows(OutputAqlItemRo state = _stateOfLastRowToOutput; TRI_ASSERT(state != ExecutionState::WAITING); input = std::move(_lastRowToOutput); - TRI_ASSERT(!_lastRowToOutput.isInitialized()); // rely on the move + TRI_ASSERT(!_lastRowToOutput.isInitialized()); // rely on the move } else { std::tie(state, input) = _fetcher.fetchRow(maxRowsLeftToFetch()); @@ -244,7 +244,7 @@ std::tuple LimitExecutor::fet case LimitState::RETURNING_LAST_ROW: case LimitState::RETURNING: auto rv = _fetcher.fetchBlockForPassthrough(std::min(atMost, maxRowsLeftToFetch())); - return { rv.first, LimitStats{}, std::move(rv.second) }; + return {rv.first, LimitStats{}, std::move(rv.second)}; } // The control flow cannot reach this. It is only here to make MSVC happy, // which is unable to figure out that the switch above is complete. @@ -286,5 +286,4 @@ std::tuple LimitExecutor::skipRows } return std::make_tuple(state, LimitStats{}, reportSkipped); -} - +} \ No newline at end of file diff --git a/arangod/Aql/ReturnExecutor.cpp b/arangod/Aql/ReturnExecutor.cpp index 02c3dbd0ff0e..35723a24524d 100644 --- a/arangod/Aql/ReturnExecutor.cpp +++ b/arangod/Aql/ReturnExecutor.cpp @@ -66,11 +66,11 @@ auto ReturnExecutor::produceRows(OutputAqlItemRow& output) } auto ReturnExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) - -> std::tuple { + -> std::tuple { TRI_IF_FAILURE("ReturnExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - + Stats stats{}; while (inputRange.hasDataRow() && call.needSkipMore()) { // I do not think that this is actually called. // It will be called first to get the upstream-Call @@ -83,14 +83,15 @@ auto ReturnExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } call.didSkip(1); - // TODO: do we need to include counted here? + /* if (_infos.doCount()) { + // TODO: do we need to include counted here? stats.incrCounted(); } */ } - return {inputRange.upstreamState(), call.getSkipCount(), call}; + return {inputRange.upstreamState(), stats, call.getSkipCount(), call}; } auto ReturnExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) diff --git a/arangod/Aql/ReturnExecutor.h b/arangod/Aql/ReturnExecutor.h index ec54f26898d0..c2d46ae638c1 100644 --- a/arangod/Aql/ReturnExecutor.h +++ b/arangod/Aql/ReturnExecutor.h @@ -102,7 +102,7 @@ class ReturnExecutor { * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) - -> std::tuple; + -> std::tuple; /** * @brief produce the next Rows of Aql Values. 
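Seen from the caller's side, every skip path now reports executor statistics next to the skipped row count. The lines below are a condensed sketch of how ExecutionBlockImpl consumes the widened tuple; they are assembled from the hunks in this patch rather than being additional code:

    auto [state, stats, skippedLocal, call] =
        executeSkipRowsRange(_lastRange, clientCall);
    _skipped += skippedLocal;
    _engine->_stats += stats;  // skip paths now contribute to the query statistics

Executors without statistics of their own keep returning NoStats{}; the static_assert added to executeSkipRowsRange() rejects executors that declare a custom Stats type but still rely on the fetcher-based skip variant instead of implementing skipRowsRange() themselves.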
diff --git a/arangod/Aql/ShortestPathExecutor.cpp b/arangod/Aql/ShortestPathExecutor.cpp index 45679024e646..e8d1a3336b92 100644 --- a/arangod/Aql/ShortestPathExecutor.cpp +++ b/arangod/Aql/ShortestPathExecutor.cpp @@ -270,7 +270,7 @@ auto ShortestPathExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlI } auto ShortestPathExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) - -> std::tuple { + -> std::tuple { auto skipped = size_t{0}; while (true) { @@ -279,13 +279,13 @@ auto ShortestPathExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& if (pathLengthAvailable() == 0) { if (!fetchPath(input)) { TRI_ASSERT(!input.hasDataRow()); - return {input.upstreamState(), skipped, AqlCall{}}; + return {input.upstreamState(), NoStats{}, skipped, AqlCall{}}; } } else { // if we end up here there is path available, but // we have skipped as much as we were asked to. TRI_ASSERT(call.getOffset() == 0); - return {ExecutorState::HASMORE, skipped, AqlCall{}}; + return {ExecutorState::HASMORE, NoStats{}, skipped, AqlCall{}}; } } } diff --git a/arangod/Aql/ShortestPathExecutor.h b/arangod/Aql/ShortestPathExecutor.h index 93bdfe2fe3ca..280e4fba7a4f 100644 --- a/arangod/Aql/ShortestPathExecutor.h +++ b/arangod/Aql/ShortestPathExecutor.h @@ -187,7 +187,7 @@ class ShortestPathExecutor { [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> std::tuple; [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) - -> std::tuple; + -> std::tuple; private: /** diff --git a/arangod/Aql/SortedCollectExecutor.cpp b/arangod/Aql/SortedCollectExecutor.cpp index 3967bf6d2faa..c59058930845 100644 --- a/arangod/Aql/SortedCollectExecutor.cpp +++ b/arangod/Aql/SortedCollectExecutor.cpp @@ -150,8 +150,7 @@ SortedCollectExecutorInfos::SortedCollectExecutorInfos( _trxPtr(trxPtr) {} SortedCollectExecutor::SortedCollectExecutor(Fetcher&, Infos& infos) - : _infos(infos), - _currentGroup(infos.getCount(), infos) { + : _infos(infos), _currentGroup(infos.getCount(), infos) { // reserve space for the current row _currentGroup.initialize(_infos.getGroupRegisters().size()); // reset and recreate new group @@ -398,7 +397,7 @@ auto SortedCollectExecutor::produceRows(AqlItemBlockInputRange& inputRange, } auto SortedCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) - -> std::tuple { + -> std::tuple { TRI_IF_FAILURE("SortedCollectExecutor::skipRowsRange") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -468,5 +467,5 @@ auto SortedCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, Aq LOG_DEVEL_SC << " skipped rows: " << clientCall.getSkipCount(); LOG_DEVEL_SC << "reporting state: " << inputRange.upstreamState(); - return {inputRange.upstreamState(), clientCall.getSkipCount(), AqlCall{}}; + return {inputRange.upstreamState(), Stats{}, clientCall.getSkipCount(), AqlCall{}}; } diff --git a/arangod/Aql/SortedCollectExecutor.h b/arangod/Aql/SortedCollectExecutor.h index 94eaa59a38d6..9145b991ad69 100644 --- a/arangod/Aql/SortedCollectExecutor.h +++ b/arangod/Aql/SortedCollectExecutor.h @@ -198,8 +198,8 @@ class SortedCollectExecutor { * * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ - [[nodiscard]] std::tuple skipRowsRange( - AqlItemBlockInputRange& inputRange, AqlCall& call); + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; /** * This executor has no chance to estimate how many rows diff --git 
a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index 49bf8ad9dcf0..c06c04776340 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -547,9 +547,10 @@ class SharedExecutionBlockImplTest { * @param numRowsLeftWithInput The number of available rows in the output, if we have given an input * @return ProduceCall The call ready to hand over to the LambdaExecutorInfos */ - ProduceCall generateProduceCall(size_t& nrCalls, AqlCall expectedCall, + static auto generateProduceCall(size_t& nrCalls, AqlCall expectedCall, size_t numRowsLeftNoInput = ExecutionBlock::DefaultBatchSize, - size_t numRowsLeftWithInput = ExecutionBlock::DefaultBatchSize) { + size_t numRowsLeftWithInput = ExecutionBlock::DefaultBatchSize) + -> ProduceCall { return [&nrCalls, numRowsLeftNoInput, numRowsLeftWithInput, expectedCall](AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> std::tuple { @@ -595,10 +596,9 @@ class SharedExecutionBlockImplTest { * @param expectedCall The call that is expected on every invocation of this function. * @return SkipCall The call ready to hand over to the LambdaExecutorInfos */ - SkipCall generateSkipCall(size_t& nrCalls, AqlCall expectedCall) { - return [&nrCalls, - expectedCall](AqlItemBlockInputRange& inputRange, - AqlCall& clientCall) -> std::tuple { + static auto generateSkipCall(size_t& nrCalls, AqlCall expectedCall) -> SkipCall { + return [&nrCalls, expectedCall](AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + -> std::tuple { if (nrCalls > 10) { EXPECT_TRUE(false); // This is emergency bailout, we ask way to often here @@ -622,7 +622,7 @@ class SharedExecutionBlockImplTest { upstreamCall.hardLimit = clientCall.getOffset() + clientCall.hardLimit; upstreamCall.offset = 0; - return {inputRange.upstreamState(), localSkip, upstreamCall}; + return {inputRange.upstreamState(), NoStats{}, localSkip, upstreamCall}; }; } @@ -632,9 +632,9 @@ class SharedExecutionBlockImplTest { * * @return SkipCall The always failing call to be used for the executor. */ - SkipCall generateNeverSkipCall() { + static auto generateNeverSkipCall() -> SkipCall { return [](AqlItemBlockInputRange& input, - AqlCall& call) -> std::tuple { + AqlCall& call) -> std::tuple { // Should not be called here. No Skip! EXPECT_TRUE(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); @@ -647,7 +647,7 @@ class SharedExecutionBlockImplTest { * * @return ProduceCall The always failing call to be used for the executor. */ - ProduceCall generateNeverProduceCall() { + static auto generateNeverProduceCall() -> ProduceCall { return [](AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> std::tuple { // Should not be called here. No limit, only skip! @@ -1438,9 +1438,8 @@ class ExecutionBlockImplExecuteIntegrationTest return {inputRange.upstreamState(), NoStats{}, call}; }; - auto skipData = - [data, iterator](AqlItemBlockInputRange& inputRange, - AqlCall& clientCall) -> std::tuple { + auto skipData = [data, iterator](AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + -> std::tuple { size_t skipped = 0; while (inputRange.hasDataRow() && (clientCall.getOffset() > 0 || @@ -1467,7 +1466,7 @@ class ExecutionBlockImplExecuteIntegrationTest call.softLimit = clientCall.getOffset(); } // else softLimit == unlimited call.fullCount = false; - return {inputRange.upstreamState(), skipped, call}; + return {inputRange.upstreamState(), NoStats{}, skipped, call}; }; auto infos = outReg == 0 ? 
makeSkipInfos(std::move(writeData), skipData, RegisterPlan::MaxRegisterId, outReg, resetCall) @@ -1541,8 +1540,8 @@ class ExecutionBlockImplExecuteIntegrationTest return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; }; - auto skipData = [&skipAsserter](AqlItemBlockInputRange& inputRange, - AqlCall& call) -> std::tuple { + auto skipData = [&skipAsserter](AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { skipAsserter.gotCalled(call); size_t skipped = 0; @@ -1559,7 +1558,7 @@ class ExecutionBlockImplExecuteIntegrationTest request.softLimit = call.getOffset(); } // else fullCount case, simple get UNLIMITED from above - return {inputRange.upstreamState(), skipped, request}; + return {inputRange.upstreamState(), NoStats{}, skipped, request}; }; auto producer = std::make_unique>( fakedQuery->engine(), generateNodeDummy(), @@ -1866,8 +1865,9 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_ request.softLimit = (std::min)(getClient.softLimit, getClient.hardLimit); return {inputRange.upstreamState(), NoStats{}, request}; }; - auto forwardSkipCall = [&](AqlItemBlockInputRange& inputRange, - AqlCall& call) -> std::tuple { + auto forwardSkipCall = + [&](AqlItemBlockInputRange& inputRange, + AqlCall& call) -> std::tuple { skipState.gotCalled(call); size_t skipped = 0; while (inputRange.hasDataRow() && call.shouldSkip()) { @@ -1883,7 +1883,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_ request.softLimit = call.getOffset(); } // else fullCount case, simple get UNLIMITED from above - return {inputRange.upstreamState(), skipped, request}; + return {inputRange.upstreamState(), NoStats{}, skipped, request}; }; auto lower = std::make_unique>( diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 6cceab6e09fc..86907101764b 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -424,7 +424,8 @@ TEST_F(FilterExecutorTest, test_skip_datarange_need_more) { AqlCall clientCall; clientCall.offset = 1000; - auto const [state, skipped, call] = testee.skipRowsRange(input, clientCall); + auto const [state, stats, skipped, call] = testee.skipRowsRange(input, clientCall); + // TODO check stats EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 3); EXPECT_EQ(clientCall.getOffset(), 1000 - 3); @@ -494,7 +495,8 @@ TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; AqlCall clientCall; clientCall.offset = 2; - auto const [state, skipped, call] = testee.skipRowsRange(input, clientCall); + auto const [state, stats, skipped, call] = testee.skipRowsRange(input, clientCall); + // TODO check stats EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(skipped, 2); EXPECT_EQ(clientCall.getOffset(), 0); diff --git a/tests/Aql/ShortestPathExecutorTest.cpp b/tests/Aql/ShortestPathExecutorTest.cpp index 28a8bdc3fb9b..3ddc6495b66c 100644 --- a/tests/Aql/ShortestPathExecutorTest.cpp +++ b/tests/Aql/ShortestPathExecutorTest.cpp @@ -430,7 +430,8 @@ class ShortestPathExecutorTest // If an offset is requested, skip if (ourCall.getOffset() > 0) { - std::tie(state, skippedInitial, std::ignore) = testee.skipRowsRange(input, ourCall); + std::tie(state, std::ignore, skippedInitial, std::ignore) = + testee.skipRowsRange(input, ourCall); } // Produce rows @@ -453,7 +454,8 @@ class ShortestPathExecutorTest // Emulate being called with a full count ourCall.hardLimit = 0; 
ourCall.softLimit = 0; - std::tie(state, skippedFullCount, std::ignore) = testee.skipRowsRange(input, ourCall); + std::tie(state, std::ignore, skippedFullCount, std::ignore) = + testee.skipRowsRange(input, ourCall); } ValidateCalledWith(); diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index c267ab80c092..cfc51936e38b 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -662,7 +662,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_1) { clientCall.offset = 2; { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(emptyInputRange, clientCall); ASSERT_EQ(ExecutorState::HASMORE, state); ASSERT_FALSE(upstreamCall.hasHardLimit()); ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -672,7 +673,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_1) { } { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(inputRange, clientCall); ASSERT_EQ(ExecutorState::DONE, state); ASSERT_FALSE(upstreamCall.hasHardLimit()); ASSERT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -697,7 +699,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_2) { clientCall.offset = 1; { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(emptyInputRange, clientCall); EXPECT_EQ(ExecutorState::HASMORE, state); EXPECT_FALSE(upstreamCall.hasHardLimit()); EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -707,7 +710,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_2) { } { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(inputRange, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_FALSE(upstreamCall.hasHardLimit()); EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -756,7 +760,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_3) { clientCall.offset = 1; { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(emptyInputRange, clientCall); EXPECT_EQ(ExecutorState::HASMORE, state); EXPECT_FALSE(upstreamCall.hasHardLimit()); EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -766,7 +771,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_3) { } { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(inputRange, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); EXPECT_EQ(skipped, 0); @@ -774,7 +780,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_3) { } { - auto [state, skipped, upstreamCall] = + auto [state, stats, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRangeDone, clientCall); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_EQ(0, upstreamCall.offset); @@ -803,7 +809,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_4) { clientCall.offset = 1; { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(emptyInputRange, clientCall); EXPECT_EQ(ExecutorState::HASMORE, state); 
EXPECT_FALSE(upstreamCall.hasHardLimit()); EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -814,7 +821,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_4) { { // 1, 1 - auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(inputRange, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); EXPECT_EQ(skipped, 0); @@ -823,7 +831,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_4) { { // 2 - auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange2, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(inputRange2, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(0, upstreamCall.offset); EXPECT_EQ(skipped, 1); @@ -876,7 +885,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_5) { clientCall.offset = 1; { - auto [state, skipped, upstreamCall] = testee.skipRowsRange(emptyInputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(emptyInputRange, clientCall); EXPECT_EQ(ExecutorState::HASMORE, state); EXPECT_FALSE(upstreamCall.hasHardLimit()); EXPECT_TRUE(std::holds_alternative(upstreamCall.softLimit)); @@ -887,7 +897,8 @@ TEST_F(SortedCollectExecutorTestSkip, skip_5) { { // 1, 1, 2 - auto [state, skipped, upstreamCall] = testee.skipRowsRange(inputRange, clientCall); + auto [state, stats, skipped, upstreamCall] = + testee.skipRowsRange(inputRange, clientCall); EXPECT_EQ(state, ExecutorState::HASMORE); EXPECT_EQ(clientCall.fullCount, upstreamCall.fullCount); EXPECT_EQ(skipped, 1); @@ -1016,12 +1027,14 @@ TEST_P(SortedCollectExecutorTestSplit, split_3) { .run(std::move(infos)); } -template -const SortedCollectSplitType splitIntoBlocks = SortedCollectSplitType{std::vector{vs...}}; -template +template +const SortedCollectSplitType splitIntoBlocks = + SortedCollectSplitType{std::vector{vs...}}; +template const SortedCollectSplitType splitStep = SortedCollectSplitType{step}; INSTANTIATE_TEST_CASE_P(SortedCollectExecutor, SortedCollectExecutorTestSplit, - ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>, splitStep<2>)); + ::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, splitStep<2>)); } // namespace arangodb::tests::aql diff --git a/tests/Aql/TestLambdaExecutor.cpp b/tests/Aql/TestLambdaExecutor.cpp index 9f7cdb99e417..facdfe75c65d 100644 --- a/tests/Aql/TestLambdaExecutor.cpp +++ b/tests/Aql/TestLambdaExecutor.cpp @@ -117,6 +117,6 @@ auto TestLambdaSkipExecutor::produceRows(AqlItemBlockInputRange& input, OutputAq } auto TestLambdaSkipExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) - -> std::tuple { + -> std::tuple { return _infos.getSkipLambda()(input, call); } diff --git a/tests/Aql/TestLambdaExecutor.h b/tests/Aql/TestLambdaExecutor.h index 4ec0c995e956..1843b7f1a24d 100644 --- a/tests/Aql/TestLambdaExecutor.h +++ b/tests/Aql/TestLambdaExecutor.h @@ -50,7 +50,7 @@ using ProduceCall = * @brief This is a shorthand for the skipRowsInRange signature */ using SkipCall = - std::function(AqlItemBlockInputRange& input, AqlCall& call)>; + std::function(AqlItemBlockInputRange& input, AqlCall& call)>; /** * @brief This is a shorthand for the reset state signature @@ -223,7 +223,7 @@ class TestLambdaSkipExecutor { * @return std::tuple */ auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) - -> std::tuple; + -> std::tuple; /** * @brief produceRows API. 
Just calls the ProduceCall in the Infos. From 518c042a65349b0e93191c090775197ce8ce13a7 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 13 Feb 2020 09:51:59 +0100 Subject: [PATCH 062/122] Additional Assertion in ExecutionBlockImpl (#11077) * Added a assertion arround skipRowsInRange that ensures that the Call and the skippedLocal do not get out-of-sync * Improved the assertion --- arangod/Aql/ExecutionBlockImpl.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index c0eef936f3e6..9c525cf2caef 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1169,8 +1169,30 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::SKIP: { +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + size_t offsetBefore = clientCall.getOffset(); + TRI_ASSERT(offsetBefore > 0); + size_t canPassFullcount = + clientCall.getLimit() == 0 && clientCall.needsFullCount(); +#endif auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + // Assertion: We did skip 'skippedLocal' documents here. + // This means that they have to be removed from clientCall.getOffset() + // This has to be done by the Executor calling call.didSkip() + // accordingly. + if (canPassFullcount) { + // In htis case we can first skip. But straight after continue with fullCount, so we might skip more + TRI_ASSERT(clientCall.getOffset() + skippedLocal >= offsetBefore); + if (clientCall.getOffset() + skippedLocal > offsetBefore) { + // First need to count down offset. + TRI_ASSERT(clientCall.getOffset() == 0); + } + } else { + TRI_ASSERT(clientCall.getOffset() + skippedLocal == offsetBefore); + } +#endif _skipped += skippedLocal; _engine->_stats += stats; // The execute might have modified the client call. From 54043f3b76093f0cd07375f5b336bfa8b17e158a Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 14 Feb 2020 10:54:20 +0100 Subject: [PATCH 063/122] Feature/aql subquery operations stack id executor (#10986) * added inputRange skip and produce * added inputRange skip and produce * Added shouldSkip helper to AqlCall * Added implementation if execute to Constfetcher. Not specifically tested yet. * Moved IDExecutor to newStyle executor * Added some documentation to SingleRowFetcher * Adapted IdExecutor tests. * Added an assertion in ConstFetcher * Fixed initialize cursor in ExecutionBlockImpl * Added tests for Initilaize cursor in IdExecutor. There is a special implementation for this... * Fixed linker issues, because of variables in cpp files named identical and not beeing static * Started refactoring the Scatter/Gather Executor logic. Added an intermediate class that does not use a template and can be casted to. This state compiles but will not work. * Made BlocksWithClientsImpl a template, based on an executor * Added a dummy implementation to fetch a new block. Distribute not yet implemented. * Added superoptimistic first implementation of ScatterExecutor return. Only works if there are no shadowRows. * Implemented logic to slice Blocks in Scatter according to the given call. Also added handfull of test for it, not complete though * Fixed a Scatter Executor handling of softLimit. 
Added another test to ansure diverse calls of clients to Scatter * Added random ordering test framework on ScatterExecutor * Added some tests for ScatterExecutor including subqueries * Removed todo, as it has been solved by now * Implemented Scatter by using IDExecutor. Tests not green yet * Implementation of ConstFetcher that allows to inject fullBlocks of AqlItemRows, and not only single rows. Also handles ShadowRows now in a correct way * Do not jump over relevant shadow rows in ConstFetcher. * Fixed tests. * Moved information of DistributeExecutor into the Infos. This resulted in adapting the Constructor API. I used this situation to do a big cleanup of ancient code. From this point there is no way back. * Implemented the last bits of DistributeExecutor * 'Implemented' execute by fallback to old API in RemoteExecutor * Fixed an EdgeCase with Scatter Executor, where the data was modified in place right after scatter. This has side-effects on peer executors. So we for now do a full copy of every block here. * Removed invalid assertions, also fixed RegisterInput to DistributeExecutor * Removed obsolete TODOs * Update arangod/Aql/IdExecutor.cpp Co-Authored-By: Lars Maier Co-authored-by: Heiko Co-authored-by: Lars Maier --- arangod/Aql/AqlItemBlock.cpp | 58 ++- arangod/Aql/AqlItemBlock.h | 19 +- arangod/Aql/BlocksWithClients.cpp | 178 ++++++- arangod/Aql/BlocksWithClients.h | 130 +++++- arangod/Aql/ClusterNodes.cpp | 62 +-- arangod/Aql/ConstFetcher.cpp | 188 +++++++- arangod/Aql/ConstFetcher.h | 22 +- arangod/Aql/DependencyProxy.cpp | 47 +- arangod/Aql/DistributeExecutor.cpp | 535 +++++++++++---------- arangod/Aql/DistributeExecutor.h | 215 +++++---- arangod/Aql/ExecutionBlockImpl.cpp | 28 +- arangod/Aql/ExecutionBlockImpl.h | 6 + arangod/Aql/IdExecutor.cpp | 35 +- arangod/Aql/IdExecutor.h | 12 + arangod/Aql/RemoteExecutor.cpp | 17 +- arangod/Aql/RestAqlHandler.cpp | 19 +- arangod/Aql/ScatterExecutor.cpp | 230 ++++----- arangod/Aql/ScatterExecutor.h | 86 ++-- arangod/Aql/SingleRowFetcher.h | 16 +- tests/Aql/ExecutorTestHelper.cpp | 45 +- tests/Aql/ExecutorTestHelper.h | 40 ++ tests/Aql/IdExecutorTest.cpp | 534 +++++++++++++++++++-- tests/Aql/ScatterExecutorTest.cpp | 592 ++++++++++++++++++++++++ tests/Aql/ShortestPathExecutorTest.cpp | 2 +- tests/Aql/SortedCollectExecutorTest.cpp | 23 +- tests/Aql/TestExecutorHelper.cpp | 7 +- tests/CMakeLists.txt | 1 + 27 files changed, 2451 insertions(+), 696 deletions(-) create mode 100644 tests/Aql/ScatterExecutorTest.cpp diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index de5c98eacfb6..25334172a92b 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -447,21 +447,59 @@ void AqlItemBlock::clearRegisters(std::unordered_set const& toClear) SharedAqlItemBlockPtr AqlItemBlock::slice(size_t from, size_t to) const { TRI_ASSERT(from < to); TRI_ASSERT(to <= _nrItems); + return slice({{from, to}}); +} - std::unordered_set cache; - cache.reserve((to - from) * _nrRegs / 4 + 1); - - SharedAqlItemBlockPtr res{_manager.requestBlock(to - from, _nrRegs)}; +/** + * @brief Slice multiple ranges out of this AqlItemBlock. 
+ * This does a deep copy of all entries + * + * @param ranges list of ranges from(included) -> to(excluded) + * Every range needs to be valid from[i] < to[i] + * And every range needs to be within the block to[i] <= size() + * The list is required to be ordered to[i] <= from[i+1] + * + * @return SharedAqlItemBlockPtr A block where all the slices are contained in the order of the list + */ +auto AqlItemBlock::slice(std::vector> const& ranges) const + -> SharedAqlItemBlockPtr { +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + // Analyze correctness of ranges + TRI_ASSERT(!ranges.empty()); + for (size_t i = 0; i < ranges.size(); ++i) { + auto const& [from, to] = ranges[i]; + // Range is valid + TRI_ASSERT(from < to); + TRI_ASSERT(to <= _nrItems); + if (i > 0) { + // List is ordered + TRI_ASSERT(ranges[i - 1].second <= from); + } + } +#endif + size_t numRows = 0; + for (auto const& [from, to] : ranges) { + numRows += to - from; + } - for (size_t row = from; row < to; row++) { - // Note this loop is special, it will also Copy over the SubqueryDepth data in reg 0 - for (RegisterId col = 0; col < _nrRegs; col++) { - AqlValue const& a(_data[getAddress(row, col)]); - ::CopyValueOver(cache, a, row - from, col, res); + std::unordered_set cache; + cache.reserve(numRows * _nrRegs / 4 + 1); + + SharedAqlItemBlockPtr res{_manager.requestBlock(numRows, _nrRegs)}; + size_t targetRow = 0; + for (auto const& [from, to] : ranges) { + for (size_t row = from; row < to; row++, targetRow++) { + // Note this loop is special, it will also Copy over the SubqueryDepth data in reg 0 + for (RegisterId col = 0; col < _nrRegs; col++) { + AqlValue const& a(_data[getAddress(row, col)]); + ::CopyValueOver(cache, a, targetRow, col, res); + } + res->copySubQueryDepthFromOtherBlock(targetRow, *this, row); } - res->copySubQueryDepthFromOtherBlock(row - from, *this, row); } + TRI_ASSERT(res->size() == numRows); + return res; } diff --git a/arangod/Aql/AqlItemBlock.h b/arangod/Aql/AqlItemBlock.h index 14acd1c4225d..6badb45b8c5d 100644 --- a/arangod/Aql/AqlItemBlock.h +++ b/arangod/Aql/AqlItemBlock.h @@ -193,6 +193,19 @@ class AqlItemBlock { /// @brief slice/clone, this does a deep copy of all entries SharedAqlItemBlockPtr slice(size_t from, size_t to) const; + /** + * @brief Slice multiple ranges out of this AqlItemBlock. + * This does a deep copy of all entries + * + * @param ranges list of ranges from(included) -> to(excluded) + * Every range needs to be valid from[i] < to[i] + * And every range needs to be within the block to[i] <= size() + * The list is required to be ordered to[i] <= from[i+1] + * + * @return SharedAqlItemBlockPtr A block where all the slices are contained in the order of the list + */ + auto slice(std::vector> const& ranges) const -> SharedAqlItemBlockPtr; + /// @brief create an AqlItemBlock with a single row, with copies of the /// specified registers from the current block SharedAqlItemBlockPtr slice(size_t row, std::unordered_set const& registers, @@ -318,9 +331,9 @@ class AqlItemBlock { /// this ItemBlock. Used to easier split data based on them. std::set _shadowRowIndexes; - /// @brief current row index we want to read from. This will be increased after - /// getRelevantRange function will be called, which will return a tuple of the - /// old _rowIndex and the newly calculated _rowIndex - 1 + /// @brief current row index we want to read from. 
This will be increased + /// after getRelevantRange function will be called, which will return a tuple + /// of the old _rowIndex and the newly calculated _rowIndex - 1 size_t _rowIndex; }; diff --git a/arangod/Aql/BlocksWithClients.cpp b/arangod/Aql/BlocksWithClients.cpp index 7c856f3a4aa1..63adcc31c392 100644 --- a/arangod/Aql/BlocksWithClients.cpp +++ b/arangod/Aql/BlocksWithClients.cpp @@ -29,10 +29,12 @@ #include "Aql/AqlValue.h" #include "Aql/BlockCollector.h" #include "Aql/Collection.h" +#include "Aql/DistributeExecutor.h" #include "Aql/ExecutionEngine.h" #include "Aql/ExecutionStats.h" #include "Aql/InputAqlItemRow.h" #include "Aql/Query.h" +#include "Aql/ScatterExecutor.h" #include "Basics/Exceptions.h" #include "Basics/StaticStrings.h" #include "Basics/StringBuffer.h" @@ -61,22 +63,63 @@ using namespace arangodb::aql; using VelocyPackHelper = arangodb::basics::VelocyPackHelper; using StringBuffer = arangodb::basics::StringBuffer; -BlocksWithClients::BlocksWithClients(ExecutionEngine* engine, ExecutionNode const* ep, - std::vector const& shardIds) +ClientsExecutorInfos::ClientsExecutorInfos(std::vector clientIds) + : _clientIds(std::move(clientIds)) { + TRI_ASSERT(!_clientIds.empty()); +}; + +auto ClientsExecutorInfos::nrClients() const noexcept -> size_t { + return _clientIds.size(); +} +auto ClientsExecutorInfos::clientIds() const noexcept -> std::vector const& { + return _clientIds; +} + +template +BlocksWithClientsImpl::BlocksWithClientsImpl(ExecutionEngine* engine, + ExecutionNode const* ep, + typename Executor::Infos infos) : ExecutionBlock(engine, ep), - _nrClients(shardIds.size()), + BlocksWithClients(), + _nrClients(infos.nrClients()), _type(ScatterNode::ScatterType::SHARD), + _infos(std::move(infos)), + _executor{_infos}, + _clientBlockData{}, _wasShutdown(false) { _shardIdMap.reserve(_nrClients); + auto const& shardIds = _infos.clientIds(); for (size_t i = 0; i < _nrClients; i++) { _shardIdMap.try_emplace(shardIds[i], i); } + auto scatter = ExecutionNode::castTo(ep); TRI_ASSERT(scatter != nullptr); _type = scatter->getScatterType(); + + _clientBlockData.reserve(shardIds.size()); + + auto readAble = make_shared_unordered_set(); + auto writeAble = make_shared_unordered_set(); + + for (auto const& id : shardIds) { + _clientBlockData.try_emplace(id, typename Executor::ClientBlockData{*engine, scatter, _infos}); + } +} + +/// @brief initializeCursor +template +auto BlocksWithClientsImpl::initializeCursor(InputAqlItemRow const& input) + -> std::pair { + for (auto& [key, list] : _clientBlockData) { + list.clear(); + } + return ExecutionBlock::initializeCursor(input); } -std::pair BlocksWithClients::getBlock(size_t atMost) { +template +auto BlocksWithClientsImpl::getBlock(size_t atMost) + -> std::pair { if (_engine->getQuery()->killed()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); } @@ -101,7 +144,9 @@ std::pair BlocksWithClients::getBlock(size_t atMost) { } /// @brief shutdown -std::pair BlocksWithClients::shutdown(int errorCode) { +template +auto BlocksWithClientsImpl::shutdown(int errorCode) + -> std::pair { if (_wasShutdown) { return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; } @@ -115,7 +160,8 @@ std::pair BlocksWithClients::shutdown(int errorCode) { /// @brief getClientId: get the number (used internally) /// corresponding to -size_t BlocksWithClients::getClientId(std::string const& shardId) const { +template +size_t BlocksWithClientsImpl::getClientId(std::string const& shardId) const { if (shardId.empty()) { THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "got 
empty distribution id"); @@ -129,18 +175,132 @@ size_t BlocksWithClients::getClientId(std::string const& shardId) const { return it->second; } -std::pair BlocksWithClients::getSome(size_t) { +template +std::pair BlocksWithClientsImpl::getSome(size_t) { TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } -std::pair BlocksWithClients::skipSome(size_t) { +template +std::pair BlocksWithClientsImpl::skipSome(size_t) { TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } -std::tuple BlocksWithClients::execute(AqlCallStack stack) { +template +std::tuple BlocksWithClientsImpl::execute(AqlCallStack stack) { // This will not be implemented here! TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } + +template +auto BlocksWithClientsImpl::executeForClient(AqlCallStack stack, + std::string const& clientId) + -> std::tuple { + // traceExecuteBegin(stack); + auto res = executeWithoutTraceForClient(stack, clientId); + // traceExecuteEnd(res); + return res; +} + +template +auto BlocksWithClientsImpl::executeWithoutTraceForClient(AqlCallStack stack, + std::string const& clientId) + -> std::tuple { + TRI_ASSERT(!clientId.empty()); + if (clientId.empty()) { + // Security bailout to avoid UB + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, + "got empty distribution id"); + } + + auto it = _clientBlockData.find(clientId); + TRI_ASSERT(it != _clientBlockData.end()); + if (it == _clientBlockData.end()) { + // Security bailout to avoid UB + std::string message("AQL: unknown distribution id "); + message.append(clientId); + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, message); + } + + // This call is only used internally. + auto call = stack.isRelevant() ? stack.popCall() : AqlCall{}; + + // We do not have anymore data locally. + // Need to fetch more from upstream + auto& dataContainer = it->second; + + while (!dataContainer.hasDataFor(call)) { + if (_upstreamState == ExecutionState::DONE) { + // We are done, with everything, we will not be able to fetch any more rows + return {_upstreamState, 0, nullptr}; + } + + auto state = fetchMore(stack); + if (state == ExecutionState::WAITING) { + return {state, 0, nullptr}; + } + _upstreamState = state; + } + // If we get here we have data and can return it. + return dataContainer.execute(call, _upstreamState); +} + +template +auto BlocksWithClientsImpl::fetchMore(AqlCallStack stack) -> ExecutionState { + if (_engine->getQuery()->killed()) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); + } + + // NOTE: We do not handle limits / skip here + // They can differ between different calls to this executor. + // We may need to revisit this for performance reasons. + AqlCall call{}; + stack.pushCall(std::move(call)); + + TRI_ASSERT(_dependencies.size() == 1); + auto [state, skipped, block] = _dependencies[0]->execute(stack); + + // We can never ever forward skip! + // We could need the row in a different block, and once skipped + // we cannot get it back. 
+ TRI_ASSERT(skipped == 0); + + TRI_IF_FAILURE("ExecutionBlock::getBlock") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + // Waiting -> no block + TRI_ASSERT(state != ExecutionState::WAITING || block == nullptr); + if (block != nullptr) { + _executor.distributeBlock(block, _clientBlockData); + } + + return state; +} + +/// @brief getSomeForShard +/// @deprecated +template +std::pair BlocksWithClientsImpl::getSomeForShard( + size_t atMost, std::string const& shardId) { + AqlCallStack stack(AqlCall::SimulateGetSome(atMost)); + auto [state, skipped, block] = executeForClient(stack, shardId); + TRI_ASSERT(skipped == 0); + return {state, block}; +} + +/// @brief skipSomeForShard +/// @deprecated +template +std::pair BlocksWithClientsImpl::skipSomeForShard( + size_t atMost, std::string const& shardId) { + AqlCallStack stack(AqlCall::SimulateSkipSome(atMost)); + auto [state, skipped, block] = executeForClient(stack, shardId); + TRI_ASSERT(block == nullptr); + return {state, skipped}; +} + +template class ::arangodb::aql::BlocksWithClientsImpl; +template class ::arangodb::aql::BlocksWithClientsImpl; \ No newline at end of file diff --git a/arangod/Aql/BlocksWithClients.h b/arangod/Aql/BlocksWithClients.h index ca29e4cd476f..ead4589bf64f 100644 --- a/arangod/Aql/BlocksWithClients.h +++ b/arangod/Aql/BlocksWithClients.h @@ -39,10 +39,6 @@ namespace arangodb { -namespace httpclient { -class SimpleHttpResult; -} - namespace transaction { class Methods; } @@ -53,14 +49,75 @@ struct Collection; class ExecutionEngine; class ExecutionNode; -class BlocksWithClients : public ExecutionBlock { +class ClientsExecutorInfos { + public: + ClientsExecutorInfos(std::vector clientIds); + + ClientsExecutorInfos(ClientsExecutorInfos&&) = default; + ClientsExecutorInfos(ClientsExecutorInfos const&) = delete; + ~ClientsExecutorInfos() = default; + + auto nrClients() const noexcept -> size_t; + auto clientIds() const noexcept -> std::vector const&; + + private: + std::vector _clientIds; +}; + +class BlocksWithClients { public: - BlocksWithClients(ExecutionEngine* engine, ExecutionNode const* ep, - std::vector const& shardIds); + virtual ~BlocksWithClients() {} + + /// @brief getSomeForShard + /// @deprecated + virtual std::pair getSomeForShard( + size_t atMost, std::string const& shardId) = 0; + + /// @brief skipSomeForShard + /// @deprecated + virtual std::pair skipSomeForShard(size_t atMost, + std::string const& shardId) = 0; + + /** + * @brief Execute for client. + * Like execute, but bound to the dataset, that needs to be send to the given client ID + * + * @param stack The AqlCallStack + * @param clientId The requesting client Id. + * @return std::tuple + */ + virtual auto executeForClient(AqlCallStack stack, std::string const& clientId) + -> std::tuple = 0; +}; - ~BlocksWithClients() override = default; +/** + * @brief Implementation of an ExecutionBlock that has multiple clients + * Data is distributed to those clients, it might be all (Scatter) + * or a selected part of it (Distribute). + * How data is distributed is defined by the template parameter. + * + * @tparam ClientBlockData needs to be able to hold the data to be distributed + * to a single client. 
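+ * (In this patch the Scatter and the Distribute executors each provide such a type.)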
+ * It needs to implement the following methods: + * canProduce(size_t limit) -> bool stating it has enough information to fill limit many rows (or more) + * + */ + +template +class BlocksWithClientsImpl : public ExecutionBlock, public BlocksWithClients { + using Infos = typename Executor::Infos; public: + BlocksWithClientsImpl(ExecutionEngine* engine, ExecutionNode const* ep, + typename Executor::Infos infos); + + ~BlocksWithClientsImpl() override = default; + + public: + /// @brief initializeCursor + auto initializeCursor(InputAqlItemRow const& input) + -> std::pair override; + /// @brief shutdown std::pair shutdown(int) override; @@ -72,15 +129,46 @@ class BlocksWithClients : public ExecutionBlock { /// @brief skipSome: shouldn't be used, use skipSomeForShard std::pair skipSome(size_t atMost) final; + /// @brief execute: shouldn't be used, use executeForClient + std::tuple execute(AqlCallStack stack) override; + + /** + * @brief Execute for client. + * Like execute, but bound to the dataset, that needs to be send to the given client ID + * + * @param stack The AqlCallStack + * @param clientId The requesting client Id. + * @return std::tuple + */ + auto executeForClient(AqlCallStack stack, std::string const& clientId) + -> std::tuple override; + + private: + /** + * @brief Actual implementation of Execute. + * + * @param stack The AqlCallStack + * @param clientId The requesting client Id. + * @return std::tuple + */ + auto executeWithoutTraceForClient(AqlCallStack stack, std::string const& clientId) + -> std::tuple; + + /** + * @brief Load more data from upstream and distribute it into _clientBlockData + * + */ + auto fetchMore(AqlCallStack stack) -> ExecutionState; + /// @brief getSomeForShard - virtual std::pair getSomeForShard( - size_t atMost, std::string const& shardId) = 0; + /// @deprecated + std::pair getSomeForShard(size_t atMost, + std::string const& shardId) override; /// @brief skipSomeForShard - virtual std::pair skipSomeForShard(size_t atMost, - std::string const& shardId) = 0; - - std::tuple execute(AqlCallStack stack) override; + /// @deprecated + std::pair skipSomeForShard(size_t atMost, + std::string const& shardId) override; protected: /// @brief getClientId: get the number (used internally) @@ -88,6 +176,7 @@ class BlocksWithClients : public ExecutionBlock { size_t getClientId(std::string const& shardId) const; /// @brief _shardIdMap: map from shardIds to clientNrs + /// @deprecated std::unordered_map _shardIdMap; /// @brief _nrClients: total number of clients @@ -97,6 +186,19 @@ class BlocksWithClients : public ExecutionBlock { ScatterNode::ScatterType _type; private: + /** + * @brief This is the working party of this implementation + * the template class needs to implement the logic + * to produce a single row from the upstream information. + */ + Infos _infos; + + Executor _executor; + + /// @brief A map of clientId to the data this client should receive. + /// This map will be filled as the execution progresses. 
+ std::unordered_map _clientBlockData; + bool _wasShutdown; }; diff --git a/arangod/Aql/ClusterNodes.cpp b/arangod/Aql/ClusterNodes.cpp index 84f04004d70d..c07fc82a499e 100644 --- a/arangod/Aql/ClusterNodes.cpp +++ b/arangod/Aql/ClusterNodes.cpp @@ -221,10 +221,10 @@ std::unique_ptr ScatterNode::createBlock( std::unordered_set regsToKeep = calcRegsToKeep(); std::unordered_set regsToClear = getRegsToClear(); - ExecutorInfos infos({}, {}, nrInRegs, nrOutRegs, std::move(regsToClear), - std::move(regsToKeep)); + ScatterExecutorInfos infos({}, {}, nrInRegs, nrOutRegs, std::move(regsToClear), + std::move(regsToKeep), _clients); return std::make_unique>(&engine, this, - std::move(infos), _clients); + std::move(infos)); } /// @brief toVelocyPack, for ScatterNode @@ -319,9 +319,6 @@ std::unique_ptr DistributeNode::createBlock( std::unordered_set regsToKeep = calcRegsToKeep(); std::unordered_set regsToClear = getRegsToClear(); - ExecutorInfos infos({}, {}, nrInRegs, nrOutRegs, std::move(regsToClear), - std::move(regsToKeep)); - RegisterId regId; RegisterId alternativeRegId = RegisterPlan::MaxRegisterId; @@ -348,10 +345,18 @@ std::unique_ptr DistributeNode::createBlock( TRI_ASSERT(alternativeRegId == RegisterPlan::MaxRegisterId); } } + auto inAndOutRegs = make_shared_unordered_set({regId}); + if (alternativeRegId != RegisterPlan::MaxRegisterId) { + inAndOutRegs->emplace(alternativeRegId); + } + DistributeExecutorInfos infos(inAndOutRegs, inAndOutRegs, nrInRegs, nrOutRegs, + std::move(regsToClear), std::move(regsToKeep), + clients(), collection(), regId, alternativeRegId, + _allowSpecifiedKeys, _allowKeyConversionToObject, + _createKeys, getScatterType()); - return std::make_unique>( - &engine, this, std::move(infos), clients(), collection(), regId, alternativeRegId, - _allowSpecifiedKeys, _allowKeyConversionToObject, _createKeys); + return std::make_unique>(&engine, this, + std::move(infos)); } /// @brief toVelocyPack, for DistributedNode @@ -426,7 +431,7 @@ GatherNode::GatherNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& b _vocbase(&(plan->getAst()->query()->vocbase())), _elements(elements), _sortmode(SortMode::MinElement), - _parallelism(Parallelism::Undefined), + _parallelism(Parallelism::Undefined), _limit(0) { if (!_elements.empty()) { auto const sortModeSlice = base.get("sortmode"); @@ -441,14 +446,16 @@ GatherNode::GatherNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& b basics::VelocyPackHelper::getNumericValue(base, "limit", 0); } - - setParallelism(parallelismFromString(VelocyPackHelper::getStringValue(base, "parellelism", ""))); + + setParallelism(parallelismFromString( + VelocyPackHelper::getStringValue(base, "parellelism", ""))); } -GatherNode::GatherNode(ExecutionPlan* plan, size_t id, SortMode sortMode, Parallelism parallelism) noexcept - : ExecutionNode(plan, id), +GatherNode::GatherNode(ExecutionPlan* plan, size_t id, SortMode sortMode, + Parallelism parallelism) noexcept + : ExecutionNode(plan, id), _vocbase(&(plan->getAst()->query()->vocbase())), - _sortmode(sortMode), + _sortmode(sortMode), _parallelism(parallelism), _limit(0) {} @@ -457,7 +464,7 @@ void GatherNode::toVelocyPackHelper(VPackBuilder& nodes, unsigned flags, std::unordered_set& seen) const { // call base class method ExecutionNode::toVelocyPackHelperGeneric(nodes, flags, seen); - + nodes.add("parallelism", VPackValue(toString(_parallelism))); if (_elements.empty()) { @@ -484,7 +491,7 @@ void GatherNode::toVelocyPackHelper(VPackBuilder& nodes, unsigned flags, } } } - + // And close it: 
nodes.close(); } @@ -507,15 +514,15 @@ std::unique_ptr GatherNode::createBlock( calcRegsToKeep(), getRegsToClear()); return std::make_unique>(&engine, this, - std::move(infos)); + std::move(infos)); } } - + Parallelism p = _parallelism; if (ServerState::instance()->isDBServer()) { - p = Parallelism::Serial; // not supported in v36 + p = Parallelism::Serial; // not supported in v36 } - + std::vector sortRegister; SortRegister::fill(*plan(), *getRegisterPlan(), _elements, sortRegister); SortingGatherExecutorInfos infos(make_shared_unordered_set(), @@ -553,7 +560,8 @@ struct ParallelizableFinder final : public WalkerWorker { bool _isParallelizable; explicit ParallelizableFinder(TRI_vocbase_t const& _vocbase) - : _parallelizeWrites(_vocbase.server().getFeature().parallelizeGatherWrites()), + : _parallelizeWrites( + _vocbase.server().getFeature().parallelizeGatherWrites()), _isParallelizable(true) {} ~ParallelizableFinder() = default; @@ -563,8 +571,7 @@ struct ParallelizableFinder final : public WalkerWorker { } bool before(ExecutionNode* node) override final { - if (node->getType() == ExecutionNode::SCATTER || - node->getType() == ExecutionNode::GATHER || + if (node->getType() == ExecutionNode::SCATTER || node->getType() == ExecutionNode::GATHER || node->getType() == ExecutionNode::DISTRIBUTE || node->getType() == ExecutionNode::TRAVERSAL || node->getType() == ExecutionNode::SHORTEST_PATH || @@ -576,10 +583,9 @@ struct ParallelizableFinder final : public WalkerWorker { // can be parallelized, provided the rest of the plan // does not prohibit this if (node->isModificationNode() && - (!_parallelizeWrites || - (node->getType() != ExecutionNode::REMOVE && - node->getType() != ExecutionNode::REPLACE && - node->getType() != ExecutionNode::UPDATE))) { + (!_parallelizeWrites || (node->getType() != ExecutionNode::REMOVE && + node->getType() != ExecutionNode::REPLACE && + node->getType() != ExecutionNode::UPDATE))) { _isParallelizable = false; return true; // true to abort the whole walking process } diff --git a/arangod/Aql/ConstFetcher.cpp b/arangod/Aql/ConstFetcher.cpp index 1b67cbe83359..16c011228246 100644 --- a/arangod/Aql/ConstFetcher.cpp +++ b/arangod/Aql/ConstFetcher.cpp @@ -22,6 +22,7 @@ #include "ConstFetcher.h" +#include "Aql/AqlCallStack.h" #include "Aql/DependencyProxy.h" #include "Aql/ShadowAqlItemRow.h" #include "Basics/Exceptions.h" @@ -35,6 +36,158 @@ ConstFetcher::ConstFetcher() : _currentBlock{nullptr}, _rowIndex(0) {} ConstFetcher::ConstFetcher(DependencyProxy& executionBlock) : _currentBlock{nullptr}, _rowIndex(0) {} +auto ConstFetcher::execute(AqlCallStack& stack) + -> std::tuple { + // Note this fetcher can only be executed on top level (it is the singleton, or test) + TRI_ASSERT(stack.isRelevant()); + auto call = stack.popCall(); + if (_blockForPassThrough == nullptr) { + // we are done, nothing to move arround here. + return {ExecutionState::DONE, 0, AqlItemBlockInputRange{ExecutorState::DONE}}; + } + std::vector> sliceIndexes; + sliceIndexes.emplace_back(_rowIndex, _blockForPassThrough->size()); + // Modifiable first slice indexes. 
+ // from is the first data row to be returned + // to is one after the last data row to be returned + + if (_blockForPassThrough->hasShadowRows()) { + auto shadowIndexes = _blockForPassThrough->getShadowRowIndexes(); + auto shadowRow = shadowIndexes.lower_bound(_rowIndex); + if (shadowRow != shadowIndexes.end()) { + size_t fromShadowRow = *shadowRow; + size_t toShadowRow = *shadowRow + 1; + for (++shadowRow; shadowRow != shadowIndexes.end(); ++shadowRow) { + if (*shadowRow == toShadowRow) { + ShadowAqlItemRow srow{_blockForPassThrough, toShadowRow}; + TRI_ASSERT(srow.isInitialized()); + if (srow.isRelevant()) { + // we cannot jump over relveant shadow rows. + // Unfortunately we need to stop including rows here. + // NOTE: As all blocks have this behaviour anyway + // this is not cirtical. + break; + } + toShadowRow++; + } + } + TRI_ASSERT(fromShadowRow < toShadowRow); + // We cannot go past the first shadowRow + sliceIndexes.emplace_back(fromShadowRow, toShadowRow); + sliceIndexes[0].second = fromShadowRow; + } + } + // Number of data rows we have left + size_t rowsLeft = sliceIndexes[0].second - sliceIndexes[0].first; + { + // We use this scope here to ensure the correctness + // of the following reference. + // if sliceIndexes is modified the references are broken + // in this scope it is ensured that sliceIndexes is not + // modified in size. + // These indexes will be modified by the call. + auto& [from, to] = sliceIndexes[0]; + + { + // Skip over the front rows. + // Adjust from and rowsLeft + // Note: canSkip can be 0 + size_t canSkip = (std::min)(call.getOffset(), rowsLeft); + from += canSkip; + rowsLeft -= canSkip; + call.didSkip(canSkip); + } + { + // Produce the next rows + // Adjost from and rowsLeft + // Note: canProduce can be 0 + size_t canProduce = (std::min)(call.getLimit(), rowsLeft); + to = from + canProduce; + rowsLeft -= canProduce; + call.didProduce(canProduce); + } + } + + // Now adjust the rowIndex for consumed rows + if (call.hasHardLimit() && rowsLeft > 0) { + // fast forward + // We can only get here, if we have skipped and produced all rows + TRI_ASSERT(call.getOffset() == 0 && call.getLimit() == 0); + if (call.needsFullCount()) { + call.didSkip(rowsLeft); + } + rowsLeft = 0; + + // In this case we consumed all rows until the end of + // a) the shadowRow range + // b) the end of the block + if (sliceIndexes.size() == 2) { + // We have shadowRows in use, go to end of their indexes + _rowIndex = sliceIndexes.back().second; + } else { + // We do not have shadowRows in use, need to to go the end. + _rowIndex = _blockForPassThrough->size(); + } + } else { + // No hardLimit, but softLimit. + // And we have not reached the end. + if (rowsLeft > 0 && sliceIndexes.size() == 2) { + // Cannot include shadowRows + sliceIndexes.pop_back(); + } + // Row index is now at the end of the last returned row + _rowIndex = sliceIndexes.back().second; + } + + // Now we have a slicing vector: + // [0] => (data rows) + // [1] => (shadow rows, optional) + TRI_ASSERT(sliceIndexes.size() == 1 || sliceIndexes.size() == 2); + + if (canUseFullBlock(sliceIndexes)) { + // FastPath + // No need for slicing + SharedAqlItemBlockPtr resultBlock = _blockForPassThrough; + _blockForPassThrough.reset(nullptr); + _rowIndex = 0; + return {ExecutionState::DONE, call.getSkipCount(), + DataRange{ExecutorState::DONE, resultBlock, 0, resultBlock->size()}}; + } + + SharedAqlItemBlockPtr resultBlock = _blockForPassThrough; + + if (_rowIndex >= resultBlock->size()) { + // used the full block by now. 
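    // (Dropping the reference here means the next call to execute() takes the
    // `_blockForPassThrough == nullptr` early-out at the top and reports DONE
    // with an empty input range.)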
+ _blockForPassThrough.reset(nullptr); + } + + if (sliceIndexes[0].first >= sliceIndexes[0].second) { + // We do not return any DataRow either we do not have one, + // or we have completely skipped it. + // Remove the indexes from the slice list + sliceIndexes.erase(sliceIndexes.begin()); + } + // NOTE: The above if may have invalidated from and to memory. + // Do not use them below this point! + + if (sliceIndexes.empty()) { + // No data to be returned + // Block is dropped. + resultBlock = nullptr; + return {ExecutionState::DONE, call.getSkipCount(), DataRange{ExecutorState::DONE}}; + } + + // Slowest path need to slice, this unfortunately requires copy of data + ExecutionState resState = + _blockForPassThrough == nullptr ? ExecutionState::DONE : ExecutionState::HASMORE; + ExecutorState rangeState = + _blockForPassThrough == nullptr ? ExecutorState::DONE : ExecutorState::HASMORE; + + resultBlock = resultBlock->slice(sliceIndexes); + return {resState, call.getSkipCount(), + DataRange{rangeState, resultBlock, 0, resultBlock->size()}}; +} + void ConstFetcher::injectBlock(SharedAqlItemBlockPtr block) { _currentBlock = block; _blockForPassThrough = std::move(block); @@ -77,15 +230,46 @@ std::pair ConstFetcher::skipRows(size_t) { return {rowState, 1}; } -bool ConstFetcher::indexIsValid() { +auto ConstFetcher::indexIsValid() const noexcept -> bool { return _currentBlock != nullptr && _rowIndex + 1 <= _currentBlock->size(); } -bool ConstFetcher::isLastRowInBlock() { +auto ConstFetcher::isLastRowInBlock() const noexcept -> bool { TRI_ASSERT(indexIsValid()); return _rowIndex + 1 == _currentBlock->size(); } +auto ConstFetcher::numRowsLeft() const noexcept -> size_t { + if (!indexIsValid()) { + return 0; + } + return _currentBlock->size() - _rowIndex; +} + +auto ConstFetcher::canUseFullBlock(std::vector> const& ranges) const + noexcept -> bool { + TRI_ASSERT(!ranges.empty()); + if (ranges.front().first != 0) { + // We do not start at the first index. + return false; + } + if (ranges.back().second != _currentBlock->size()) { + // We de not stop at the last index + return false; + } + + if (ranges.size() > 1) { + TRI_ASSERT(ranges.size() == 2); + if (ranges.front().second != ranges.back().first) { + // We have two ranges, that are not next to each other. + // We cannot use the full block, as we need to slice these out. + return false; + } + } + // If we get here, the ranges covers the full block + return true; +} + std::pair ConstFetcher::fetchBlockForPassthrough(size_t) { // Should only be called once, and then _blockForPassThrough should be // initialized. However, there are still some blocks left that ask their diff --git a/arangod/Aql/ConstFetcher.h b/arangod/Aql/ConstFetcher.h index e80f2c8693b4..46b9ec73667d 100644 --- a/arangod/Aql/ConstFetcher.h +++ b/arangod/Aql/ConstFetcher.h @@ -32,6 +32,7 @@ namespace arangodb { namespace aql { +class AqlCallStack; class AqlItemBlock; template class DependencyProxy; @@ -58,6 +59,20 @@ class ConstFetcher { ConstFetcher(); public: + /** + * @brief Execute the given call stack + * + * @param stack Call stack, on top of stack there is current subquery, bottom is the main query. 
+ * @return std::tuple + * ExecutionState => DONE, all queries are done, there will be no more + * ExecutionState => HASMORE, there are more results for queries, might be on other subqueries + * ExecutionState => WAITING, we need to do I/O to solve the request, save local state and return WAITING to caller immediately + * + * size_t => Amount of documents skipped + * DataRange => Resulting data + */ + auto execute(AqlCallStack& stack) -> std::tuple; + /** * @brief Fetch one new AqlItemRow from upstream. * **Guarantee**: the pointer returned is valid only @@ -111,8 +126,11 @@ class ConstFetcher { size_t _rowIndex; private: - bool indexIsValid(); - bool isLastRowInBlock(); + auto indexIsValid() const noexcept -> bool; + auto isLastRowInBlock() const noexcept -> bool; + auto numRowsLeft() const noexcept -> size_t; + auto canUseFullBlock(std::vector> const& ranges) const + noexcept -> bool; }; } // namespace aql diff --git a/arangod/Aql/DependencyProxy.cpp b/arangod/Aql/DependencyProxy.cpp index f420f6da5b0c..63f6ba51d140 100644 --- a/arangod/Aql/DependencyProxy.cpp +++ b/arangod/Aql/DependencyProxy.cpp @@ -34,9 +34,47 @@ using namespace arangodb::aql; template std::tuple DependencyProxy::execute(AqlCallStack& stack) { - // TODO: Test this, especially if upstreamBlock is done etc. - // We do not modify any local state here. - return upstreamBlock().execute(stack); + ExecutionState state = ExecutionState::HASMORE; + size_t skipped = 0; + SharedAqlItemBlockPtr block = nullptr; + do { + // Note: upstreamBlock will return next dependency + // if we need to loop here + if (!_distributeId.empty()) { + // We are in the cluster case. + // we have to ask executeForShard + auto upstreamWithClient = dynamic_cast(&upstreamBlock()); + TRI_ASSERT(upstreamWithClient != nullptr); + if (upstreamWithClient == nullptr) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, + "Invalid state reached, we try to " + "request sharded data from a block " + "that is not able to provide it."); + } + std::tie(state, skipped, block) = + upstreamWithClient->executeForClient(stack, _distributeId); + } else { + std::tie(state, skipped, block) = upstreamBlock().execute(stack); + } + TRI_IF_FAILURE("ExecutionBlock::getBlock") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + if (state == ExecutionState::WAITING) { + TRI_ASSERT(block == nullptr); + TRI_ASSERT(skipped == 0); + break; + } + + if (block == nullptr) { + // We're not waiting and didn't get a block, so we have to be done. 
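        // (If another dependency remains, advanceDependency() switches
        // upstreamBlock() to it and the surrounding do/while asks it next;
        // otherwise we fall out of the loop and hand the DONE state upwards.)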
+ TRI_ASSERT(state == ExecutionState::DONE); + if (!advanceDependency()) { + break; + } + } + + } while (block == nullptr); + return {state, skipped, block}; } template @@ -264,8 +302,7 @@ template DependencyProxy::DependencyProxy( std::vector const& dependencies, AqlItemBlockManager& itemBlockManager, std::shared_ptr const> inputRegisters, - RegisterId nrInputRegisters, - velocypack::Options const* const options) + RegisterId nrInputRegisters, velocypack::Options const* const options) : _dependencies(dependencies), _itemBlockManager(itemBlockManager), _inputRegisters(std::move(inputRegisters)), diff --git a/arangod/Aql/DistributeExecutor.cpp b/arangod/Aql/DistributeExecutor.cpp index 1042214c0710..50970918e72b 100644 --- a/arangod/Aql/DistributeExecutor.cpp +++ b/arangod/Aql/DistributeExecutor.cpp @@ -22,11 +22,15 @@ #include "DistributeExecutor.h" +#include "Aql/AqlCallStack.h" #include "Aql/ClusterNodes.h" #include "Aql/Collection.h" #include "Aql/ExecutionEngine.h" +#include "Aql/IdExecutor.h" +#include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/RegisterPlan.h" +#include "Aql/ShadowAqlItemRow.h" #include "Basics/StaticStrings.h" #include "VocBase/LogicalCollection.h" @@ -36,267 +40,268 @@ using namespace arangodb; using namespace arangodb::aql; -ExecutionBlockImpl::ExecutionBlockImpl( - ExecutionEngine* engine, DistributeNode const* node, ExecutorInfos&& infos, - std::vector const& shardIds, Collection const* collection, +DistributeExecutorInfos::DistributeExecutorInfos( + std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, + std::vector clientIds, Collection const* collection, RegisterId regId, RegisterId alternativeRegId, bool allowSpecifiedKeys, - bool allowKeyConversionToObject, bool createKeys) - : BlocksWithClients(engine, node, shardIds), - _infos(std::move(infos)), - _query(*engine->getQuery()), - _collection(collection), - _logCol(_collection->getCollection()), - _index(0), + bool allowKeyConversionToObject, bool createKeys, ScatterNode::ScatterType type) + : ExecutorInfos(readableInputRegisters, writeableOutputRegisters, nrInputRegisters, + nrOutputRegisters, registersToClear, registersToKeep), + ClientsExecutorInfos(std::move(clientIds)), _regId(regId), _alternativeRegId(alternativeRegId), - _allowSpecifiedKeys(allowSpecifiedKeys), _allowKeyConversionToObject(allowKeyConversionToObject), - _createKeys(createKeys) { - _usesDefaultSharding = collection->usesDefaultSharding(); -} - -/// @brief initializeCursor -std::pair ExecutionBlockImpl::initializeCursor( - InputAqlItemRow const& input) { - // local clean up - _distBuffer.clear(); - _distBuffer.reserve(_nrClients); - - for (size_t i = 0; i < _nrClients; i++) { - _distBuffer.emplace_back(); + _createKeys(createKeys), + _usesDefaultSharding(collection->usesDefaultSharding()), + _allowSpecifiedKeys(allowSpecifiedKeys), + _collection(collection), + _logCol(collection->getCollection()), + _type(type) { + TRI_ASSERT(readableInputRegisters->find(_regId) != readableInputRegisters->end()); + if (hasAlternativeRegister()) { + TRI_ASSERT(readableInputRegisters->find(_alternativeRegId) != + readableInputRegisters->end()); } - - return ExecutionBlock::initializeCursor(input); } -/// @brief getSomeForShard -std::pair ExecutionBlockImpl::getSomeForShard( - size_t atMost, std::string const& shardId) { - traceGetSomeBegin(atMost); - auto result = 
getSomeForShardWithoutTrace(atMost, shardId); - return traceGetSomeEnd(result.first, std::move(result.second)); +auto DistributeExecutorInfos::registerId() const noexcept -> RegisterId { + TRI_ASSERT(_regId != RegisterPlan::MaxRegisterId); + return _regId; } - -std::pair ExecutionBlockImpl::getSomeForShardWithoutTrace( - size_t atMost, std::string const& shardId) { - if (getQuery().killed()) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); - } - // NOTE: We do not need to retain these, the getOrSkipSome is required to! - size_t skipped = 0; - SharedAqlItemBlockPtr result = nullptr; - auto out = getOrSkipSomeForShard(atMost, false, result, skipped, shardId); - if (out.first == ExecutionState::WAITING) { - return {out.first, nullptr}; - } - if (!out.second.ok()) { - THROW_ARANGO_EXCEPTION(out.second); - } - return {out.first, std::move(result)}; +auto DistributeExecutorInfos::hasAlternativeRegister() const noexcept -> bool { + return _alternativeRegId != RegisterPlan::MaxRegisterId; } - -/// @brief skipSomeForShard -std::pair ExecutionBlockImpl::skipSomeForShard( - size_t atMost, std::string const& shardId) { - traceSkipSomeBegin(atMost); - auto result = skipSomeForShardWithoutTrace(atMost, shardId); - return traceSkipSomeEnd(result.first, result.second); +auto DistributeExecutorInfos::alternativeRegisterId() const noexcept -> RegisterId { + TRI_ASSERT(_alternativeRegId != RegisterPlan::MaxRegisterId); + return _alternativeRegId; } -std::pair ExecutionBlockImpl::skipSomeForShardWithoutTrace( - size_t atMost, std::string const& shardId) { - if (getQuery().killed()) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); - } - // NOTE: We do not need to retain these, the getOrSkipSome is required to! - size_t skipped = 0; - SharedAqlItemBlockPtr result = nullptr; - auto out = getOrSkipSomeForShard(atMost, true, result, skipped, shardId); - if (out.first == ExecutionState::WAITING) { - return {out.first, 0}; - } - TRI_ASSERT(result == nullptr); - if (!out.second.ok()) { - THROW_ARANGO_EXCEPTION(out.second); - } - return {out.first, skipped}; +auto DistributeExecutorInfos::allowKeyConversionToObject() const noexcept -> bool { + return _allowKeyConversionToObject; } -/// @brief getOrSkipSomeForShard -std::pair ExecutionBlockImpl::getOrSkipSomeForShard( - size_t atMost, bool skipping, SharedAqlItemBlockPtr& result, - size_t& skipped, std::string const& shardId) { - TRI_ASSERT(result == nullptr && skipped == 0); - TRI_ASSERT(atMost > 0); - - size_t clientId = getClientId(shardId); - - if (!hasMoreForClientId(clientId)) { - return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; - } - - std::deque>& buf = _distBuffer.at(clientId); +auto DistributeExecutorInfos::createKeys() const noexcept -> bool { + return _createKeys; +} +auto DistributeExecutorInfos::usesDefaultSharding() const noexcept -> bool { + return _usesDefaultSharding; +} +auto DistributeExecutorInfos::allowSpecifiedKeys() const noexcept -> bool { + return _allowSpecifiedKeys; +} - if (buf.empty()) { - auto res = getBlockForClient(atMost, clientId); - if (res.first == ExecutionState::WAITING) { - return {res.first, TRI_ERROR_NO_ERROR}; - } - if (!res.second) { - // Upstream is empty! 
- TRI_ASSERT(res.first == ExecutionState::DONE); - return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; - } - } +auto DistributeExecutorInfos::scatterType() const noexcept -> ScatterNode::ScatterType { + return _type; +} - skipped = (std::min)(buf.size(), atMost); +auto DistributeExecutorInfos::getResponsibleClient(arangodb::velocypack::Slice value) const + -> ResultT { + std::string shardId; + int res = _logCol->getResponsibleShard(value, true, shardId); - if (skipping) { - for (size_t i = 0; i < skipped; i++) { - buf.pop_front(); - } - return {getHasMoreStateForClientId(clientId), TRI_ERROR_NO_ERROR}; + if (res != TRI_ERROR_NO_ERROR) { + return Result{res}; } - BlockCollector collector(&_engine->itemBlockManager()); - std::vector chosen; - - size_t i = 0; - while (i < skipped) { - size_t const n = buf.front().first; - while (buf.front().first == n && i < skipped) { - chosen.emplace_back(buf.front().second); - buf.pop_front(); - i++; - - // make sure we are not overreaching over the end of the buffer - if (buf.empty()) { - break; - } - } - - SharedAqlItemBlockPtr more{_buffer[n]->slice(chosen, 0, chosen.size())}; - collector.add(std::move(more)); - - chosen.clear(); + TRI_ASSERT(!shardId.empty()); + if (_type == ScatterNode::ScatterType::SERVER) { + // Special case for server based distribution. + shardId = _collection->getServerForShard(shardId); + TRI_ASSERT(!shardId.empty()); } + return shardId; +} - // Skipping was handle before - TRI_ASSERT(!skipping); - result = collector.steal(); - - // _buffer is left intact, deleted and cleared at shutdown - - return {getHasMoreStateForClientId(clientId), TRI_ERROR_NO_ERROR}; +/// @brief create a new document key +auto DistributeExecutorInfos::createKey(VPackSlice input) const -> std::string { + return _logCol->createKey(input); } -/// @brief hasMore: any more for any shard? -bool ExecutionBlockImpl::hasMoreForShard(std::string const& shardId) const { - return hasMoreForClientId(getClientId(shardId)); +// TODO +// This section is not implemented yet + +DistributeExecutor::ClientBlockData::ClientBlockData(ExecutionEngine& engine, + ScatterNode const* node, + ExecutorInfos const& scatterInfos) + : _blockManager(engine.itemBlockManager()), _infos(scatterInfos) { + // We only get shared ptrs to const data. so we need to copy here... + IdExecutorInfos infos{scatterInfos.numberOfInputRegisters(), + *scatterInfos.registersToKeep(), + *scatterInfos.registersToClear(), "", false}; + // NOTE: Do never change this type! The execute logic below requires this and only this type. 
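  // (Concretely: ClientBlockData::execute() below static_casts _executor back
  // to ExecutionBlockImpl<IdExecutor<ConstFetcher>>* in order to call
  // injectConstantBlock() on it, so the constructed type and that cast must
  // stay in sync.)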
+ _executor = + std::make_unique>>(&engine, node, + std::move(infos)); } -ExecutionState ExecutionBlockImpl::getHasMoreStateForClientId(size_t clientId) const { - if (hasMoreForClientId(clientId)) { - return ExecutionState::HASMORE; - } - return ExecutionState::DONE; +auto DistributeExecutor::ClientBlockData::clear() -> void { + _queue.clear(); + _executorHasMore = false; } -bool ExecutionBlockImpl::hasMoreForClientId(size_t clientId) const { - // We have more for a client ID if - // we still have some information in the local buffer - // or if there is still some information from upstream +auto DistributeExecutor::ClientBlockData::addBlock(SharedAqlItemBlockPtr block, + std::vector usedIndexes) -> void { + _queue.emplace_back(block, std::move(usedIndexes)); +} - TRI_ASSERT(_distBuffer.size() > clientId); - if (!_distBuffer[clientId].empty()) { - return true; - } - return _upstreamState == ExecutionState::HASMORE; +auto DistributeExecutor::ClientBlockData::hasDataFor(AqlCall const& call) -> bool { + return _executorHasMore || !_queue.empty(); } -/// @brief getBlockForClient: try to get atMost pairs into -/// _distBuffer.at(clientId), this means we have to look at every row in the -/// incoming blocks until they run out or we find enough rows for clientId. We -/// also keep track of blocks which should be sent to other clients than the -/// current one. -std::pair ExecutionBlockImpl::getBlockForClient( - size_t atMost, size_t clientId) { - - if (_buffer.empty()) { - _index = 0; // position in _buffer - _pos = 0; // position in _buffer.at(_index) +/** + * @brief This call will join as many blocks as available from the queue + * and return them in a SingleBlock. We then use the IdExecutor + * to hand out the data contained in these blocks + * We do on purpose not give any kind of guarantees on the sizing of + * this block to be flexible with the implementation, and find a good + * trade-off between blocksize and block copy operations. + * + * @return SharedAqlItemBlockPtr a joind block from the queue. + */ +auto DistributeExecutor::ClientBlockData::popJoinedBlock() -> SharedAqlItemBlockPtr { + // There are some optimizations available in this implementation. + // Namely we could apply good logic to cut the blocks at shadow rows + // in order to allow the IDexecutor to hand them out en-block. + // However we might leverage the restriction to stop at ShadowRows + // at one point anyways, and this Executor has no business with ShadowRows. + size_t numRows = 0; + for (auto const& [block, choosen] : _queue) { + numRows += choosen.size(); + if (numRows >= ExecutionBlock::DefaultBatchSize) { + // Avoid to put too many rows into this block. + break; + } } - // it should be the case that buf.at(clientId) is empty - auto& buf = _distBuffer[clientId]; - - while (buf.size() < atMost) { - if (_index == _buffer.size()) { - if (getQuery().killed()) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); - } - auto res = getBlock(atMost); - if (res.first == ExecutionState::WAITING) { - return {res.first, false}; - } - if (!res.second) { - TRI_ASSERT(res.first == ExecutionState::DONE); - if (buf.empty()) { - TRI_ASSERT(getHasMoreStateForClientId(clientId) == ExecutionState::DONE); - return {ExecutionState::DONE, false}; - } - break; + SharedAqlItemBlockPtr newBlock = + _blockManager.requestBlock(numRows, _infos.numberOfOutputRegisters()); + // We create a block, with correct register information + // but we do not allow outputs to be written. 
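  // (The empty output-register set handed to OutputAqlItemRow below is what
  // enforces this: no register is writable, so rows only enter the new block
  // through copyRow() in the loop that follows.)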
+ OutputAqlItemRow output{newBlock, make_shared_unordered_set(), + _infos.registersToKeep(), _infos.registersToClear()}; + while (!output.isFull()) { + // If the queue is empty our sizing above would not be correct + TRI_ASSERT(!_queue.empty()); + auto const& [block, choosen] = _queue.front(); + TRI_ASSERT(output.numRowsLeft() >= choosen.size()); + for (auto const& i : choosen) { + // We do not really care what we copy. However + // the API requires to know what it is. + if (block->isShadowRow(i)) { + ShadowAqlItemRow toCopy{block, i}; + output.copyRow(toCopy); + } else { + InputAqlItemRow toCopy{block, i}; + output.copyRow(toCopy); } + output.advanceRow(); } + // All required rows copied. + // Drop block form queue. + _queue.pop_front(); + } + return newBlock; +} - SharedAqlItemBlockPtr cur = _buffer[_index]; +auto DistributeExecutor::ClientBlockData::execute(AqlCall call, ExecutionState upstreamState) + -> std::tuple { + TRI_ASSERT(_executor != nullptr); + // Make sure we actually have data before you call execute + TRI_ASSERT(hasDataFor(call)); + if (!_executorHasMore) { + // This cast is guaranteed, we create this a couple lines above and only + // this executor is used here. + // Unfortunately i did not get a version compiled were i could only forward + // declare the templates in header. + auto casted = + static_cast>*>(_executor.get()); + TRI_ASSERT(casted != nullptr); + auto block = popJoinedBlock(); + // We will at least get one block, otherwise the hasDataFor would + // be required to return false! + TRI_ASSERT(block != nullptr); + + casted->injectConstantBlock(block); + _executorHasMore = true; + } + AqlCallStack stack{call}; + auto [state, skipped, result] = _executor->execute(stack); - for (; _pos < cur->size(); ++_pos) { - if (!cur->isShadowRow(_pos)) { - // this may modify the input item buffer in place - size_t const id = sendToClient(cur); + // We have all data locally cannot wait here. + TRI_ASSERT(state != ExecutionState::WAITING); - _distBuffer[id].emplace_back(_index, _pos); - } else { - // A shadow row must always be distributed to all clients. 
- for (auto& dist : _distBuffer) { - dist.emplace_back(_index, _pos); - } - } - } + if (state == ExecutionState::DONE) { + // This executor is finished, including shadowrows + // We are going to reset it on next call + _executorHasMore = false; - if (_pos == cur->size()) { - _pos = 0; - _index++; + // Also we need to adjust the return states + // as this state only represents one single block + if (!_queue.empty()) { + state = ExecutionState::HASMORE; } else { - break; + state = upstreamState; } } - - return {getHasMoreStateForClientId(clientId), true}; + return {state, skipped, result}; } -/// @brief sendToClient: for each row of the incoming AqlItemBlock use the -/// attributes of the Aql value to determine to which shard -/// the row should be sent and return its clientId -size_t ExecutionBlockImpl::sendToClient(SharedAqlItemBlockPtr cur) { - if (getQuery().killed()) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); +DistributeExecutor::DistributeExecutor(DistributeExecutorInfos const& infos) + : _infos(infos){}; + +auto DistributeExecutor::distributeBlock(SharedAqlItemBlockPtr block, + std::unordered_map& blockMap) + -> void { + std::unordered_map> choosenMap; + choosenMap.reserve(blockMap.size()); + for (size_t i = 0; i < block->size(); ++i) { + if (block->isShadowRow(i)) { + // ShadowRows need to be added to all Clients + for (auto const& [key, value] : blockMap) { + choosenMap[key].emplace_back(i); + } + } else { + auto client = getClient(block, i); + // We can only have clients we are prepared for + TRI_ASSERT(blockMap.find(client) != blockMap.end()); + choosenMap[client].emplace_back(i); + } + } + // We cannot have more in choosen than we have blocks + TRI_ASSERT(choosenMap.size() <= blockMap.size()); + for (auto const& [key, value] : choosenMap) { + TRI_ASSERT(blockMap.find(key) != blockMap.end()); + auto target = blockMap.find(key); + if (target == blockMap.end()) { + // Impossible, just avoid UB. + LOG_TOPIC("7bae6", ERR, Logger::AQL) + << "Tried to distribute data to shard " << key + << " which is not part of the query. Ignoring."; + continue; + } + target->second.addBlock(block, std::move(value)); } +} - // inspect cur in row _pos and check to which shard it should be sent . . 
- AqlValue val = cur->getValueReference(_pos, _regId); +auto DistributeExecutor::getClient(SharedAqlItemBlockPtr block, size_t rowIndex) + -> std::string { + InputAqlItemRow row{block, rowIndex}; + AqlValue val = row.getValue(_infos.registerId()); VPackSlice input = val.slice(); // will throw when wrong type bool usedAlternativeRegId = false; - if (input.isNull() && _alternativeRegId != RegisterPlan::MaxRegisterId) { + if (input.isNull() && _infos.hasAlternativeRegister()) { // value is set, but null // check if there is a second input register available (UPSERT makes use of // two input registers, // one for the search document, the other for the insert document) - val = cur->getValueReference(_pos, _alternativeRegId); + val = row.getValue(_infos.alternativeRegisterId()); input = val.slice(); // will throw when wrong type usedAlternativeRegId = true; @@ -305,17 +310,17 @@ size_t ExecutionBlockImpl::sendToClient(SharedAqlItemBlockPt VPackSlice value = input; bool hasCreatedKeyAttribute = false; - if (input.isString() && _allowKeyConversionToObject) { + if (input.isString() && _infos.allowKeyConversionToObject()) { _keyBuilder.clear(); _keyBuilder.openObject(true); _keyBuilder.add(StaticStrings::KeyString, input); _keyBuilder.close(); // clear the previous value - cur->destroyValue(_pos, _regId); + block->destroyValue(rowIndex, _infos.registerId()); // overwrite with new value - cur->emplaceValue(_pos, _regId, _keyBuilder.slice()); + block->emplaceValue(rowIndex, _infos.registerId(), _keyBuilder.slice()); value = _keyBuilder.slice(); hasCreatedKeyAttribute = true; @@ -325,11 +330,11 @@ size_t ExecutionBlockImpl::sendToClient(SharedAqlItemBlockPt TRI_ASSERT(value.isObject()); - if (_createKeys) { + if (_infos.createKeys()) { bool buildNewObject = false; // we are responsible for creating keys if none present - if (_usesDefaultSharding) { + if (_infos.usesDefaultSharding()) { // the collection is sharded by _key... if (!hasCreatedKeyAttribute && !value.hasKey(StaticStrings::KeyString)) { // there is no _key attribute present, so we are responsible for @@ -340,7 +345,7 @@ size_t ExecutionBlockImpl::sendToClient(SharedAqlItemBlockPt // the collection is not sharded by _key if (hasCreatedKeyAttribute || value.hasKey(StaticStrings::KeyString)) { // a _key was given, but user is not allowed to specify _key - if (usedAlternativeRegId || !_allowSpecifiedKeys) { + if (usedAlternativeRegId || !_infos.allowSpecifiedKeys()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_MUST_NOT_SPECIFY_KEY); } } else { @@ -351,47 +356,97 @@ size_t ExecutionBlockImpl::sendToClient(SharedAqlItemBlockPt if (buildNewObject) { _keyBuilder.clear(); _keyBuilder.openObject(true); - _keyBuilder.add(StaticStrings::KeyString, VPackValue(createKey(value))); + _keyBuilder.add(StaticStrings::KeyString, VPackValue(_infos.createKey(value))); _keyBuilder.close(); _objectBuilder.clear(); VPackCollection::merge(_objectBuilder, input, _keyBuilder.slice(), true); // clear the previous value and overwrite with new value: - if (usedAlternativeRegId) { - cur->destroyValue(_pos, _alternativeRegId); - cur->emplaceValue(_pos, _alternativeRegId, _objectBuilder.slice()); - } else { - cur->destroyValue(_pos, _regId); - cur->emplaceValue(_pos, _regId, _objectBuilder.slice()); - } + auto reg = usedAlternativeRegId ? 
_infos.alternativeRegisterId() + : _infos.registerId(); + + block->destroyValue(rowIndex, reg); + block->emplaceValue(rowIndex, reg, _objectBuilder.slice()); value = _objectBuilder.slice(); } } + auto res = _infos.getResponsibleClient(value); + THROW_ARANGO_EXCEPTION_IF_FAIL(res.result()); + return res.get(); +} - std::string shardId; - int res = _logCol->getResponsibleShard(value, true, shardId); +ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, + DistributeNode const* node, + DistributeExecutorInfos&& infos) + : BlocksWithClientsImpl(engine, node, std::move(infos)) {} - if (res != TRI_ERROR_NO_ERROR) { - THROW_ARANGO_EXCEPTION(res); +/* +/// @brief getOrSkipSomeForShard +std::pair ExecutionBlockImpl::getOrSkipSomeForShard( + size_t atMost, bool skipping, SharedAqlItemBlockPtr& result, + size_t& skipped, std::string const& shardId) { + TRI_ASSERT(result == nullptr && skipped == 0); + TRI_ASSERT(atMost > 0); + + size_t clientId = getClientId(shardId); + + if (!hasMoreForClientId(clientId)) { + return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; } - TRI_ASSERT(!shardId.empty()); - if (_type == ScatterNode::ScatterType::SERVER) { - // Special case for server based distribution. - shardId = _collection->getServerForShard(shardId); - TRI_ASSERT(!shardId.empty()); + std::deque>& buf = _distBuffer.at(clientId); + + if (buf.empty()) { + auto res = getBlockForClient(atMost, clientId); + if (res.first == ExecutionState::WAITING) { + return {res.first, TRI_ERROR_NO_ERROR}; + } + if (!res.second) { + // Upstream is empty! + TRI_ASSERT(res.first == ExecutionState::DONE); + return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; + } } - return getClientId(shardId); -} -Query const& ExecutionBlockImpl::getQuery() const noexcept { return _query; } + skipped = (std::min)(buf.size(), atMost); -/// @brief create a new document key -std::string ExecutionBlockImpl::createKey(VPackSlice input) const { - return _logCol->createKey(input); -} + if (skipping) { + for (size_t i = 0; i < skipped; i++) { + buf.pop_front(); + } + return {getHasMoreStateForClientId(clientId), TRI_ERROR_NO_ERROR}; + } + + BlockCollector collector(&_engine->itemBlockManager()); + std::vector chosen; + + size_t i = 0; + while (i < skipped) { + size_t const n = buf.front().first; + while (buf.front().first == n && i < skipped) { + chosen.emplace_back(buf.front().second); + buf.pop_front(); + i++; + + // make sure we are not overreaching over the end of the buffer + if (buf.empty()) { + break; + } + } + + SharedAqlItemBlockPtr more{_buffer[n]->slice(chosen, 0, chosen.size())}; + collector.add(std::move(more)); + + chosen.clear(); + } + + // Skipping was handle before + TRI_ASSERT(!skipping); + result = collector.steal(); -ExecutorInfos const& ExecutionBlockImpl::infos() const { - return _infos; + // _buffer is left intact, deleted and cleared at shutdown + + return {getHasMoreStateForClientId(clientId), TRI_ERROR_NO_ERROR}; } +*/ \ No newline at end of file diff --git a/arangod/Aql/DistributeExecutor.h b/arangod/Aql/DistributeExecutor.h index 003e3fda1fb8..5d0073b45a37 100644 --- a/arangod/Aql/DistributeExecutor.h +++ b/arangod/Aql/DistributeExecutor.h @@ -26,123 +26,154 @@ #include "Aql/BlocksWithClients.h" #include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutorInfos.h" +#include "Cluster/ResultT.h" namespace arangodb { namespace aql { +class AqlItemBlockManager; class DistributeNode; -// The DistributeBlock is actually implemented by specializing -// ExecutionBlockImpl, so this class only exists to identify the specialization. 
-class DistributeExecutor {}; - -class Query; - -/** - * @brief See ExecutionBlockImpl.h for documentation. - */ -template <> -class ExecutionBlockImpl : public BlocksWithClients { +class DistributeExecutorInfos : public ExecutorInfos, public ClientsExecutorInfos { public: - // TODO Even if it's not strictly necessary here, for consistency's sake the - // non-standard arguments (shardIds, collection) should probably be moved into - // some DistributeExecutorInfos class. - ExecutionBlockImpl(ExecutionEngine* engine, DistributeNode const* node, - ExecutorInfos&& infos, std::vector const& shardIds, - Collection const* collection, RegisterId regId, - RegisterId alternativeRegId, bool allowSpecifiedKeys, - bool allowKeyConversionToObject, bool createKeys); - - ~ExecutionBlockImpl() override = default; - - std::pair initializeCursor(InputAqlItemRow const& input) override; - - /// @brief getSomeForShard - std::pair getSomeForShard(size_t atMost, - std::string const& shardId) override; - - /// @brief skipSomeForShard - std::pair skipSomeForShard(size_t atMost, - std::string const& shardId) override; + DistributeExecutorInfos(std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, + std::vector clientIds, Collection const* collection, + RegisterId regId, RegisterId alternativeRegId, + bool allowSpecifiedKeys, bool allowKeyConversionToObject, + bool createKeys, ScatterNode::ScatterType type); + + auto registerId() const noexcept -> RegisterId; + auto hasAlternativeRegister() const noexcept -> bool; + auto alternativeRegisterId() const noexcept -> RegisterId; + auto allowKeyConversionToObject() const noexcept -> bool; + auto createKeys() const noexcept -> bool; + auto usesDefaultSharding() const noexcept -> bool; + auto allowSpecifiedKeys() const noexcept -> bool; + auto scatterType() const noexcept -> ScatterNode::ScatterType; + + auto getResponsibleClient(arangodb::velocypack::Slice value) const + -> ResultT; + + auto createKey(VPackSlice input) const -> std::string; private: - /// @brief getSomeForShard - std::pair getSomeForShardWithoutTrace( - size_t atMost, std::string const& shardId); - - /// @brief skipSomeForShard - std::pair skipSomeForShardWithoutTrace(size_t atMost, - std::string const& shardId); - - std::pair getOrSkipSomeForShard( - size_t atMost, bool skipping, SharedAqlItemBlockPtr& result, - size_t& skipped, std::string const& shardId); - - bool hasMoreForClientId(size_t clientId) const; - - /// @brief getHasMoreStateForClientId: State for client ? - ExecutionState getHasMoreStateForClientId(size_t clientId) const; - - /// @brief hasMoreForShard: any more for shard ? - bool hasMoreForShard(std::string const& shardId) const; + RegisterId _regId; + RegisterId _alternativeRegId; + bool _allowKeyConversionToObject; + bool _createKeys; + bool _usesDefaultSharding; + bool _allowSpecifiedKeys; - /// @brief getBlockForClient: try to get at atMost pairs into - /// _distBuffer.at(clientId). - std::pair getBlockForClient(size_t atMost, size_t clientId); + /// @brief _colectionName: the name of the sharded collection + Collection const* _collection; - /// @brief sendToClient: for each row of the incoming AqlItemBlock use the - /// attributes of the register to determine to which shard - /// the row should be sent. - size_t sendToClient(SharedAqlItemBlockPtr); + /// @brief Cache for the Logical Collection. 
This way it is not refetched + /// on every document. + std::shared_ptr _logCol; - /// @brief create a new document key - std::string createKey(arangodb::velocypack::Slice) const; + /// @brief type of distribution that this nodes follows. + ScatterNode::ScatterType _type; +}; - ExecutorInfos const& infos() const; - - Query const& getQuery() const noexcept; +// The DistributeBlock is actually implemented by specializing +// ExecutionBlockImpl, so this class only exists to identify the specialization. +class DistributeExecutor { + public: + using Infos = DistributeExecutorInfos; + + class ClientBlockData { + public: + ClientBlockData(ExecutionEngine& engine, ScatterNode const* node, + ExecutorInfos const& scatterInfos); + + auto clear() -> void; + auto addBlock(SharedAqlItemBlockPtr block, std::vector usedIndexes) -> void; + auto hasDataFor(AqlCall const& call) -> bool; + + auto execute(AqlCall call, ExecutionState upstreamState) + -> std::tuple; + + private: + /** + * @brief This call will join as many blocks as available from the queue + * and return them in a SingleBlock. We then use the IdExecutor + * to hand out the data contained in these blocks + * We do on purpose not give any kind of guarantees on the sizing of + * this block to be flexible with the implementation, and find a good + * trade-off between blocksize and block copy operations. + * + * @return SharedAqlItemBlockPtr a joind block from the queue. + */ + auto popJoinedBlock() -> SharedAqlItemBlockPtr; + + private: + AqlItemBlockManager& _blockManager; + ExecutorInfos const& _infos; + + std::deque>> _queue; + + // This is unique_ptr to get away with everything beeing forward declared... + std::unique_ptr _executor; + bool _executorHasMore; + }; + + DistributeExecutor(DistributeExecutorInfos const& infos); + ~DistributeExecutor() = default; + + /** + * @brief Distribute the rows of the given block into the blockMap + * NOTE: Has SideEffects + * If the input value does not contain an object, it is modified inplace with + * a new Object containing a key value! + * Hence this method is not const ;( + * + * @param block The block to be distributed + * @param blockMap Map client => Data. Will provide the required data to the correct client. + */ + auto distributeBlock(SharedAqlItemBlockPtr block, + std::unordered_map& blockMap) -> void; private: - ExecutorInfos _infos; - - Query const& _query; + /** + * @brief Compute which client needs to get this row + * NOTE: Has SideEffects + * If the input value does not contain an object, it is modified inplace with + * a new Object containing a key value! + * Hence this method is not const ;( + * + * @param block The input block + * @param rowIndex + * @return std::string Identifier used by the client + */ + auto getClient(SharedAqlItemBlockPtr block, size_t rowIndex) -> std::string; - /// @brief _distBuffer.at(i) is a deque containing pairs (j,k) such that - // _buffer.at(j) row k should be sent to the client with id = i. - std::vector>> _distBuffer; + private: + DistributeExecutorInfos const& _infos; // a reusable Builder object for building _key values arangodb::velocypack::Builder _keyBuilder; // a reusable Builder object for building document objects arangodb::velocypack::Builder _objectBuilder; +}; - /// @brief _colectionName: the name of the sharded collection - Collection const* _collection; - - /// @brief Cache for the Logical Collection. This way it is not refetched - /// on every document. 
- std::shared_ptr _logCol; - - /// @brief _index: the block in _buffer we are currently considering - size_t _index; - - /// @brief _regId: the register to inspect - RegisterId _regId; - - /// @brief a second register to inspect (used only for UPSERT nodes at the - /// moment to distinguish between search and insert) - RegisterId _alternativeRegId; - - /// @brief whether or not the collection uses the default sharding - bool _usesDefaultSharding; - - /// @brief allow specified keys even in non-default sharding case - bool _allowSpecifiedKeys; +class Query; - bool _allowKeyConversionToObject; +/** + * @brief See ExecutionBlockImpl.h for documentation. + */ +template <> +class ExecutionBlockImpl + : public BlocksWithClientsImpl { + public: + ExecutionBlockImpl(ExecutionEngine* engine, DistributeNode const* node, + DistributeExecutorInfos&& infos); - bool _createKeys; + ~ExecutionBlockImpl() override = default; }; } // namespace aql diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 9c525cf2caef..229e30debfb4 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -131,7 +131,8 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); */ template constexpr bool isNewStyleExecutor = - is_one_of_v, + IdExecutor>, ReturnExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif @@ -556,6 +557,7 @@ std::pair ExecutionBlockImpl::initializeCursor // reinitialize the DependencyProxy _dependencyProxy.reset(); _lastRange = DataRange(ExecutorState::HASMORE); + _hasUsedDataRangeBlock = false; // destroy and re-create the Fetcher _rowFetcher.~Fetcher(); @@ -631,9 +633,11 @@ std::tuple ExecutionBlockImpl std::pair arangodb::aql::ExecutionBlockImpl::initializeCursor(arangodb::aql::AqlItemBlock*, size_t)' in different namespace namespace arangodb::aql { // TODO -- remove this specialization when cpp 17 becomes available + template <> -std::pair ExecutionBlockImpl>::initializeCursor( - InputAqlItemRow const& input) { +template <> +auto ExecutionBlockImpl>::injectConstantBlock>(SharedAqlItemBlockPtr block) + -> void { // reinitialize the DependencyProxy _dependencyProxy.reset(); @@ -646,15 +650,27 @@ std::pair ExecutionBlockImpl>:: TRI_ASSERT(_state == InternalState::DONE || _state == InternalState::FETCH_DATA); _state = InternalState::FETCH_DATA; - SharedAqlItemBlockPtr block = - input.cloneToBlock(_engine->itemBlockManager(), *(infos().registersToKeep()), - infos().numberOfOutputRegisters()); + // Reset state of execute + _lastRange = AqlItemBlockInputRange{ExecutorState::HASMORE}; + _hasUsedDataRangeBlock = false; + _upstreamState = ExecutionState::HASMORE; _rowFetcher.injectBlock(block); // cppcheck-suppress unreadVariable constexpr bool customInit = hasInitializeCursor::value; InitializeCursor::init(_executor, _rowFetcher, _infos); +} + +// TODO -- remove this specialization when cpp 17 becomes available +template <> +std::pair ExecutionBlockImpl>::initializeCursor( + InputAqlItemRow const& input) { + SharedAqlItemBlockPtr block = + input.cloneToBlock(_engine->itemBlockManager(), *(infos().registersToKeep()), + infos().numberOfOutputRegisters()); + + injectConstantBlock(block); // end of default initializeCursor return ExecutionBlock::initializeCursor(input); diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index a50634f58bbb..5868ae27739a 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ 
b/arangod/Aql/ExecutionBlockImpl.h @@ -37,6 +37,9 @@ namespace arangodb::aql { +template +class IdExecutor; + struct AqlCall; class AqlItemBlock; class ExecutionEngine; @@ -196,6 +199,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] std::pair initializeCursor(InputAqlItemRow const& input) override; + template >>> + auto injectConstantBlock(SharedAqlItemBlockPtr block) -> void; + [[nodiscard]] Infos const& infos() const; /// @brief shutdown, will be called exactly once for the whole query diff --git a/arangod/Aql/IdExecutor.cpp b/arangod/Aql/IdExecutor.cpp index 909aa5d7256a..7965e7fbb4e5 100644 --- a/arangod/Aql/IdExecutor.cpp +++ b/arangod/Aql/IdExecutor.cpp @@ -22,7 +22,9 @@ #include "IdExecutor.h" +#include "Aql/AqlCall.h" #include "Aql/AqlCallStack.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlValue.h" #include "Aql/ConstFetcher.h" #include "Aql/ExecutionEngine.h" @@ -158,29 +160,26 @@ IdExecutor::~IdExecutor() = default; template std::pair IdExecutor::produceRows(OutputAqlItemRow& output) { - ExecutionState state = ExecutionState::HASMORE; - NoStats stats; - InputAqlItemRow inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; - while (!output.isFull() && state != ExecutionState::DONE) { - std::tie(state, inputRow) = _fetcher.fetchRow(output.numRowsLeft()); + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - if (state == ExecutionState::WAITING) { - TRI_ASSERT(!inputRow); - return {state, stats}; - } +template +auto IdExecutor::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { + NoStats stats; - if (!inputRow) { - TRI_ASSERT(state == ExecutionState::DONE); - return {state, stats}; - } + while (!output.isFull() && inputRange.hasDataRow()) { + auto const& [state, inputRow] = inputRange.nextDataRow(); + TRI_ASSERT(inputRow); TRI_IF_FAILURE("SingletonBlock::getOrSkipSome") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - TRI_ASSERT(state == ExecutionState::HASMORE || state == ExecutionState::DONE); /*Second parameter are to ignore registers that should be kept but are missing in the input row*/ - output.copyRow(inputRow, std::is_same::value); + output.copyRow(inputRow, std::is_same_v); TRI_ASSERT(output.produced()); output.advanceRow(); @@ -189,14 +188,14 @@ std::pair IdExecutor::produceRows(OutputAq } } - return {state, stats}; + return {inputRange.upstreamState(), stats, output.getClientCall()}; } template std::tuple::Stats, SharedAqlItemBlockPtr> IdExecutor::fetchBlockForPassthrough(size_t atMost) { - auto rv = _fetcher.fetchBlockForPassthrough(atMost); - return {rv.first, {}, std::move(rv.second)}; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } template class ::arangodb::aql::IdExecutor; diff --git a/arangod/Aql/IdExecutor.h b/arangod/Aql/IdExecutor.h index 50e1a762a816..10e63216e50c 100644 --- a/arangod/Aql/IdExecutor.h +++ b/arangod/Aql/IdExecutor.h @@ -52,6 +52,9 @@ class Methods; } namespace aql { + +struct AqlCall; +class AqlItemBlockInputRange; class ExecutionEngine; class ExecutionNode; class ExecutorInfos; @@ -147,6 +150,15 @@ class IdExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Row of Aql Values. 
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + // Deprecated remove me std::tuple fetchBlockForPassthrough(size_t atMost); private: diff --git a/arangod/Aql/RemoteExecutor.cpp b/arangod/Aql/RemoteExecutor.cpp index 1ac8f587a3fd..df23a2281a65 100644 --- a/arangod/Aql/RemoteExecutor.cpp +++ b/arangod/Aql/RemoteExecutor.cpp @@ -431,7 +431,22 @@ std::pair ExecutionBlockImpl::shutdown(i std::tuple ExecutionBlockImpl::execute( AqlCallStack stack) { - TRI_ASSERT(false); + // Use the old getSome/SkipSome API. + // TODO needs execute implementation instead + auto myCall = stack.popCall(); + TRI_ASSERT(AqlCall::IsSkipSomeCall(myCall) || AqlCall::IsGetSomeCall(myCall)); + if (AqlCall::IsSkipSomeCall(myCall)) { + auto const [state, skipped] = skipSome(myCall.getOffset()); + if (state != ExecutionState::WAITING) { + myCall.didSkip(skipped); + } + return {state, skipped, nullptr}; + } else if (AqlCall::IsGetSomeCall(myCall)) { + auto const [state, block] = getSome(myCall.getLimit()); + // We do not need to count as softLimit will be overwritten, and hard cannot be set. + return {state, 0, block}; + } + // Should never get here! THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index c9e6e0665cdb..e0cc8ab6ee0a 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -674,9 +674,8 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, TRI_IF_FAILURE("RestAqlHandler::getSome") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - auto atMost = - VelocyPackHelper::getNumericValue(querySlice, "atMost", - ExecutionBlock::DefaultBatchSize); + auto atMost = VelocyPackHelper::getNumericValue(querySlice, "atMost", + ExecutionBlock::DefaultBatchSize); SharedAqlItemBlockPtr items; ExecutionState state; if (shardId.empty()) { @@ -685,13 +684,13 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, return RestStatus::WAITING; } } else { + TRI_ASSERT(_query->engine()->root()->getPlanNode()->getType() == ExecutionNode::SCATTER || + _query->engine()->root()->getPlanNode()->getType() == ExecutionNode::DISTRIBUTE); auto block = dynamic_cast(_query->engine()->root()); if (block == nullptr) { THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unexpected node type"); } - TRI_ASSERT(block->getPlanNode()->getType() == ExecutionNode::SCATTER || - block->getPlanNode()->getType() == ExecutionNode::DISTRIBUTE); std::tie(state, items) = block->getSomeForShard(atMost, shardId); if (state == ExecutionState::WAITING) { return RestStatus::WAITING; @@ -708,9 +707,8 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, answerBuilder); } } else if (operation == "skipSome") { - auto atMost = - VelocyPackHelper::getNumericValue(querySlice, "atMost", - ExecutionBlock::DefaultBatchSize); + auto atMost = VelocyPackHelper::getNumericValue(querySlice, "atMost", + ExecutionBlock::DefaultBatchSize); size_t skipped; if (shardId.empty()) { auto tmpRes = _query->engine()->skipSome(atMost); @@ -719,13 +717,14 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, } skipped = tmpRes.second; } else { + TRI_ASSERT(_query->engine()->root()->getPlanNode()->getType() == ExecutionNode::SCATTER || + _query->engine()->root()->getPlanNode()->getType() == ExecutionNode::DISTRIBUTE); + auto block = dynamic_cast(_query->engine()->root()); if 
(block == nullptr) { THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unexpected node type"); } - TRI_ASSERT(block->getPlanNode()->getType() == ExecutionNode::SCATTER || - block->getPlanNode()->getType() == ExecutionNode::DISTRIBUTE); auto tmpRes = block->skipSomeForShard(atMost, shardId); if (tmpRes.first == ExecutionState::WAITING) { diff --git a/arangod/Aql/ScatterExecutor.cpp b/arangod/Aql/ScatterExecutor.cpp index 65b601ee1c92..40d1c2208cf2 100644 --- a/arangod/Aql/ScatterExecutor.cpp +++ b/arangod/Aql/ScatterExecutor.cpp @@ -22,168 +22,114 @@ #include "ScatterExecutor.h" +#include "Aql/AqlCallStack.h" +#include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutionEngine.h" +#include "Aql/IdExecutor.h" #include "Basics/Exceptions.h" using namespace arangodb; using namespace arangodb::aql; -ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, - ScatterNode const* node, - ExecutorInfos&& infos, - std::vector const& shardIds) - : BlocksWithClients(engine, node, shardIds), - _infos(std::move(infos)), - _query(*engine->getQuery()) { - _shardIdMap.reserve(_nrClients); - for (size_t i = 0; i < _nrClients; i++) { - _shardIdMap.emplace(std::make_pair(shardIds[i], i)); - } +ScatterExecutorInfos::ScatterExecutorInfos( + std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, std::vector clientIds) + : ExecutorInfos(readableInputRegisters, writeableOutputRegisters, nrInputRegisters, + nrOutputRegisters, registersToClear, registersToKeep), + ClientsExecutorInfos(std::move(clientIds)) {} + +ScatterExecutor::ClientBlockData::ClientBlockData(ExecutionEngine& engine, + ScatterNode const* node, + ExecutorInfos const& scatterInfos) + : _queue{}, _executor(nullptr), _executorHasMore{false} { + // We only get shared ptrs to const data. so we need to copy here... + IdExecutorInfos infos{scatterInfos.numberOfInputRegisters(), + *scatterInfos.registersToKeep(), + *scatterInfos.registersToClear(), "", false}; + // NOTE: Do never change this type! The execute logic below requires this and only this type. + _executor = + std::make_unique>>(&engine, node, + std::move(infos)); } -/// @brief initializeCursor -std::pair ExecutionBlockImpl::initializeCursor( - InputAqlItemRow const& input) { - // local clean up - _posForClient.clear(); - - for (size_t i = 0; i < _nrClients; i++) { - _posForClient.emplace_back(0, 0); - } - - return ExecutionBlock::initializeCursor(input); -} - -/// @brief getSomeForShard -std::pair ExecutionBlockImpl::getSomeForShard( - size_t atMost, std::string const& shardId) { - traceGetSomeBegin(atMost); - auto result = getSomeForShardWithoutTrace(atMost, shardId); - return traceGetSomeEnd(result.first, std::move(result.second)); -} -std::pair ExecutionBlockImpl::getSomeForShardWithoutTrace( - size_t atMost, std::string const& shardId) { - // NOTE: We do not need to retain these, the getOrSkipSome is required to! 
- size_t skipped = 0; - SharedAqlItemBlockPtr result = nullptr; - auto out = getOrSkipSomeForShard(atMost, false, result, skipped, shardId); - if (out.first == ExecutionState::WAITING) { - return {out.first, nullptr}; - } - if (!out.second.ok()) { - THROW_ARANGO_EXCEPTION(out.second); - } - return {out.first, std::move(result)}; +auto ScatterExecutor::ClientBlockData::clear() -> void { + _queue.clear(); + _executorHasMore = false; } -/// @brief skipSomeForShard -std::pair ExecutionBlockImpl::skipSomeForShard( - size_t atMost, std::string const& shardId) { - traceSkipSomeBegin(atMost); - auto result = skipSomeForShardWithoutTrace(atMost, shardId); - return traceSkipSomeEnd(result.first, result.second); +auto ScatterExecutor::ClientBlockData::addBlock(SharedAqlItemBlockPtr block) -> void { + // NOTE: + // There given ItemBlock will be reused in all requesting blocks. + // However, the next followwing block could be passthrough. + // If it is, it will modify that data stored in block. + // If now anther client requests the same block, it is not + // the original any more, but a modified version. + // For Instance in calculation we assert that the place we write to + // is empty. If another peer-calculation has written to this value + // this assertion does not hold true anymore. + // Hence we are required to do an indepth cloning here. + _queue.emplace_back(block->slice(0, block->size())); } -std::pair ExecutionBlockImpl::skipSomeForShardWithoutTrace( - size_t atMost, std::string const& shardId) { - // NOTE: We do not need to retain these, the getOrSkipSome is required to! - size_t skipped = 0; - SharedAqlItemBlockPtr result = nullptr; - auto out = getOrSkipSomeForShard(atMost, true, result, skipped, shardId); - if (out.first == ExecutionState::WAITING) { - return {out.first, 0}; - } - TRI_ASSERT(result == nullptr); - if (!out.second.ok()) { - THROW_ARANGO_EXCEPTION(out.second); - } - return {out.first, skipped}; +auto ScatterExecutor::ClientBlockData::hasDataFor(AqlCall const& call) -> bool { + return _executorHasMore || !_queue.empty(); } -/// @brief getOrSkipSomeForShard -std::pair ExecutionBlockImpl::getOrSkipSomeForShard( - size_t atMost, bool skipping, SharedAqlItemBlockPtr& result, - size_t& skipped, std::string const& shardId) { - TRI_ASSERT(result == nullptr && skipped == 0); - TRI_ASSERT(atMost > 0); - - size_t const clientId = getClientId(shardId); - - if (!hasMoreForClientId(clientId)) { - return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; +auto ScatterExecutor::ClientBlockData::execute(AqlCall call, ExecutionState upstreamState) + -> std::tuple { + TRI_ASSERT(_executor != nullptr); + // Make sure we actually have data before you call execute + TRI_ASSERT(hasDataFor(call)); + if (!_executorHasMore) { + auto const& block = _queue.front(); + // This cast is guaranteed, we create this a couple lines above and only + // this executor is used here. + // Unfortunately i did not get a version compiled were i could only forward + // declare the teplates in header. + auto casted = + static_cast>*>(_executor.get()); + TRI_ASSERT(casted != nullptr); + casted->injectConstantBlock(block); + _executorHasMore = true; + _queue.pop_front(); } - - TRI_ASSERT(_posForClient.size() > clientId); - std::pair& pos = _posForClient[clientId]; - - // pull more blocks from dependency if necessary . . . 
- if (pos.first >= _buffer.size()) { - auto res = getBlock(atMost); - if (res.first == ExecutionState::WAITING) { - return {res.first, TRI_ERROR_NO_ERROR}; - } - if (!res.second) { - TRI_ASSERT(res.first == ExecutionState::DONE); - return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; + AqlCallStack stack{call}; + auto [state, skipped, result] = _executor->execute(stack); + + // We have all data locally cannot wait here. + TRI_ASSERT(state != ExecutionState::WAITING); + + if (state == ExecutionState::DONE) { + // This executor is finished, including shadowrows + // We are going to reset it on next call + _executorHasMore = false; + + // Also we need to adjust the return states + // as this state only represents one single block + if (!_queue.empty()) { + state = ExecutionState::HASMORE; + } else { + state = upstreamState; } } - - auto& blockForClient = _buffer[pos.first]; - - size_t available = blockForClient->size() - pos.second; - // available should be non-zero - - skipped = (std::min)(available, atMost); // nr rows in outgoing block - - if (!skipping) { - result = blockForClient->slice(pos.second, pos.second + skipped); - } - - // increment the position . . . - pos.second += skipped; - - // check if we're done at current block in buffer . . . - if (pos.second == blockForClient->size()) { - pos.first++; // next block - pos.second = 0; // reset the position within a block - - // check if we can pop the front of the buffer . . . - bool popit = true; - for (size_t i = 0; i < _nrClients; i++) { - if (_posForClient[i].first == 0) { - popit = false; - break; - } - } - if (popit) { - _buffer.pop_front(); - // update the values in first coord of _posForClient - for (size_t i = 0; i < _nrClients; i++) { - _posForClient[i].first--; - } - } - } - - return {getHasMoreStateForClientId(clientId), TRI_ERROR_NO_ERROR}; + return {state, skipped, result}; } -bool ExecutionBlockImpl::hasMoreForClientId(size_t clientId) const { - TRI_ASSERT(_nrClients != 0); - - TRI_ASSERT(clientId < _posForClient.size()); - std::pair pos = _posForClient.at(clientId); - // (i, j) where i is the position in _buffer, and j is the position in - // _buffer[i] we are sending to +ScatterExecutor::ScatterExecutor(ExecutorInfos const&){}; - if (pos.first <= _buffer.size()) { - return true; +auto ScatterExecutor::distributeBlock(SharedAqlItemBlockPtr block, + std::unordered_map& blockMap) const + -> void { + // Scatter returns every block on every client as is. 
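+  // (addBlock() stores a deep copy made via slice(), so a passthrough
+  // consumer of one client cannot modify the rows handed to another client;
+  // see the NOTE in ClientBlockData::addBlock above.)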
+ for (auto& [id, list] : blockMap) { + list.addBlock(block); } - return _upstreamState == ExecutionState::HASMORE; } -ExecutionState ExecutionBlockImpl::getHasMoreStateForClientId(size_t clientId) const { - if (hasMoreForClientId(clientId)) { - return ExecutionState::HASMORE; - } - return ExecutionState::DONE; -} +ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, + ScatterNode const* node, + ScatterExecutorInfos&& infos) + : BlocksWithClientsImpl(engine, node, std::move(infos)) {} diff --git a/arangod/Aql/ScatterExecutor.h b/arangod/Aql/ScatterExecutor.h index fb20f184b499..53bd1bb1b5c5 100644 --- a/arangod/Aql/ScatterExecutor.h +++ b/arangod/Aql/ScatterExecutor.h @@ -31,61 +31,65 @@ namespace arangodb { namespace aql { +class ExecutionEngine; +class ScatterNode; + +class ScatterExecutorInfos : public ExecutorInfos, public ClientsExecutorInfos { + public: + ScatterExecutorInfos(std::shared_ptr> readableInputRegisters, + std::shared_ptr> writeableOutputRegisters, + RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::unordered_set registersToClear, + std::unordered_set registersToKeep, + std::vector clientIds); +}; + // The ScatterBlock is actually implemented by specializing ExecutionBlockImpl, // so this class only exists to identify the specialization. -class ScatterExecutor {}; +class ScatterExecutor { + public: + using Infos = ScatterExecutorInfos; + + class ClientBlockData { + public: + ClientBlockData(ExecutionEngine& engine, ScatterNode const* node, + ExecutorInfos const& scatterInfos); + + auto clear() -> void; + auto addBlock(SharedAqlItemBlockPtr block) -> void; + auto hasDataFor(AqlCall const& call) -> bool; + + auto execute(AqlCall call, ExecutionState upstreamState) + -> std::tuple; + + private: + std::deque _queue; + // This is unique_ptr to get away with everything beeing forward declared... + std::unique_ptr _executor; + bool _executorHasMore; + }; + + ScatterExecutor(ExecutorInfos const&); + ~ScatterExecutor() = default; + + auto distributeBlock(SharedAqlItemBlockPtr block, + std::unordered_map& blockMap) const + -> void; +}; /** * @brief See ExecutionBlockImpl.h for documentation. */ template <> -class ExecutionBlockImpl : public BlocksWithClients { +class ExecutionBlockImpl : public BlocksWithClientsImpl { public: // TODO Even if it's not strictly necessary here, for consistency's sake the // non-standard argument (shardIds) should probably be moved into some // ScatterExecutorInfos class. ExecutionBlockImpl(ExecutionEngine* engine, ScatterNode const* node, - ExecutorInfos&& infos, std::vector const& shardIds); + ScatterExecutorInfos&& infos); ~ExecutionBlockImpl() override = default; - - std::pair initializeCursor(InputAqlItemRow const& input) override; - - /// @brief getSomeForShard - std::pair getSomeForShard(size_t atMost, - std::string const& shardId) override; - - /// @brief skipSomeForShard - std::pair skipSomeForShard(size_t atMost, - std::string const& shardId) override; - - private: - /// @brief getSomeForShard - std::pair getSomeForShardWithoutTrace( - size_t atMost, std::string const& shardId); - - /// @brief skipSomeForShard - std::pair skipSomeForShardWithoutTrace(size_t atMost, - std::string const& shardId); - - std::pair getOrSkipSomeForShard( - size_t atMost, bool skipping, SharedAqlItemBlockPtr& result, - size_t& skipped, std::string const& shardId); - - bool hasMoreForClientId(size_t clientId) const; - - /// @brief getHasMoreStateForClientId: State for client ? 
- ExecutionState getHasMoreStateForClientId(size_t clientId) const; - - ExecutorInfos const& infos() const { return _infos; } - - private: - ExecutorInfos _infos; - - Query const& _query; - - /// @brief _posForClient: - std::vector> _posForClient; }; } // namespace aql diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index 0460ecef625d..ef98004b4084 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -62,7 +62,18 @@ class SingleRowFetcher { SingleRowFetcher(); public: - // TODO implement and document + /** + * @brief Execute the given call stack + * + * @param stack Call stack, on top of stack there is current subquery, bottom is the main query. + * @return std::tuple + * ExecutionState => DONE, all queries are done, there will be no more + * ExecutionState => HASMORE, there are more results for queries, might be on other subqueries + * ExecutionState => WAITING, we need to do I/O to solve the request, save local state and return WAITING to caller immediately + * + * size_t => Amount of documents skipped + * DataRange => Resulting data + */ std::tuple execute(AqlCallStack& stack); /** @@ -100,8 +111,7 @@ class SingleRowFetcher { // Like fetchRow(), but returns both the subquery-local state (like fetchRow()) // and the global state (like fetchShadowRow()). // Currently necessary only in the SubqueryStartExecutor. - [[nodiscard]] RowWithStates fetchRowWithGlobalState( - size_t atMost = ExecutionBlock::DefaultBatchSize); + [[nodiscard]] RowWithStates fetchRowWithGlobalState(size_t atMost = ExecutionBlock::DefaultBatchSize); // NOLINTNEXTLINE google-default-arguments [[nodiscard]] TEST_VIRTUAL std::pair fetchShadowRow( diff --git a/tests/Aql/ExecutorTestHelper.cpp b/tests/Aql/ExecutorTestHelper.cpp index 4bd9cd324d2b..17a521230cda 100644 --- a/tests/Aql/ExecutorTestHelper.cpp +++ b/tests/Aql/ExecutorTestHelper.cpp @@ -22,6 +22,49 @@ #include "ExecutorTestHelper.h" +#include "Aql/ExecutionEngine.h" + +using namespace arangodb::tests::aql; + +auto arangodb::tests::aql::ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, + SharedAqlItemBlockPtr expected) -> void { + velocypack::Options vpackOptions; + ASSERT_NE(expected, nullptr); + ASSERT_NE(actual, nullptr); + EXPECT_EQ(actual->size(), expected->size()); + EXPECT_EQ(actual->getNrRegs(), 1); + for (size_t i = 0; i < (std::min)(actual->size(), expected->size()); ++i) { + auto const& x = actual->getValueReference(i, 0); + auto const& y = expected->getValueReference(i, 0); + EXPECT_TRUE(AqlValue::Compare(&vpackOptions, x, y, true) == 0) + << "Row " << i << " Column " << 0 << " do not agree. " + << x.slice().toJson(&vpackOptions) << " vs. 
" + << y.slice().toJson(&vpackOptions); + } +} + +template +AqlExecutorTestCase::AqlExecutorTestCase() + : _server{}, fakedQuery{_server.createFakeQuery(enableQueryTrace)} { + auto engine = std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); + fakedQuery->setEngine(engine.release()); +} + +template +auto AqlExecutorTestCase::generateNodeDummy() -> ExecutionNode* { + auto dummy = std::make_unique(fakedQuery->plan(), _execNodes.size()); + auto res = dummy.get(); + _execNodes.emplace_back(std::move(dummy)); + return res; +} +template +auto AqlExecutorTestCase::manager() const -> AqlItemBlockManager& { + return fakedQuery->engine()->itemBlockManager(); +} + +template class ::arangodb::tests::aql::AqlExecutorTestCase; +template class ::arangodb::tests::aql::AqlExecutorTestCase; + std::ostream& arangodb::tests::aql::operator<<(std::ostream& stream, arangodb::tests::aql::ExecutorCall call) { return stream << [call]() { @@ -39,5 +82,5 @@ std::ostream& arangodb::tests::aql::operator<<(std::ostream& stream, // which is unable to figure out that the switch above is complete. TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); - }(); + }(); } diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 0d5ffbb5f08c..907f9201458d 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -26,6 +26,7 @@ #include "gtest/gtest.h" #include "AqlItemBlockHelper.h" +#include "Mocks/Servers.h" #include "WaitingExecutionBlockMock.h" #include "Aql/AqlCall.h" @@ -44,6 +45,45 @@ namespace arangodb { namespace tests { namespace aql { +auto ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected) + -> void; + +/** + * @brief Base class for ExecutorTests in Aql. + * It will provide a test server, including + * an AqlQuery, as well as the ability to generate + * Dummy ExecutionNodes. + * + * @tparam enableQueryTrace Enable Aql Profile Trace logging + */ +template +class AqlExecutorTestCase { + protected: + AqlExecutorTestCase(); + virtual ~AqlExecutorTestCase() = default; + + /** + * @brief Creates and manages a ExecutionNode. + * These nodes can be used to create the Executors + * Caller does not need to manage the memory. + * + * @return ExecutionNode* Pointer to a dummy ExecutionNode. Memory is managed, do not delete. 
+ */ + auto generateNodeDummy() -> ExecutionNode*; + + auto manager() const -> AqlItemBlockManager&; + + private: + mocks::MockAqlServer _server; + std::vector> _execNodes; + + protected: + // available variables + ResourceMonitor monitor{}; + AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; + std::unique_ptr fakedQuery; +}; + template struct ExecutorTestHelper { using SplitType = std::variant, std::size_t, std::monostate>; diff --git a/tests/Aql/IdExecutorTest.cpp b/tests/Aql/IdExecutorTest.cpp index 25871cfade75..32f0a51bf6ce 100644 --- a/tests/Aql/IdExecutorTest.cpp +++ b/tests/Aql/IdExecutorTest.cpp @@ -20,15 +20,23 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// -#include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "AqlItemBlockHelper.h" +#include "ExecutorTestHelper.h" +#include "RowFetcherHelper.h" + +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlock.h" #include "Aql/ConstFetcher.h" +#include "Aql/ExecutionEngine.h" +#include "Aql/ExecutionNode.h" #include "Aql/ExecutorInfos.h" #include "Aql/IdExecutor.h" #include "Aql/InputAqlItemRow.h" #include "Aql/OutputAqlItemRow.h" +#include "Aql/Query.h" #include "Aql/ResourceUsage.h" #include "Aql/Stats.h" @@ -39,60 +47,494 @@ using namespace arangodb::aql; namespace arangodb::tests::aql { -class IdExecutorTest : public ::testing::Test { +using TestParam = std::tuple, // The input data + ExecutorState, // The upstream state + AqlCall, // The client Call, + OutputAqlItemRow::CopyRowBehavior // How the data is handled within outputRow + >; + +class IdExecutorTestCombiner : public AqlExecutorTestCase<>, + public ::testing::TestWithParam { protected: - ExecutionState state; - - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - SharedAqlItemBlockPtr block; - std::shared_ptr> outputRegisters; - std::shared_ptr> registersToKeep; // this must be set correctly - - IdExecutorInfos infos; - OutputAqlItemRow row; - - IdExecutorTest() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - block(new AqlItemBlock(itemBlockManager, 1000, 1)), - outputRegisters(make_shared_unordered_set()), - registersToKeep(make_shared_unordered_set({0})), - infos(1 /*nrRegs*/, *registersToKeep /*toKeep*/, {} /*toClear*/), - row(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()) {} + IdExecutorTestCombiner() {} + + auto prepareInputRange() -> AqlItemBlockInputRange { + auto const& [input, upstreamState, clientCall, copyBehaviour] = GetParam(); + if (input.empty()) { + // no input + return AqlItemBlockInputRange{upstreamState}; + } + MatrixBuilder<1> matrix; + for (auto const& it : input) { + matrix.emplace_back(RowBuilder<1>{{it}}); + } + SharedAqlItemBlockPtr block = buildBlock<1>(manager(), std::move(matrix)); + return AqlItemBlockInputRange{upstreamState, block, 0, input.size()}; + } + + auto prepareOutputRow(SharedAqlItemBlockPtr input) -> OutputAqlItemRow { + auto toWrite = make_shared_unordered_set({}); + auto toKeep = make_shared_unordered_set({0}); + auto toClear = make_shared_unordered_set(); + auto const& [unused, upstreamState, clientCall, copyBehaviour] = GetParam(); + AqlCall callCopy = clientCall; + if (copyBehaviour == OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows) { + // For passthrough we reuse the block + return OutputAqlItemRow(input, toWrite, toKeep, toClear, + std::move(callCopy), copyBehaviour); + } + // Otherwise we need to create a fresh block 
(or forward nullptr) + if (input == nullptr) { + SharedAqlItemBlockPtr outBlock{nullptr}; + return OutputAqlItemRow(outBlock, toWrite, toKeep, toClear, + std::move(callCopy), copyBehaviour); + } + SharedAqlItemBlockPtr outBlock{ + new AqlItemBlock(manager(), input->size(), input->getNrRegs())}; + return OutputAqlItemRow(outBlock, toWrite, toKeep, toClear, + std::move(callCopy), copyBehaviour); + } + + // After Execute is done these fetchers shall be removed, + // the Executor does not need it anymore! + // However the template is still required. + template + auto runTest(Fetcher& fetcher) -> void { + auto const& [input, upstreamState, clientCall, copyBehaviour] = GetParam(); + + auto inputRange = prepareInputRange(); + auto outputRow = prepareOutputRow(inputRange.getBlock()); + + // If the input is empty, all rows(none) are used, otherwise they are not. + EXPECT_EQ(outputRow.allRowsUsed(), input.empty()); + IdExecutorInfos infos{1, {0}, {}}; + + IdExecutor testee{fetcher, infos}; + + auto const [state, stats, call] = testee.produceRows(inputRange, outputRow); + EXPECT_EQ(state, upstreamState); + // Stats are NoStats, no checks here. + + // We can never forward any offset. + EXPECT_EQ(call.getOffset(), 0); + + // The limits need to be reduced by input size. + EXPECT_EQ(call.softLimit + input.size(), clientCall.softLimit); + EXPECT_EQ(call.hardLimit + input.size(), clientCall.hardLimit); + + // We can forward fullCount if it is there. + EXPECT_EQ(call.needsFullCount(), clientCall.needsFullCount()); + + // This internally actually asserts that all input rows are "copied". + EXPECT_TRUE(outputRow.allRowsUsed()); + auto result = outputRow.stealBlock(); + if (!input.empty()) { + ASSERT_NE(result, nullptr); + ASSERT_EQ(result->size(), input.size()); + for (size_t i = 0; i < input.size(); ++i) { + auto val = result->getValueReference(i, 0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(val.toInt64(), input.at(i)); + } + } else { + EXPECT_EQ(result, nullptr); + } + } }; -TEST_F(IdExecutorTest, there_are_no_rows_upstream) { - ConstFetcherHelper fetcher(itemBlockManager, nullptr); - IdExecutor testee(fetcher, infos); - NoStats stats{}; +TEST_P(IdExecutorTestCombiner, test_produce_datarange_constFetcher) { + std::shared_ptr fakeFetcherInput{VPackParser::fromJson("[ ]")}; + ConstFetcher cFetcher = ConstFetcherHelper{manager(), fakeFetcherInput->buffer()}; + runTest(cFetcher); +} + +TEST_P(IdExecutorTestCombiner, test_produce_datarange_singleRowFetcher) { + std::shared_ptr fakeFetcherInput{VPackParser::fromJson("[ ]")}; + SingleRowFetcher<::arangodb::aql::BlockPassthrough::Enable> srFetcher = + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable>{ + manager(), fakeFetcherInput->buffer(), false}; + runTest(srFetcher); +} + +/** + * In order to test this executor + * Only the following input cases are relevant: + * 1) Empty input and Done + * 2) Empty input and HasMore + * 3) Input with data and done + * 4) Input with data and HasMore + * + * And only the following Call cases are relevant: + * 1) Call limit > data, fullCount: false + * 2) Call limit > data, fullCount: true + * 3) Call limit == data, fullCount: false + * 4) Call limit == data, fullCount: true + * 5) Unlimited call + * + * All other cases are excluded by Passhtrough. 
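+ * (In particular, an offset is never forwarded to this passthrough executor;
+ * runTest() below asserts that the call returned by produceRows has
+ * getOffset() == 0.)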
+ * + * This executor is templated by two fetcher types: + * ConstFetcher + * SingleRowFetcher + * + * The output row has the following copy types + * DoNotCopy << This is actually used in production, however we cannot test that we actually do something with it + * DoCopy << This is to assert that copying is performaed + */ + +static auto inputs = testing::Values(std::vector{}, // Test empty input + std::vector{1, 2, 3} // Test input data +); + +auto upstreamStates = testing::Values(ExecutorState::HASMORE, ExecutorState::DONE); +auto clientCalls = testing::Values(AqlCall{}, // unlimited call + AqlCall{0, 3, AqlCall::Infinity{}, false}, // softlimit call (note this is equal to length of input data) + AqlCall{0, AqlCall::Infinity{}, 3, false}, // hardlimit call (note this is equal to length of input data), no fullcount + AqlCall{0, AqlCall::Infinity{}, 3, true}, // hardlimit call (note this is equal to length of input data), with fullcount + AqlCall{0, 7, AqlCall::Infinity{}, false}, // softlimit call (note this is larger than length of input data) + AqlCall{0, AqlCall::Infinity{}, 7, false}, // hardlimit call (note this is larger than length of input data), no fullcount + AqlCall{0, AqlCall::Infinity{}, 7, true} // hardlimit call (note this is larger than length of input data), with fullcount +); + +auto copyBehaviours = testing::Values(OutputAqlItemRow::CopyRowBehavior::CopyInputRows, // Create a new row and write the data + OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows // Just passthrough (production) +); + +INSTANTIATE_TEST_CASE_P(IdExecutorTest, IdExecutorTestCombiner, + ::testing::Combine(inputs, upstreamStates, clientCalls, copyBehaviours)); + +class IdExecutionBlockTest : public AqlExecutorTestCase<>, public ::testing::Test {}; + +// The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl +TEST_F(IdExecutionBlockTest, test_initialize_cursor_get) { + IdExecutorInfos infos{1, {0}, {}}; + ExecutionBlockImpl> testee{fakedQuery->engine(), + generateNodeDummy(), + std::move(infos)}; + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + + for (size_t i = 0; i < inputBlock->size(); ++i) { + InputAqlItemRow input{inputBlock, i}; + ASSERT_TRUE(input.isInitialized()); + { + // Test first call, executor is done, cannot skip and does not return + AqlCall call{}; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + { + // Initialize cursor + auto const& [state, result] = testee.initializeCursor(input); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_TRUE(result.ok()); + } + { + // Test second call, executor needs to return the row + AqlCall call{}; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(block, nullptr); + EXPECT_EQ(block->size(), 1); + auto const& val = block->getValueReference(0, 0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(static_cast(val.toInt64()), i); + } + } +} + +// The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl +TEST_F(IdExecutionBlockTest, test_initialize_cursor_skip) { + IdExecutorInfos infos{1, {0}, {}}; + ExecutionBlockImpl> testee{fakedQuery->engine(), + generateNodeDummy(), + std::move(infos)}; + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + + for (size_t i = 0; i < inputBlock->size(); ++i) { + 
InputAqlItemRow input{inputBlock, i}; + ASSERT_TRUE(input.isInitialized()); + { + // Test first call, executor is done, cannot skip and does not return + AqlCall call{}; + call.offset = 10; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + { + // Initialize cursor + auto const& [state, result] = testee.initializeCursor(input); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_TRUE(result.ok()); + } + { + // Test second call, executor needs to skip the row + AqlCall call{}; + call.offset = 10; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 1); + ASSERT_EQ(block, nullptr); + } + } +} + +// The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl +TEST_F(IdExecutionBlockTest, test_initialize_cursor_fullCount) { + IdExecutorInfos infos{1, {0}, {}}; + ExecutionBlockImpl> testee{fakedQuery->engine(), + generateNodeDummy(), + std::move(infos)}; + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + + for (size_t i = 0; i < inputBlock->size(); ++i) { + InputAqlItemRow input{inputBlock, i}; + ASSERT_TRUE(input.isInitialized()); + { + // Test first call, executor is done, cannot skip and does not return + AqlCall call{}; + call.hardLimit = 0; + call.fullCount = true; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } + { + // Initialize cursor + auto const& [state, result] = testee.initializeCursor(input); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_TRUE(result.ok()); + } + { + // Test second call, executor needs to skip the row + AqlCall call{}; + call.hardLimit = 0; + call.fullCount = true; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 1); + ASSERT_EQ(block, nullptr); + } + } +} - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(row.produced()); +TEST_F(IdExecutionBlockTest, test_hardlimit_single_row_fetcher) { + IdExecutorInfos infos{1, {0}, {}}; + ExecutorTestHelper>>(*fakedQuery) + .setInputValueList(1, 2, 3, 4, 5, 6) + .setCall(AqlCall{0, AqlCall::Infinity{}, 2, false}) + .expectOutput({0}, {{1}, {2}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(std::move(infos)); } -TEST_F(IdExecutorTest, there_are_rows_in_the_upstream) { - auto input = VPackParser::fromJson("[ [true], [false], [true] ]"); - ConstFetcherHelper fetcher(itemBlockManager, input->buffer()); - IdExecutor testee(fetcher, infos); - NoStats stats{}; +/** + * @brief This are special tests, the ConstFetcher is overloaded + * with data rows, now the IdExecutor which is passthrough + * asks for a much lower hard limit. + * Used in ScatterExecutor logic. + * param: useFullCount + */ +class BlockOverloadTest : public AqlExecutorTestCase<>, + public ::testing::TestWithParam { + protected: + auto getTestee() -> ExecutionBlockImpl> { + IdExecutorInfos infos{1, {0}, {}}; + return ExecutionBlockImpl>{fakedQuery->engine(), + generateNodeDummy(), + std::move(infos)}; + } + + auto useFullCount() -> bool { return GetParam(); } +}; - // This block consumes all rows at once. 
- std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); +INSTANTIATE_TEST_CASE_P(IdExecutionBlock, BlockOverloadTest, ::testing::Bool()); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(row.produced()); +TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher) { + auto testee = getTestee(); + { + // Inject block + auto inputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}); + testee.injectConstantBlock(inputBlock); + } + { + // Now call with too small hardLimit + auto expectedOutputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + AqlCall call{}; + call.hardLimit = 3; + call.fullCount = useFullCount(); + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + if (useFullCount()) { + EXPECT_EQ(skipped, 4); + } else { + EXPECT_EQ(skipped, 0); + } - // verify result - AqlValue value; - auto block = row.stealBlock(); - for (std::size_t index = 0; index < 3; index++) { - value = block->getValue(index, 0); - ASSERT_TRUE(value.isBoolean()); - ASSERT_EQ(value.toBoolean(), input->slice().at(index).at(0).getBool()); + ValidateBlocksAreEqual(block, expectedOutputBlock); + } + { + // Validate that additional upstream-rows are gone. + AqlCall call{}; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } +} + +TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_at_end) { + auto testee = getTestee(); + { + // Inject block + auto inputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}, + {{5, 0}, {6, 1}}); + testee.injectConstantBlock(inputBlock); + } + { + // Now call with too small hardLimit + auto expectedOutputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {5}, {6}}, {{3, 0}, {4, 1}}); + AqlCall call{}; + call.hardLimit = 3; + call.fullCount = useFullCount(); + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + if (useFullCount()) { + EXPECT_EQ(skipped, 2); + } else { + EXPECT_EQ(skipped, 0); + } + ValidateBlocksAreEqual(block, expectedOutputBlock); + } + { + // Validate that additional upstream-rows are gone. 
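+    // (The hard limit of 3 above already returned rows {0},{1},{2} and
+    // discarded the rest, so this follow-up call must report DONE without a
+    // block.)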
+ AqlCall call{}; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); + } +} + +TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_in_between) { + auto testee = getTestee(); + { + // Inject block + auto inputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}, + {{3, 0}, {4, 1}, {6, 0}}); + testee.injectConstantBlock(inputBlock); + } + { + // Now call with too small hardLimit + auto expectedOutputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {3}, {4}}, {{2, 0}, {3, 1}}); + AqlCall call{}; + call.hardLimit = 2; + call.fullCount = useFullCount(); + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + if (useFullCount()) { + EXPECT_EQ(skipped, 1); + } else { + EXPECT_EQ(skipped, 0); + } + ValidateBlocksAreEqual(block, expectedOutputBlock); + } + { + // Validate that next call will give remaining rows + // Now call with too small hardLimit + auto expectedOutputBlock = buildBlock<1>(itemBlockManager, {{5}, {6}}, {{1, 0}}); + AqlCall call{}; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, expectedOutputBlock); + } +} + +// Consecutive ShadowRows can be returned within the same block. +TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) { + auto testee = getTestee(); + { + // Inject block + auto inputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}, + {{3, 0}, {4, 1}, {5, 0}, {6, 0}}); + testee.injectConstantBlock(inputBlock); + } + // We can only return until the next top-level shadow row is reached. + { + // Now call with too small hardLimit + auto expectedOutputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {3}, {4}}, {{2, 0}, {3, 1}}); + AqlCall call{}; + call.hardLimit = 2; + call.fullCount = useFullCount(); + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + if (useFullCount()) { + EXPECT_EQ(skipped, 1); + } else { + EXPECT_EQ(skipped, 0); + } + ValidateBlocksAreEqual(block, expectedOutputBlock); + } + { + // Second call will only find a single ShadowRow + auto expectedOutputBlock = buildBlock<1>(itemBlockManager, {{5}}, {{0, 0}}); + AqlCall call{}; + call.hardLimit = 2; + call.fullCount = useFullCount(); + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, expectedOutputBlock); + } + { + // Third call will only find a single ShadowRow + auto expectedOutputBlock = buildBlock<1>(itemBlockManager, {{6}}, {{0, 0}}); + AqlCall call{}; + call.hardLimit = 2; + call.fullCount = useFullCount(); + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, expectedOutputBlock); + } + { + // Validate that additional upstream-rows are gone. 
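+    // (Rows {3},{4} were dropped by the hard limit and the trailing shadow
+    // rows were already emitted, so there is nothing left upstream.)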
+ AqlCall call{}; + AqlCallStack stack(std::move(call)); + auto const& [state, skipped, block] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + EXPECT_EQ(block, nullptr); } } -} // namespace arangodb +} // namespace arangodb::tests::aql diff --git a/tests/Aql/ScatterExecutorTest.cpp b/tests/Aql/ScatterExecutorTest.cpp new file mode 100644 index 000000000000..883a8b8498c0 --- /dev/null +++ b/tests/Aql/ScatterExecutorTest.cpp @@ -0,0 +1,592 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "gtest/gtest.h" + +#include "AqlItemBlockHelper.h" +#include "Mocks/Servers.h" +#include "WaitingExecutionBlockMock.h" + +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" +#include "Aql/ClusterNodes.h" +#include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutionEngine.h" +#include "Aql/ExecutionNode.h" +#include "Aql/ExecutorInfos.h" +#include "Aql/Query.h" +#include "Aql/ScatterExecutor.h" + +using namespace arangodb; +using namespace arangodb::aql; + +namespace arangodb::tests::aql { + +class SharedScatterExecutionBlockTest { + protected: + mocks::MockAqlServer server{}; + ResourceMonitor monitor{}; + AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; + std::unique_ptr fakedQuery{server.createFakeQuery()}; + std::vector> _execNodes; + velocypack::Options vpackOptions; + std::vector clientIds{"a", "b", "c"}; + + SharedScatterExecutionBlockTest() { + auto engine = + std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); + fakedQuery->setEngine(engine.release()); + } + + /** + * @brief Creates and manages a ExecutionNode. + * These nodes can be used to create the Executors + * Caller does not need to manage the memory. + * + * @return ExecutionNode* Pointer to a dummy ExecutionNode. Memory is managed, do not delete. 
+ */ + auto generateNodeDummy() -> ExecutionNode* { + auto dummy = std::make_unique(fakedQuery->plan(), _execNodes.size()); + auto res = dummy.get(); + _execNodes.emplace_back(std::move(dummy)); + return res; + } + + auto generateScatterNode() -> ScatterNode* { + auto dummy = std::make_unique(fakedQuery->plan(), _execNodes.size(), + ScatterNode::ScatterType::SHARD); + auto res = dummy.get(); + _execNodes.emplace_back(std::move(dummy)); + return res; + } + + auto generateInfos() const -> ScatterExecutorInfos { + auto inputRegs = make_shared_unordered_set({0}); + auto outputRegs = make_shared_unordered_set({}); + return {inputRegs, outputRegs, 1, 1, {}, {0}, clientIds}; + } + + auto createProducer(SharedAqlItemBlockPtr inputBlock) -> WaitingExecutionBlockMock { + std::deque blockDeque; + // TODO add input splicing + blockDeque.push_back(inputBlock); + return createProducer(blockDeque); + } + + auto createProducer(std::deque blockDeque) -> WaitingExecutionBlockMock { + // TODO add input splicing + + return WaitingExecutionBlockMock{fakedQuery->engine(), generateNodeDummy(), + std::move(blockDeque), + WaitingExecutionBlockMock::WaitingBehaviour::NEVER}; + } + + auto ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected) { + ASSERT_NE(expected, nullptr); + ASSERT_NE(actual, nullptr); + EXPECT_EQ(actual->size(), expected->size()); + EXPECT_EQ(actual->getNrRegs(), 1); + for (size_t i = 0; i < (std::min)(actual->size(), expected->size()); ++i) { + auto const& x = actual->getValueReference(i, 0); + auto const& y = expected->getValueReference(i, 0); + EXPECT_TRUE(AqlValue::Compare(&vpackOptions, x, y, true) == 0) + << "Row " << i << " Column " << 0 << " do not agree. " + << x.slice().toJson(&vpackOptions) << " vs. " + << y.slice().toJson(&vpackOptions); + } + } +}; + +// The tests of this suite test all permutations of client calls. +// This way we can ensure that the block works even on parallel +// execution. +class RandomOrderTest : public SharedScatterExecutionBlockTest, + public ::testing::TestWithParam> { + protected: + std::vector const& getCallOrder() { return GetParam(); } + + RandomOrderTest() {} +}; + +namespace { +template +auto ArrayPermutations(std::vector base) -> std::vector> { + std::vector> res; + // This is not corect we would need faculity of base, but we are in a test... 
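+  // (Strictly, the number of permutations is base.size() factorial, i.e. 6
+  // for the three client ids used here; reserving base.size() is only a
+  // best-effort hint.)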
+ res.reserve(base.size()); + do { + res.emplace_back(base); + } while (std::next_permutation(base.begin(), base.end())); + return res; +}; + +auto randomOrderCalls = ArrayPermutations({"a", "b", "c"}); +} // namespace + +INSTANTIATE_TEST_CASE_P(ScatterExecutionBlockTestRandomOrder, RandomOrderTest, + ::testing::ValuesIn(randomOrderCalls)); + +TEST_P(RandomOrderTest, all_clients_should_get_the_block) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client); + AqlCall call{}; // DefaultCall + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, inputBlock); + } +} + +TEST_P(RandomOrderTest, all_clients_can_skip_the_block) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client); + AqlCall call{}; + call.offset = 10; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 3); + EXPECT_EQ(block, nullptr); + } +} + +TEST_P(RandomOrderTest, all_clients_can_fullcount_the_block) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{0}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client); + AqlCall call{}; + call.hardLimit = 1; + call.fullCount = true; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 2); + ValidateBlocksAreEqual(block, expectedBlock); + } +} + +TEST_P(RandomOrderTest, all_clients_can_have_different_calls) { + auto inputBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client); + if (client == "a") { + // Just produce all + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, inputBlock); + } else if (client == "b") { + AqlCall call{}; + call.offset = 2; + call.hardLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 2); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{2}, {3}}); + ValidateBlocksAreEqual(block, expectedBlock); + } else if (client == "c") { + { + AqlCall call{}; + 
call.softLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{0}, {1}}); + ValidateBlocksAreEqual(block, expectedBlock); + } + { + // As we have softLimit we can simply call again + AqlCall call{}; + call.offset = 1; + call.softLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 1); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{3}, {4}}); + ValidateBlocksAreEqual(block, expectedBlock); + } + } + } +} + +TEST_P(RandomOrderTest, get_does_not_jump_over_shadowrows) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}}, + {{3, 0}, {5, 0}}); + auto firstExpectedBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}, {{3, 0}}); + auto secondExpectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{0, 0}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + // First call. reach first shadowrow, but do not jump over it, we do not know + // how to proceed after (e.g. skip the rows). + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " first call"); + // Produce all until shadow row + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, firstExpectedBlock); + } + + // Second call. reach up to last shadowRow and figure out that we are essentially done + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " second call"); + // Produce all until shadow row + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, secondExpectedBlock); + } +} + +TEST_P(RandomOrderTest, handling_of_higher_depth_shadowrows_produce) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}}, + {{2, 0}, {3, 1}, {5, 0}}); + auto firstExpectedBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}, {{2, 0}, {3, 1}}); + auto secondExpectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{1, 0}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + // First call. reach first shadowrow, but do not jump over it, we do not know + // how to proceed after (e.g. skip the rows). + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " first call"); + // Produce all until shadow row + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, firstExpectedBlock); + } + + // Second call. 
reach up to last shadowRow and figure out that we are essentially done + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " second call"); + // Produce all until shadow row + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, secondExpectedBlock); + } +} + +TEST_P(RandomOrderTest, handling_of_higher_depth_shadowrows_skip) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}}, + {{2, 0}, {3, 1}, {5, 0}}); + auto firstExpectedBlock = + buildBlock<1>(itemBlockManager, {{2}, {3}}, {{0, 0}, {1, 1}}); + auto secondExpectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{1, 0}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + // First call. reach first shadowrow, but do not jump over it, we do not know + // how to proceed after (e.g. skip the rows). + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " first call"); + // Produce all until shadow row + AqlCall call{}; + call.offset = 10; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 2); + ValidateBlocksAreEqual(block, firstExpectedBlock); + } + + // Second call. reach up to last shadowRow and figure out that we are essentially done + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " second call"); + // Produce all until shadow row + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ValidateBlocksAreEqual(block, secondExpectedBlock); + } +} + +TEST_P(RandomOrderTest, handling_of_consecutive_shadow_rows) { + // As there is no produce inbetween we are actually able to just forward it + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}}, + {{2, 0}, {3, 1}, {4, 0}, {5, 1}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + // First call. 
actually there are only shadowRows following, we would be able + // to plainly forward everything, however this is not suppoert yet + // so we need to ask once for every relevant shadow row (depth 0) + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client); + { + // Produce all until second relevant shadow row + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + auto expected = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}, {{2, 0}, {3, 1}}); + ValidateBlocksAreEqual(block, expected); + } + { + // Produce the last shadow rows + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + auto expected = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{0, 0}, {1, 1}}); + ValidateBlocksAreEqual(block, expected); + } + } +} + +TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}}, + {{3, 0}, {5, 0}}); + auto producer = createProducer(inputBlock); + + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + + // First call. desired to be stopped at shadowRow + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " first call."); + if (client == "a") { + // Just produce all + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + auto expectedBlock = + buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}, {{3, 0}}); + ValidateBlocksAreEqual(block, expectedBlock); + } else if (client == "b") { + AqlCall call{}; + call.offset = 2; + call.hardLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 2); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{2}, {3}}, {{1, 0}}); + ValidateBlocksAreEqual(block, expectedBlock); + } else if (client == "c") { + { + AqlCall call{}; + call.softLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{0}, {1}}); + ValidateBlocksAreEqual(block, expectedBlock); + } + { + // As we have softLimit we can simply call again + AqlCall call{}; + call.offset = 1; + call.softLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 1); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{3}}, {{0, 0}}); + ValidateBlocksAreEqual(block, expectedBlock); + } + } + } + + // Second call. 
desired to be stopped at shadowRow + for (auto const& client : getCallOrder()) { + SCOPED_TRACE("Testing client " + client + " second call."); + if (client == "a") { + // Just produce all + AqlCall call{}; + call.hardLimit = 1; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{1, 0}}); + ValidateBlocksAreEqual(block, expectedBlock); + } else if (client == "b") { + AqlCall call{}; + call.softLimit = 1; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{1, 0}}); + ValidateBlocksAreEqual(block, expectedBlock); + } else if (client == "c") { + { + AqlCall call{}; + call.offset = 10; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 1); + auto expectedBlock = buildBlock<1>(itemBlockManager, {{5}}, {{0, 0}}); + ValidateBlocksAreEqual(block, expectedBlock); + } + } + } +} + +// This test does not include randomization of clientCall ordering +class ScatterExecutionBlockTest : public SharedScatterExecutionBlockTest, + public ::testing::Test {}; + +// Here we do a more specific ordering of calls, as we need to rearange multidepthCalls + +TEST_F(ScatterExecutionBlockTest, any_ordering_of_calls_is_fine) { + std::deque blockDeque; + std::unordered_map>> expected; + std::vector callOrder; + for (auto const& c : clientIds) { + expected[c] = std::make_pair(0, std::vector{}); + } + + { + auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}}); + blockDeque.emplace_back(inputBlock); + for (auto const& c : clientIds) { + expected[c].second.emplace_back(inputBlock); + callOrder.emplace_back(c); + } + } + + { + auto inputBlock = buildBlock<1>(itemBlockManager, {{3}, {4}, {5}, {6}}); + blockDeque.emplace_back(inputBlock); + for (auto const& c : clientIds) { + expected[c].second.emplace_back(inputBlock); + callOrder.emplace_back(c); + } + } + + { + auto inputBlock = buildBlock<1>(itemBlockManager, {{7}, {8}, {9}}); + blockDeque.emplace_back(inputBlock); + for (auto const& c : clientIds) { + expected[c].second.emplace_back(inputBlock); + callOrder.emplace_back(c); + } + } + // Every client will ask every block alone. 
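+  // (Three clients times three queued blocks, so callOrder holds nine
+  // entries, and every permutation of it is exercised below.)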
+ ASSERT_EQ(callOrder.size(), clientIds.size() * blockDeque.size()); + // Now we do all permuation of potentiall call ordering + do { + auto producer = createProducer(blockDeque); + ExecutionBlockImpl testee{fakedQuery->engine(), + generateScatterNode(), generateInfos()}; + testee.addDependency(&producer); + for (auto& [c, pair] : expected) { + // Reset seen position + pair.first = 0; + } + std::stringstream permutation; + for (auto c : callOrder) { + permutation << " " << c; + } + SCOPED_TRACE("Testing permutation: " + permutation.str()); + for (auto const& client : callOrder) { + auto& [callNr, blocks] = expected[client]; + SCOPED_TRACE("Testing client " + client + " call number " + std::to_string(callNr)); + AqlCall call{}; + AqlCallStack stack{call}; + auto const [state, skipped, block] = testee.executeForClient(stack, client); + if (callNr == 2) { + EXPECT_EQ(state, ExecutionState::DONE); + } else { + EXPECT_EQ(state, ExecutionState::HASMORE); + } + EXPECT_EQ(skipped, 0); + ASSERT_TRUE(callNr < blocks.size()); + ValidateBlocksAreEqual(block, blocks[callNr]); + callNr++; + } + } while (std::next_permutation(callOrder.begin(), callOrder.end())); +} + +// TODO add test for initilaize cursor + +} // namespace arangodb::tests::aql \ No newline at end of file diff --git a/tests/Aql/ShortestPathExecutorTest.cpp b/tests/Aql/ShortestPathExecutorTest.cpp index 3ddc6495b66c..93eca4fcfc98 100644 --- a/tests/Aql/ShortestPathExecutorTest.cpp +++ b/tests/Aql/ShortestPathExecutorTest.cpp @@ -531,7 +531,7 @@ PathSequence const someOtherPaths = {pathBetween("vertex/a", "vertex/target", 10 auto sources = testing::Values(constSource, regSource, brokenSource); auto targets = testing::Values(constTarget, regTarget, brokenTarget); -auto inputs = testing::Values(noneRow, oneRow, twoRows, threeRows, someRows); +static auto inputs = testing::Values(noneRow, oneRow, twoRows, threeRows, someRows); auto paths = testing::Values(noPath, onePath, threePaths, somePaths); auto calls = testing::Values(AqlCall{}, AqlCall{0, 0, 0, false}, AqlCall{0, 1, 0, false}, diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index cfc51936e38b..95acf6b94d2c 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -928,14 +928,9 @@ using SortedCollectTestHelper = ExecutorTestHelper; using SortedCollectSplitType = SortedCollectTestHelper::SplitType; class SortedCollectExecutorTestSplit - : public ::testing::TestWithParam> { + : public AqlExecutorTestCase, + public ::testing::TestWithParam> { protected: - // ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - - mocks::MockAqlServer server; - std::unique_ptr fakedQuery; arangodb::transaction::Methods* trx; std::unordered_set regToClear; @@ -961,13 +956,8 @@ class SortedCollectExecutorTestSplit SortedCollectExecutorInfos infos; - SharedAqlItemBlockPtr block; - NoStats stats; - SortedCollectExecutorTestSplit() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - fakedQuery(server.createFakeQuery()), - trx(fakedQuery->trx()), + : trx(fakedQuery->trx()), groupRegisters{std::make_pair(1, 0)}, readableInputRegisters({0}), collectRegister(2), @@ -980,12 +970,7 @@ class SortedCollectExecutorTestSplit std::move(readableInputRegisters), std::move(writeableOutputRegisters), std::move(groupRegisters), collectRegister, expressionRegister, expressionVariable, std::move(aggregateTypes), - std::move(variables), std::move(aggregateRegisters), trx, count), - 
block(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)) { - auto engine = - std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); - fakedQuery->setEngine(engine.release()); - } + std::move(variables), std::move(aggregateRegisters), trx, count) {} }; TEST_P(SortedCollectExecutorTestSplit, split_1) { diff --git a/tests/Aql/TestExecutorHelper.cpp b/tests/Aql/TestExecutorHelper.cpp index 0ac499f43895..1343cee83c1c 100644 --- a/tests/Aql/TestExecutorHelper.cpp +++ b/tests/Aql/TestExecutorHelper.cpp @@ -27,9 +27,9 @@ #include "Basics/Common.h" -#include "Aql/InputAqlItemRow.h" #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" +#include "Aql/InputAqlItemRow.h" #include "Aql/SingleRowFetcher.h" #include "Logger/LogMacros.h" @@ -38,7 +38,8 @@ using namespace arangodb; using namespace arangodb::aql; -TestExecutorHelper::TestExecutorHelper(Fetcher& fetcher, Infos& infos) : _infos(infos), _fetcher(fetcher){}; +TestExecutorHelper::TestExecutorHelper(Fetcher& fetcher, Infos& infos) + : _infos(infos), _fetcher(fetcher){}; TestExecutorHelper::~TestExecutorHelper() = default; std::pair TestExecutorHelper::produceRows(OutputAqlItemRow& output) { @@ -68,7 +69,7 @@ std::pair TestExecutorHelper::produceRows(OutputAql output.copyRow(input); return {state, stats}; - //stats.incrFiltered(); + // stats.incrFiltered(); } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f688e3b4fc27..7b7c9fda3323 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -59,6 +59,7 @@ set(ARANGODB_TESTS_SOURCES Aql/ReplaceExecutorTest.cpp Aql/ReturnExecutorTest.cpp Aql/RowFetcherHelper.cpp + Aql/ScatterExecutorTest.cpp Aql/ShortestPathExecutorTest.cpp Aql/SingleRowFetcherTest.cpp Aql/SortedCollectExecutorTest.cpp From 44fdb29f850c31859e7b4cbe19f7f5e8a79e1bad Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Fri, 14 Feb 2020 11:28:50 +0100 Subject: [PATCH 064/122] Replaced the operator== and operator!= with `isSameBlockAndIndex`. 
(#11088) --- arangod/Aql/InputAqlItemRow.cpp | 7 ++-- arangod/Aql/InputAqlItemRow.h | 9 ++---- arangod/Aql/OutputAqlItemRow.cpp | 2 +- arangod/Aql/ShadowAqlItemRow.cpp | 8 ++--- arangod/Aql/ShadowAqlItemRow.h | 7 ++-- tests/Aql/AqlItemRowTest.cpp | 55 +++++++------------------------- 6 files changed, 21 insertions(+), 67 deletions(-) diff --git a/arangod/Aql/InputAqlItemRow.cpp b/arangod/Aql/InputAqlItemRow.cpp index 382e53e3a091..5216e4f8b546 100644 --- a/arangod/Aql/InputAqlItemRow.cpp +++ b/arangod/Aql/InputAqlItemRow.cpp @@ -168,13 +168,10 @@ RegisterCount InputAqlItemRow::getNrRegisters() const noexcept { return block().getNrRegs(); } -bool InputAqlItemRow::operator==(InputAqlItemRow const& other) const noexcept { - return this->_block == other._block && this->_baseIndex == other._baseIndex; +bool InputAqlItemRow::isSameBlockAndIndex(InputAqlItemRow const& other) const noexcept { + return this->_block == other._block && this->_baseIndex == other._baseIndex; } -bool InputAqlItemRow::operator!=(InputAqlItemRow const& other) const noexcept { - return !(*this == other); -} bool InputAqlItemRow::equates(InputAqlItemRow const& other, velocypack::Options const* const options) const noexcept { diff --git a/arangod/Aql/InputAqlItemRow.h b/arangod/Aql/InputAqlItemRow.h index 0212b9f87709..b5f4377f5a26 100644 --- a/arangod/Aql/InputAqlItemRow.h +++ b/arangod/Aql/InputAqlItemRow.h @@ -88,12 +88,9 @@ class InputAqlItemRow { RegisterCount getNrRegisters() const noexcept; - // Note that == and != here check whether the rows are *identical*, that is, - // the same row in the same block. - // TODO Make this a named method - bool operator==(InputAqlItemRow const& other) const noexcept; - - bool operator!=(InputAqlItemRow const& other) const noexcept; + // This the old operator==. It tests if both rows refer to the _same_ block and + // the _same_ index. + [[nodiscard]] bool isSameBlockAndIndex(InputAqlItemRow const& other) const noexcept; // This checks whether the rows are equivalent, in the sense that they hold // the same number of registers and their entry-AqlValues compare equal. 
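The distinction this patch draws is between row identity and row equivalence: isSameBlockAndIndex() holds only for the same index into the same underlying block (the check formerly spelled operator==), while equates() compares the stored register values. A minimal sketch of the two semantics, using a simplified stand-in row type rather than the real InputAqlItemRow/AqlValue machinery (all names in the sketch are illustrative only):

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

// Simplified stand-in for an AqlItemBlock: rows of plain ints.
using Block = std::vector<std::vector<int>>;

struct RowRef {
  std::shared_ptr<Block> block;
  std::size_t index{0};

  // Identity: literally the same row in the same block
  // (what the removed operator== used to test).
  bool isSameBlockAndIndex(RowRef const& other) const noexcept {
    return block == other.block && index == other.index;
  }

  // Equivalence: the stored values compare equal, possibly across blocks
  // (the role played by equates() on the real row types).
  bool equates(RowRef const& other) const {
    return (*block)[index] == (*other.block)[other.index];
  }
};

int main() {
  auto b1 = std::make_shared<Block>(Block{{1, 2}, {3, 4}});
  auto b2 = std::make_shared<Block>(Block{{1, 2}});

  RowRef r0{b1, 0}, r0Again{b1, 0}, r1{b1, 1}, other{b2, 0};

  assert(r0.isSameBlockAndIndex(r0Again));  // same block, same index
  assert(!r0.isSameBlockAndIndex(other));   // different blocks: not identical
  assert(r0.equates(other));                // but their values are equal
  assert(!r0.equates(r1));
  return 0;
}

Keeping identity as a named method rather than an operator makes call sites such as testIfWeMustClone() read unambiguously, which is exactly the substitution the following hunks perform.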
diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index 330948f49a80..df485265ebc7 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -327,7 +327,7 @@ void OutputAqlItemRow::memorizeRow(ShadowAqlItemRow const& sou template <> bool OutputAqlItemRow::testIfWeMustClone(InputAqlItemRow const& sourceRow) const { - return _baseIndex == 0 || _lastSourceRow != sourceRow; + return _baseIndex == 0 || !_lastSourceRow.isSameBlockAndIndex(sourceRow); } template <> diff --git a/arangod/Aql/ShadowAqlItemRow.cpp b/arangod/Aql/ShadowAqlItemRow.cpp index c7a77595ba04..c3e9df3f4994 100644 --- a/arangod/Aql/ShadowAqlItemRow.cpp +++ b/arangod/Aql/ShadowAqlItemRow.cpp @@ -88,12 +88,8 @@ AqlItemBlock const& ShadowAqlItemRow::block() const noexcept { return *_block; } -bool ShadowAqlItemRow::operator==(ShadowAqlItemRow const& other) const noexcept { - return this->_block == other._block && this->_baseIndex == other._baseIndex; -} - -bool ShadowAqlItemRow::operator!=(ShadowAqlItemRow const& other) const noexcept { - return !(*this == other); +bool ShadowAqlItemRow::isSameBlockAndIndex(ShadowAqlItemRow const& other) const noexcept { + return this->_block == other._block && this->_baseIndex == other._baseIndex; } bool ShadowAqlItemRow::equates(ShadowAqlItemRow const& other, diff --git a/arangod/Aql/ShadowAqlItemRow.h b/arangod/Aql/ShadowAqlItemRow.h index b0761691a92d..b16e60114af0 100644 --- a/arangod/Aql/ShadowAqlItemRow.h +++ b/arangod/Aql/ShadowAqlItemRow.h @@ -102,12 +102,9 @@ class ShadowAqlItemRow { /// NOTE: Innermost query will have depth 0. Outermost query wil have highest depth. [[nodiscard]] uint64_t getDepth() const; - // Note that == and != here check whether the rows are *identical*, that is, + // Check whether the rows are *identical*, that is, // the same row in the same block. 
- // TODO Make this a named method - [[nodiscard]] bool operator==(ShadowAqlItemRow const& other) const noexcept; - - [[nodiscard]] bool operator!=(ShadowAqlItemRow const& other) const noexcept; + [[nodiscard]] bool isSameBlockAndIndex(ShadowAqlItemRow const& other) const noexcept; // This checks whether the rows are equivalent, in the sense that they hold // the same number of registers and their entry-AqlValues compare equal, diff --git a/tests/Aql/AqlItemRowTest.cpp b/tests/Aql/AqlItemRowTest.cpp index 15de8dab1930..1784222752dc 100644 --- a/tests/Aql/AqlItemRowTest.cpp +++ b/tests/Aql/AqlItemRowTest.cpp @@ -350,58 +350,25 @@ TYPED_TEST(AqlItemRowsCommonEqTest, row_eq_operators) { RowType const otherInvalidRow = createInvalidRow(); // same rows must be equal - EXPECT_TRUE((RowType{block, 0}.operator==(RowType{block, 0}))); - EXPECT_TRUE((RowType{block, 0} == RowType{block, 0})); - EXPECT_TRUE((RowType{block, 1}.operator==(RowType{block, 1}))); - EXPECT_TRUE((RowType{block, 1} == RowType{block, 1})); - EXPECT_FALSE((RowType{block, 0}.operator!=(RowType{block, 0}))); - EXPECT_FALSE((RowType{block, 0} != RowType{block, 0})); - EXPECT_FALSE((RowType{block, 1}.operator!=(RowType{block, 1}))); - EXPECT_FALSE((RowType{block, 1} != RowType{block, 1})); + EXPECT_TRUE((RowType{block, 0}.isSameBlockAndIndex(RowType{block, 0}))); + EXPECT_TRUE((RowType{block, 1}.isSameBlockAndIndex(RowType{block, 1}))); // different rows in the same block must be non-equal - EXPECT_FALSE((RowType{block, 0}.operator==(RowType{block, 1}))); - EXPECT_FALSE((RowType{block, 0} == RowType{block, 1})); - EXPECT_FALSE((RowType{block, 1}.operator==(RowType{block, 0}))); - EXPECT_FALSE((RowType{block, 1} == RowType{block, 0})); - EXPECT_TRUE((RowType{block, 0}.operator!=(RowType{block, 1}))); - EXPECT_TRUE((RowType{block, 0} != RowType{block, 1})); - EXPECT_TRUE((RowType{block, 1}.operator!=(RowType{block, 0}))); - EXPECT_TRUE((RowType{block, 1} != RowType{block, 0})); + EXPECT_FALSE((RowType{block, 0}.isSameBlockAndIndex(RowType{block, 1}))); + EXPECT_FALSE((RowType{block, 1}.isSameBlockAndIndex(RowType{block, 0}))); // rows in different blocks must be non-equal - EXPECT_FALSE((RowType{block, 0}.operator==(RowType{otherBlock, 0}))); - EXPECT_FALSE((RowType{block, 0} == RowType{otherBlock, 0})); - EXPECT_FALSE((RowType{block, 1}.operator==(RowType{otherBlock, 0}))); - EXPECT_FALSE((RowType{block, 1} == RowType{otherBlock, 0})); - EXPECT_FALSE((RowType{otherBlock, 0}.operator==(RowType{block, 0}))); - EXPECT_FALSE((RowType{otherBlock, 0} == RowType{block, 0})); - EXPECT_FALSE((RowType{otherBlock, 0}.operator==(RowType{block, 1}))); - EXPECT_FALSE((RowType{otherBlock, 0} == RowType{block, 1})); - EXPECT_TRUE((RowType{block, 0}.operator!=(RowType{otherBlock, 0}))); - EXPECT_TRUE((RowType{block, 0} != RowType{otherBlock, 0})); - EXPECT_TRUE((RowType{block, 1}.operator!=(RowType{otherBlock, 0}))); - EXPECT_TRUE((RowType{block, 1} != RowType{otherBlock, 0})); - EXPECT_TRUE((RowType{otherBlock, 0}.operator!=(RowType{block, 0}))); - EXPECT_TRUE((RowType{otherBlock, 0} != RowType{block, 0})); - EXPECT_TRUE((RowType{otherBlock, 0}.operator!=(RowType{block, 1}))); - EXPECT_TRUE((RowType{otherBlock, 0} != RowType{block, 1})); + EXPECT_FALSE((RowType{block, 0}.isSameBlockAndIndex(RowType{otherBlock, 0}))); + EXPECT_FALSE((RowType{block, 1}.isSameBlockAndIndex(RowType{otherBlock, 0}))); + EXPECT_FALSE((RowType{otherBlock, 0}.isSameBlockAndIndex(RowType{block, 0}))); + EXPECT_FALSE((RowType{otherBlock, 0}.isSameBlockAndIndex(RowType{block, 
1}))); // comparisons with an invalid row must be false - EXPECT_FALSE((RowType{block, 0}.operator==(invalidRow))); - EXPECT_FALSE((RowType{block, 0} == invalidRow)); - EXPECT_FALSE((invalidRow.operator==(RowType{block, 0}))); - EXPECT_FALSE((invalidRow == RowType{block, 0})); - EXPECT_TRUE((RowType{block, 0}.operator!=(invalidRow))); - EXPECT_TRUE((RowType{block, 0} != invalidRow)); - EXPECT_TRUE((invalidRow.operator!=(RowType{block, 0}))); - EXPECT_TRUE((invalidRow != RowType{block, 0})); + EXPECT_FALSE((RowType{block, 0}.isSameBlockAndIndex(invalidRow))); + EXPECT_FALSE((invalidRow.isSameBlockAndIndex(RowType{block, 0}))); // two invalid rows must be equal - EXPECT_TRUE((invalidRow.operator==(otherInvalidRow))); - EXPECT_TRUE((invalidRow == otherInvalidRow)); - EXPECT_FALSE((invalidRow.operator!=(otherInvalidRow))); - EXPECT_FALSE((invalidRow != otherInvalidRow)); + EXPECT_TRUE((invalidRow.isSameBlockAndIndex(otherInvalidRow))); } TYPED_TEST(AqlItemRowsCommonEqTest, row_equivalence) { From 95a73267de6b8dadd6e9019b26eadb80d0b3f12e Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Fri, 14 Feb 2020 17:10:16 +0100 Subject: [PATCH 065/122] New style IndexExecutor. (#11029) * New style IndexExecutor. * Small changes. * Fixing some logic for skipping. * Small changes. * TRI_ASSERT(FALSE); * Removed _fetcher member. (again) --- arangod/Aql/ExecutionBlockImpl.cpp | 12 +- arangod/Aql/IndexExecutor.cpp | 290 ++++++++++++++++------------- arangod/Aql/IndexExecutor.h | 25 ++- 3 files changed, 185 insertions(+), 142 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 229e30debfb4..65f1eba03c24 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -132,9 +132,10 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); template constexpr bool isNewStyleExecutor = is_one_of_v, - IdExecutor>, ReturnExecutor, + IdExecutor>, ReturnExecutor, IndexExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS - TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode + TestLambdaExecutor, + TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif ShortestPathExecutor>; @@ -1043,11 +1044,12 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert(useExecutor == (is_one_of_v), + SortedCollectExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! 
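The executor gating in the ExecutionBlockImpl.cpp hunk above rests on a small variadic trait: a C++17 fold expression over std::is_same_v decides at compile time whether the instantiated executor is in the list of "new style" executors, and static_asserts keep the skip-variant dispatch consistent with that list. A minimal sketch of the idiom, with placeholder executor types standing in for the real template parameters:

#include <type_traits>

template <class T, class... Ts>
constexpr bool is_one_of_v = (std::is_same_v<T, Ts> || ...);

// Placeholder executors; the real list names ReturnExecutor, IndexExecutor,
// HashedCollectExecutor, ShortestPathExecutor, and friends.
struct ReturnExecutor {};
struct IndexExecutor {};
struct SortExecutor {};

template <class Executor>
constexpr bool isNewStyleExecutor =
    is_one_of_v<Executor, ReturnExecutor, IndexExecutor>;

// The dispatch is then checked at compile time instead of at runtime.
static_assert(isNewStyleExecutor<IndexExecutor>);
static_assert(!isNewStyleExecutor<SortExecutor>);

int main() { return 0; }

Each patch in this series that migrates an executor to the new execute() API only needs to append that executor's type to the list; the fold expression and the accompanying static_asserts pick the change up without further edits.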
diff --git a/arangod/Aql/IndexExecutor.cpp b/arangod/Aql/IndexExecutor.cpp index ae585b5c0ba0..d862d9b98900 100644 --- a/arangod/Aql/IndexExecutor.cpp +++ b/arangod/Aql/IndexExecutor.cpp @@ -26,6 +26,7 @@ #include "IndexExecutor.h" #include "Aql/AqlValue.h" +#include "Aql/AqlValueMaterializer.h" #include "Aql/Ast.h" #include "Aql/Collection.h" #include "Aql/DocumentProducingHelper.h" @@ -38,7 +39,6 @@ #include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/SingleRowFetcher.h" -#include "Aql/AqlValueMaterializer.h" #include "Basics/ScopeGuard.h" #include "Cluster/ServerState.h" #include "ExecutorExpressionContext.h" @@ -52,6 +52,10 @@ #include #include +// Set this to true to activate devel logging +#define LOG_DEVEL_INDEX_ENABLED false +#define LOG_DEVEL_IDX LOG_DEVEL_IF(LOG_DEVEL_INDEX_ENABLED) + using namespace arangodb; using namespace arangodb::aql; @@ -74,7 +78,8 @@ template IndexIterator::DocumentCallback getCallback(DocumentProducingFunctionContext& context, transaction::Methods::IndexHandle const& index, IndexNode::IndexValuesRegisters const& outNonMaterializedIndRegs) { - return [&context, &index, &outNonMaterializedIndRegs](LocalDocumentId const& token, VPackSlice slice) { + return [&context, &index, &outNonMaterializedIndRegs](LocalDocumentId const& token, + VPackSlice slice) { if constexpr (checkUniqueness) { if (!context.checkUniqueness(token)) { // Document already found, skip it @@ -119,7 +124,7 @@ IndexIterator::DocumentCallback getCallback(DocumentProducingFunctionContext& co TRI_ASSERT(!output.isFull()); output.moveValueInto(indReg.second, input, guard); } - } else { // primary + } else { // primary auto indReg = outNonMaterializedIndRegs.second.cbegin(); TRI_ASSERT(indReg != outNonMaterializedIndRegs.second.cend()); if (ADB_UNLIKELY(indReg == outNonMaterializedIndRegs.second.cend())) { @@ -142,8 +147,7 @@ static inline DocumentProducingFunctionContext createContext(InputAqlItemRow con IndexExecutorInfos const& infos) { return DocumentProducingFunctionContext( inputRow, nullptr, infos.getOutputRegisterId(), infos.getProduceResult(), - infos.getQuery(), infos.getFilter(), - infos.getProjections(), + infos.getQuery(), infos.getFilter(), infos.getProjections(), infos.getCoveringIndexAttributePositions(), false, infos.getUseRawDocumentPointers(), infos.getIndexes().size() > 1 || infos.hasMultipleExpansions()); } @@ -151,25 +155,20 @@ static inline DocumentProducingFunctionContext createContext(InputAqlItemRow con IndexExecutorInfos::IndexExecutorInfos( std::shared_ptr>&& writableOutputRegisters, - RegisterId nrInputRegisters, - RegisterId outputRegister, - RegisterId nrOutputRegisters, + RegisterId nrInputRegisters, RegisterId outputRegister, RegisterId nrOutputRegisters, // cppcheck-suppress passedByValue std::unordered_set registersToClear, // cppcheck-suppress passedByValue std::unordered_set registersToKeep, ExecutionEngine* engine, Collection const* collection, Variable const* outVariable, bool produceResult, - Expression* filter, - std::vector const& projections, + Expression* filter, std::vector const& projections, std::vector const& coveringIndexAttributePositions, bool useRawDocumentPointers, std::vector>&& nonConstExpression, std::vector&& expInVars, std::vector&& expInRegs, bool hasV8Expression, AstNode const* condition, std::vector indexes, Ast* ast, - IndexIteratorOptions options, - IndexNode::IndexValuesRegisters&& outNonMaterializedIndRegs) - : ExecutorInfos(make_shared_unordered_set(), - writableOutputRegisters, + IndexIteratorOptions options, 
IndexNode::IndexValuesRegisters&& outNonMaterializedIndRegs) + : ExecutorInfos(make_shared_unordered_set(), writableOutputRegisters, nrInputRegisters, nrOutputRegisters, std::move(registersToClear), std::move(registersToKeep)), _indexes(std::move(indexes)), @@ -265,9 +264,7 @@ transaction::Methods* IndexExecutorInfos::getTrxPtr() const noexcept { return _engine->getQuery()->trx(); } -Expression* IndexExecutorInfos::getFilter() const noexcept { - return _filter; -} +Expression* IndexExecutorInfos::getFilter() const noexcept { return _filter; } std::vector const& IndexExecutorInfos::getCoveringIndexAttributePositions() const noexcept { return _coveringIndexAttributePositions; @@ -345,25 +342,31 @@ IndexExecutor::CursorReader::CursorReader(IndexExecutorInfos const& infos, _type(infos.isLateMaterialized() ? Type::LateMaterialized : !infos.getProduceResult() - ? Type::NoResult - : _cursor->hasCovering() && // if change see IndexNode::canApplyLateDocumentMaterializationRule() - !infos.getCoveringIndexAttributePositions().empty() - ? Type::Covering - : Type::Document) { + ? Type::NoResult + : _cursor->hasCovering() && // if change see IndexNode::canApplyLateDocumentMaterializationRule() + !infos.getCoveringIndexAttributePositions().empty() + ? Type::Covering + : Type::Document) { switch (_type) { - case Type::NoResult: { - _documentNonProducer = checkUniqueness ? getNullCallback(context) : getNullCallback(context); - break; - } - case Type::LateMaterialized: - _documentProducer = checkUniqueness ? ::getCallback(context, _index, _infos.getOutNonMaterializedIndRegs()) : - ::getCallback(context, _index, _infos.getOutNonMaterializedIndRegs()); - break; - default: - _documentProducer = checkUniqueness ? buildDocumentCallback(context) : buildDocumentCallback(context); - break; + case Type::NoResult: { + _documentNonProducer = checkUniqueness ? getNullCallback(context) + : getNullCallback(context); + break; + } + case Type::LateMaterialized: + _documentProducer = + checkUniqueness + ? ::getCallback(context, _index, _infos.getOutNonMaterializedIndRegs()) + : ::getCallback(context, _index, _infos.getOutNonMaterializedIndRegs()); + break; + default: + _documentProducer = checkUniqueness + ? buildDocumentCallback(context) + : buildDocumentCallback(context); + break; } - _documentSkipper = checkUniqueness ? buildDocumentCallback(context) : buildDocumentCallback(context); + _documentSkipper = checkUniqueness ? 
buildDocumentCallback(context) + : buildDocumentCallback(context); } IndexExecutor::CursorReader::CursorReader(CursorReader&& other) noexcept @@ -440,9 +443,8 @@ void IndexExecutor::CursorReader::reset() { IndexExecutor::IndexExecutor(Fetcher& fetcher, Infos& infos) : _infos(infos), - _fetcher(fetcher), _documentProducingFunctionContext(::createContext(_input, _infos)), - _state(ExecutionState::HASMORE), + _state(ExecutorState::HASMORE), _input(InputAqlItemRow{CreateInvalidInputRowHint{}}), _currentIndex(_infos.getIndexes().size()), _skipped(0) { @@ -456,7 +458,7 @@ IndexExecutor::IndexExecutor(Fetcher& fetcher, Infos& infos) IndexExecutor::~IndexExecutor() = default; void IndexExecutor::initializeCursor() { - _state = ExecutionState::HASMORE; + _state = ExecutorState::HASMORE; _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; _documentProducingFunctionContext.reset(); _currentIndex = _infos.getIndexes().size(); @@ -648,6 +650,26 @@ bool IndexExecutor::advanceCursor() { } std::pair IndexExecutor::produceRows(OutputAqlItemRow& output) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +std::tuple IndexExecutor::skipRows(size_t toSkip) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +IndexExecutor::CursorReader& IndexExecutor::getCursor() { + TRI_ASSERT(_currentIndex < _cursors.size()); + return _cursors[_currentIndex]; +} + +bool IndexExecutor::needsUniquenessCheck() const noexcept { + return _infos.getIndexes().size() > 1 || _infos.hasMultipleExpansions(); +} + +auto IndexExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { TRI_IF_FAILURE("IndexExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -657,45 +679,63 @@ std::pair IndexExecutor::produceRows(OutputAqlItemRo TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumFiltered() == 0); _documentProducingFunctionContext.setOutputRow(&output); - while (true) { - if (!_input) { - if (_state == ExecutionState::DONE) { - return {_state, stats}; - } - - std::tie(_state, _input) = _fetcher.fetchRow(); - - if (!_input) { - TRI_ASSERT(_state == ExecutionState::WAITING || _state == ExecutionState::DONE); - return {_state, stats}; - } - TRI_ASSERT(_state != ExecutionState::WAITING); - TRI_ASSERT(_input); - initIndexes(_input); - if (!advanceCursor()) { - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - // just to validate that after continue we get into retry mode - TRI_ASSERT(!_input); - continue; + AqlCall clientCall = output.getClientCall(); + LOG_DEVEL_IDX << "IndexExecutor::produceRows " << clientCall; + + /* + * Logic of this executor is as follows: + * - peek a data row + * - read the indexes for this data row until its done + * - continue + */ + + while (!output.isFull()) { + LOG_DEVEL_IDX << "IndexExecutor::produceRows output.numRowsLeft() == " + << output.numRowsLeft(); + if (!_input.isInitialized()) { + std::tie(_state, _input) = inputRange.peekDataRow(); + LOG_DEVEL_IDX + << "IndexExecutor::produceRows input not initialized, peek next row: " << _state + << " " << std::boolalpha << _input.isInitialized(); + + if (_input.isInitialized()) { + LOG_DEVEL_IDX << "IndexExecutor::produceRows initIndexes"; + initIndexes(_input); + if (!advanceCursor()) { + LOG_DEVEL_IDX << "IndexExecutor::produceRows failed to advanceCursor " + "after init"; + std::ignore = inputRange.nextDataRow(); + _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; + // just to validate that after continue we get into retry 
mode + TRI_ASSERT(!_input); + continue; + } + } else { + break; } } - TRI_ASSERT(_input.isInitialized()); + TRI_ASSERT(_input.isInitialized()); // Short Loop over the output block here for performance! while (!output.isFull()) { - if (!getCursor().hasMore()) { - if (!advanceCursor()) { - // End of this cursor. Either return or try outer loop again. - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - break; - } + LOG_DEVEL_IDX << "IndexExecutor::produceRows::innerLoop hasMore = " << std::boolalpha + << getCursor().hasMore() << " " << output.numRowsLeft(); + + if (!getCursor().hasMore() && !advanceCursor()) { + LOG_DEVEL_IDX << "IndexExecutor::produceRows::innerLoop cursor does " + "not have more and advancing failed"; + std::ignore = inputRange.nextDataRow(); + _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; + break; } - auto& cursor = getCursor(); - TRI_ASSERT(cursor.hasMore()); + + TRI_ASSERT(getCursor().hasMore()); // Read the next elements from the index - bool more = cursor.readIndex(output); - TRI_ASSERT(more == cursor.hasMore()); + bool more = getCursor().readIndex(output); + TRI_ASSERT(more == getCursor().hasMore()); + + LOG_DEVEL_IDX << "IndexExecutor::produceRows::innerLoop output.numRowsWritten() == " << output.numRowsWritten(); // NOTE: more => output.isFull() does not hold, if we do uniqness checks. // The index iterator does still count skipped rows for limit. // Nevertheless loop here, the cursor has more so we will retigger @@ -703,18 +743,24 @@ std::pair IndexExecutor::produceRows(OutputAqlItemRo // Loop here, either we have filled the output // Or the cursor is done, so we need to advance } + + stats.incrScanned(_documentProducingFunctionContext.getAndResetNumScanned()); stats.incrFiltered(_documentProducingFunctionContext.getAndResetNumFiltered()); - if (output.isFull()) { - if (_state == ExecutionState::DONE && !_input) { - return {ExecutionState::DONE, stats}; - } - return {ExecutionState::HASMORE, stats}; - } } + + + bool reportDone = _state == ExecutorState::DONE && !_input.isInitialized(); + + AqlCall upstreamCall; + upstreamCall.fullCount = clientCall.needsFullCount(); + + LOG_DEVEL_IDX << "IndexExecutor::produceRows reporting state " << (reportDone ? ExecutorState::DONE : ExecutorState::HASMORE); + return {reportDone ? ExecutorState::DONE : ExecutorState::HASMORE, stats, upstreamCall}; } -std::tuple IndexExecutor::skipRows(size_t toSkip) { +auto IndexExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + -> std::tuple { // This code does not work correctly with multiple indexes, as it does not // check for duplicates. 
Currently, no plan is generated where that can // happen, because with multiple indexes, the FILTER is not removed and thus @@ -724,74 +770,62 @@ std::tuple IndexExecutor::skipRows THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange " << clientCall; + IndexStats stats{}; - while (_skipped < toSkip) { + while (clientCall.needSkipMore()) { + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipped " << _skipped + << " " << clientCall.getOffset(); // get an input row first, if necessary - if (!_input) { - if (_state == ExecutionState::DONE) { - size_t skipped = _skipped; - - _skipped = 0; - - return std::make_tuple(_state, stats, skipped); // tuple, cannot use initializer list due to build failure - } - - std::tie(_state, _input) = _fetcher.fetchRow(); - - if (_state == ExecutionState::WAITING) { - return std::make_tuple(_state, stats, 0); // tuple, cannot use initializer list due to build failure - } - - if (!_input) { - TRI_ASSERT(_state == ExecutionState::DONE); - size_t skipped = _skipped; - - _skipped = 0; - - return std::make_tuple(_state, stats, skipped); // tuple, cannot use initializer list due to build failure - } - - initIndexes(_input); - - if (!advanceCursor()) { - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - // just to validate that after continue we get into retry mode - TRI_ASSERT(!_input); - continue; + if (!_input.isInitialized()) { + std::tie(_state, _input) = inputRange.peekDataRow(); + LOG_DEVEL_IDX + << "IndexExecutor::skipRowsRange input not initialized, peek next row: " << _state + << " " << std::boolalpha << _input.isInitialized(); + + if (_input.isInitialized()) { + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange initIndexes"; + initIndexes(_input); + if (!advanceCursor()) { + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange failed to advanceCursor " + "after init"; + std::ignore = inputRange.nextDataRow(); + _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; + // just to validate that after continue we get into retry mode + TRI_ASSERT(!_input); + continue; + } + } else { + break; } } - if (!getCursor().hasMore()) { - if (!advanceCursor()) { - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - break; - } + if (!getCursor().hasMore() && !advanceCursor()) { + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange cursor does not " + "have more and advancing failed"; + std::ignore = inputRange.nextDataRow(); + _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; + continue; } - size_t skippedNow = getCursor().skipIndex(toSkip - _skipped); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipIndex(" + << clientCall.getOffset() << ")"; + size_t skippedNow = getCursor().skipIndex(clientCall.getOffset()); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipIndex(...) 
== " << skippedNow; + stats.incrScanned(skippedNow); _skipped += skippedNow; + clientCall.didSkip(skippedNow); } size_t skipped = _skipped; - _skipped = 0; - if (_state == ExecutionState::DONE && !_input) { - return std::make_tuple(ExecutionState::DONE, stats, - skipped); // tuple, cannot use initializer list due to build failure - } + AqlCall upstreamCall; + upstreamCall.fullCount = clientCall.needsFullCount(); - return std::make_tuple(ExecutionState::HASMORE, stats, - skipped); // tuple, cannot use initializer list due to build failure -} - -IndexExecutor::CursorReader& IndexExecutor::getCursor() { - TRI_ASSERT(_currentIndex < _cursors.size()); - return _cursors[_currentIndex]; -} -bool IndexExecutor::needsUniquenessCheck() const noexcept { - return _infos.getIndexes().size() > 1 || _infos.hasMultipleExpansions(); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange returning " << _state << " " << skipped << " " << upstreamCall; + return {_state, stats, skipped, upstreamCall}; } diff --git a/arangod/Aql/IndexExecutor.h b/arangod/Aql/IndexExecutor.h index c22d02621233..6dfbf13accdd 100644 --- a/arangod/Aql/IndexExecutor.h +++ b/arangod/Aql/IndexExecutor.h @@ -26,6 +26,8 @@ #ifndef ARANGOD_AQL_INDEX_EXECUTOR_H #define ARANGOD_AQL_INDEX_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/DocumentProducingHelper.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" @@ -57,12 +59,12 @@ struct NonConstExpression; class IndexExecutorInfos : public ExecutorInfos { public: IndexExecutorInfos( - std::shared_ptr>&& writableOutputRegisters, RegisterId nrInputRegisters, - RegisterId firstOutputRegister, RegisterId nrOutputRegisters, std::unordered_set registersToClear, + std::shared_ptr>&& writableOutputRegisters, + RegisterId nrInputRegisters, RegisterId firstOutputRegister, + RegisterId nrOutputRegisters, std::unordered_set registersToClear, std::unordered_set registersToKeep, ExecutionEngine* engine, Collection const* collection, Variable const* outVariable, bool produceResult, - Expression* filter, - std::vector const& projections, + Expression* filter, std::vector const& projections, std::vector const& coveringIndexAttributePositions, bool useRawDocumentPointers, std::vector>&& nonConstExpression, std::vector&& expInVars, std::vector&& expInRegs, @@ -195,9 +197,9 @@ class IndexExecutor { DocumentProducingFunctionContext& _context; Type const _type; - // Only one of _documentProducer and _documentNonProducer is set at a time, depending on _type. - // As std::function is not trivially destructible, it's safer not to use a - // union. + // Only one of _documentProducer and _documentNonProducer is set at a time, + // depending on _type. As std::function is not trivially destructible, it's + // safer not to use a union. 
IndexIterator::LocalDocumentIdCallback _documentNonProducer; IndexIterator::DocumentCallback _documentProducer; IndexIterator::DocumentCallback _documentSkipper; @@ -228,6 +230,12 @@ class IndexExecutor { std::pair produceRows(OutputAqlItemRow& output); std::tuple skipRows(size_t toSkip); + auto produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple; + + auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + -> std::tuple; + public: void initializeCursor(); @@ -242,9 +250,8 @@ class IndexExecutor { private: Infos& _infos; - Fetcher& _fetcher; DocumentProducingFunctionContext _documentProducingFunctionContext; - ExecutionState _state; + ExecutorState _state; InputAqlItemRow _input; /// @brief a vector of cursors for the index block From dac0a1053c81925c79f83f52292d41a6a15d4d13 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 18 Feb 2020 13:16:53 +0100 Subject: [PATCH 066/122] Feature/aql subquery execution block impl execute implementation debug logging (#11112) * Prepare reusability of LOG output for ExecutionBlock. * Added first version of DEBUG output to ExecutionBlockImpl --- arangod/Aql/ExecutionBlock.cpp | 34 +++++++++++++++++++++--------- arangod/Aql/ExecutionBlock.h | 3 +++ arangod/Aql/ExecutionBlockImpl.cpp | 22 +++++++++++++++++++ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index 1d7f97402f85..a9ba6fb98b3a 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -46,6 +46,10 @@ using namespace arangodb; using namespace arangodb::aql; +#define LOG_QUERY(logId, level) \ + LOG_TOPIC(logId, level, Logger::QUERIES) \ + << "[query#" << this->_engine->getQuery()->id() << "] " + namespace { std::string const doneString = "DONE"; @@ -335,28 +339,38 @@ void ExecutionBlock::traceExecuteEnd( if (_profile >= PROFILE_LEVEL_TRACE_1) { ExecutionNode const* node = getPlanNode(); - auto const queryId = this->_engine->getQuery()->id(); - LOG_TOPIC("60bbc", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " - << "execute done type=" << node->getTypeString() << " this=" << (uintptr_t)this - << " id=" << node->id() << " state=" << stateToString(state) - << " skipped=" << skipped << " produced=" << items; + LOG_QUERY("60bbc", INFO) << "execute done " << printBlockInfo() + << " state=" << stateToString(state) + << " skipped=" << skipped << " produced=" << items; if (_profile >= PROFILE_LEVEL_TRACE_2) { if (block == nullptr) { - LOG_TOPIC("9b3f4", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " + LOG_QUERY("9b3f4", INFO) << "execute type=" << node->getTypeString() << " result: nullptr"; } else { VPackBuilder builder; auto const options = trxVpackOptions(); block->toSimpleVPack(options, builder); - LOG_TOPIC("f12f9", INFO, Logger::QUERIES) - << "[query#" << queryId << "] " + LOG_QUERY("f12f9", INFO) << "execute type=" << node->getTypeString() << " result: " << VPackDumper::toString(builder.slice(), options); } } } } +} + +auto ExecutionBlock::printTypeInfo() const -> std::string const { + std::stringstream stream; + ExecutionNode const* node = getPlanNode(); + stream << "type=" << node->getTypeString(); + ; + return stream.str(); +} + +auto ExecutionBlock::printBlockInfo() const -> std::string const { + std::stringstream stream; + ExecutionNode const* node = getPlanNode(); + stream << printTypeInfo() << " this=" << (uintptr_t)this << " id=" << node->id(); + return stream.str(); } \ No newline at end of file diff 
--git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index f6e7370647e8..059217978485 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -157,6 +157,9 @@ class ExecutionBlock { // Trace the end of a execute call, potentially with result void traceExecuteEnd(std::tuple const& result); + [[nodiscard]] auto printBlockInfo() const -> std::string const; + [[nodiscard]] auto printTypeInfo() const -> std::string const; + protected: /// @brief the execution engine ExecutionEngine* _engine; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 65f1eba03c24..509882edcd2b 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -81,6 +81,10 @@ using namespace arangodb; using namespace arangodb::aql; +#define LOG_QUERY(logId, level) \ + LOG_TOPIC(logId, level, Logger::QUERIES) \ + << "[query#" << this->_engine->getQuery()->id() << "] " + /* * Creates a metafunction `checkName` that tests whether a class has a method * named `methodName`, used like this: @@ -1152,6 +1156,7 @@ std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (isNewStyleExecutor) { if (!stack.isRelevant()) { + LOG_QUERY("bf029", DEBUG) << "subquery bypassing executor " << printBlockInfo(); // We are bypassing subqueries. // This executor is not allowed to perform actions // However we need to maintain the upstream state. @@ -1180,9 +1185,12 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { clientCall = _clientRequest; } + LOG_QUERY("007ac", DEBUG) << "starting statemachine of executor " << printBlockInfo(); while (_execState != ExecState::DONE) { switch (_execState) { case ExecState::CHECKCALL: { + LOG_QUERY("cfe46", DEBUG) + << printTypeInfo() << " determine next action on call " << clientCall; _execState = nextState(clientCall); break; } @@ -1193,6 +1201,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { size_t canPassFullcount = clientCall.getLimit() == 0 && clientCall.needsFullCount(); #endif + LOG_QUERY("1f786", DEBUG) << printTypeInfo() << " call skipRows " << clientCall; auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE @@ -1231,6 +1240,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // Make sure there's a block allocated and set // the call TRI_ASSERT(clientCall.getLimit() > 0); + + LOG_QUERY("1f786", DEBUG) << printTypeInfo() << " call produceRows " << clientCall; ensureOutputBlock(std::move(clientCall)); TRI_ASSERT(_outputItemRow); @@ -1258,6 +1269,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::FASTFORWARD: { + LOG_QUERY("96e2c", DEBUG) + << printTypeInfo() << " all produced, fast forward to end up (sub-)query."; // We can either do FASTFORWARD or FULLCOUNT, difference is that // fullcount counts what is produced now, FASTFORWARD simply drops TRI_ASSERT(!clientCall.needsFullCount()); @@ -1278,6 +1291,9 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::FULLCOUNT: { + LOG_QUERY("ff258", DEBUG) + << printTypeInfo() + << " all produced, skip to end up (sub-)query, for fullCount."; auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); _skipped += skippedLocal; @@ -1293,6 +1309,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::UPSTREAM: { + LOG_QUERY("488de", DEBUG) + << printTypeInfo() << " request 
dependency " << _upstreamRequest; // If this triggers the executors produceRows function has returned // HASMORE even if it knew that upstream has no further rows. TRI_ASSERT(_upstreamState != ExecutionState::DONE); @@ -1319,6 +1337,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::SHADOWROWS: { + LOG_QUERY("7c63c", DEBUG) + << printTypeInfo() << " (sub-)query completed. Move ShadowRows."; // TODO: Check if there is a situation where we are at this point, but at the end of a block // Or if we would not recognize this beforehand // TODO: Check if we can have the situation that we are between two shadow rows here. @@ -1335,6 +1355,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _outputItemRow->copyRow(shadowRow); if (shadowRow.isRelevant()) { + LOG_QUERY("6d337", DEBUG) << printTypeInfo() << " init executor."; // We found a relevant shadow Row. // We need to reset the Executor // cppcheck-suppress unreadVariable @@ -1368,6 +1389,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(false); } } + LOG_QUERY("80c24", DEBUG) << printBlockInfo() << " local statemachine done. Return now."; // If we do not have an output, we simply return a nullptr here. auto outputBlock = _outputItemRow != nullptr ? _outputItemRow->stealBlock() : SharedAqlItemBlockPtr{nullptr}; From 1871aeb252112767938e0c95af64f9fb257caad9 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 18 Feb 2020 17:15:57 +0100 Subject: [PATCH 067/122] Feature/aql subquery operations stack hashed collect executor (#11103) * produce inputRange * produce inputRange test 1 * produce inputRange test 2 * produce inputRange test 3 * finalized tests * added inputRange skip and produce * added inputRange skip and produce * Added shouldSkip helper to AqlCall * Added implementation if execute to Constfetcher. Not specifically tested yet. * Moved IDExecutor to newStyle executor * Added some documentation to SingleRowFetcher * Adapted IdExecutor tests. * Added an assertion in ConstFetcher * Fixed initialize cursor in ExecutionBlockImpl * Added tests for Initilaize cursor in IdExecutor. There is a special implementation for this... * Fixed linker issues, because of variables in cpp files named identical and not beeing static * Started refactoring the Scatter/Gather Executor logic. Added an intermediate class that does not use a template and can be casted to. This state compiles but will not work. * Made BlocksWithClientsImpl a template, based on an executor * Added a dummy implementation to fetch a new block. Distribute not yet implemented. * Added superoptimistic first implementation of ScatterExecutor return. Only works if there are no shadowRows. * Implemented logic to slice Blocks in Scatter according to the given call. Also added handfull of test for it, not complete though * Fixed a Scatter Executor handling of softLimit. Added another test to ansure diverse calls of clients to Scatter * Added random ordering test framework on ScatterExecutor * Added some tests for ScatterExecutor including subqueries * Removed todo, as it has been solved by now * Implemented Scatter by using IDExecutor. Tests not green yet * Implementation of ConstFetcher that allows to inject fullBlocks of AqlItemRows, and not only single rows. Also handles ShadowRows now in a correct way * Do not jump over relevant shadow rows in ConstFetcher. * Fixed tests. * Moved information of DistributeExecutor into the Infos. This resulted in adapting the Constructor API. 
I used this situation to do a big cleanup of ancient code. From this point there is no way back. * Implemented the last bits of DistributeExecutor * 'Implemented' execute by fallback to old API in RemoteExecutor * Fixed an EdgeCase with Scatter Executor, where the data was modified in place right after scatter. This has side-effects on peer executors. So we for now do a full copy of every block here. * Removed invalid assertions, also fixed RegisterInput to DistributeExecutor * Removed obsolete TODOs * produce inputRange * produce inputRange test 1 * produce inputRange test 2 * produce inputRange test 3 * finalized tests * produce inputRange * produce inputRange test 1 * produce inputRange test 2 * produce inputRange test 3 * finalized tests * Made the branch compile again * Update arangod/Aql/IdExecutor.cpp Co-Authored-By: Lars Maier * Intermediate commit. Empty might not compile * Added some external helper functions to compare ItemBlocks in different variants * Added a brief description for the bunch of input parameters in HashedCollect * Allow unordered output tests in TestHelper class. * Added first HashedCollectExecutor test using the helper * Allow to use never wait in getSome / skipSome variant of WaitingExecutionBlockMock. * Added more HashedCollect tests (multi group, count) and started with framework for AggregateMethods * Fixed simple Aggregate Test * Improved output for unordered Row in Output test. * Added test around collecting of Arrays and Objects * Also allow a fullCount call in the old Execute Wrapper * Almost Completed register and call behaviour test suite * Added a todo for a missing test * Improved setup of AqlTestCase framework, not only start the server once. This speeds up AQL test by at least a factor of 10. * implemented newStyle HashedCollect executor. * Cleanup of old code. * Added test cases where the upstream producer lies to HashedCollect. * Let HashedCollect and Impl life with the fact that upstream lies. * Improved gtest with multiple calls. 
* Simplify input consume logic of HashedCollect Co-authored-by: Heiko Co-authored-by: Lars Maier --- arangod/Aql/AqlCall.h | 6 + arangod/Aql/ExecutionBlockImpl.cpp | 24 +- arangod/Aql/HashedCollectExecutor.cpp | 157 +-- arangod/Aql/HashedCollectExecutor.h | 62 +- tests/Aql/ExecutorTestHelper.cpp | 133 ++- tests/Aql/ExecutorTestHelper.h | 136 ++- tests/Aql/HashedCollectExecutorTest.cpp | 1181 +++++++++++------------ tests/Aql/IdExecutorTest.cpp | 22 +- tests/Aql/SortedCollectExecutorTest.cpp | 3 +- tests/Aql/WaitingExecutionBlockMock.cpp | 20 +- 10 files changed, 971 insertions(+), 773 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 72acbb351c85..63fbf31b5883 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -65,6 +65,12 @@ struct AqlCall { return !call.hasHardLimit() && call.getLimit() > 0 && call.getOffset() == 0; } + // TODO Remove me, this will not be necessary later + static bool IsFullCountCall(AqlCall const& call) { + return call.hasHardLimit() && call.getLimit() == 0 && + call.getOffset() == 0 && call.needsFullCount(); + } + std::size_t offset{0}; // TODO: The defaultBatchSize function could move into this file instead Limit softLimit{Infinity{}}; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 509882edcd2b..acb04789adb7 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -136,7 +136,7 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); template constexpr bool isNewStyleExecutor = is_one_of_v, - IdExecutor>, ReturnExecutor, IndexExecutor, + IdExecutor>, ReturnExecutor, HashedCollectExecutor, IndexExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode @@ -617,7 +617,8 @@ std::tuple ExecutionBlockImpl ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { return {_upstreamState, skippedLocal, bypassedRange.getBlock()}; } AqlCall clientCall = stack.popCall(); + ExecutorState localExecutorState = ExecutorState::DONE; // We can only have returned the following internal states TRI_ASSERT(_execState == ExecState::CHECKCALL || _execState == ExecState::SHADOWROWS || @@ -1220,6 +1228,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(clientCall.getOffset() + skippedLocal == offsetBefore); } #endif + localExecutorState = state; _skipped += skippedLocal; _engine->_stats += stats; // The execute might have modified the client call. 
@@ -1249,6 +1258,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { auto const [state, stats, call] = _executor.produceRows(_lastRange, *_outputItemRow); _engine->_stats += stats; + localExecutorState = state; // Produce might have modified the clientCall clientCall = _outputItemRow->getClientCall(); @@ -1274,8 +1284,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // We can either do FASTFORWARD or FULLCOUNT, difference is that // fullcount counts what is produced now, FASTFORWARD simply drops TRI_ASSERT(!clientCall.needsFullCount()); - // We can drop all dataRows from upstream + // We need to claim that the Executor was done + localExecutorState = ExecutorState::DONE; + // We can drop all dataRows from upstream while (_lastRange.hasDataRow()) { auto [state, row] = _lastRange.nextDataRow(); TRI_ASSERT(row.isInitialized()); @@ -1298,6 +1310,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { executeSkipRowsRange(_lastRange, clientCall); _skipped += skippedLocal; _engine->_stats += stats; + localExecutorState = state; if (state == ExecutorState::DONE) { _execState = ExecState::SHADOWROWS; @@ -1403,7 +1416,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // We return skipped here, reset member size_t skipped = _skipped; _skipped = 0; - if (_lastRange.hasDataRow() || _lastRange.hasShadowRow()) { + if (localExecutorState == ExecutorState::HASMORE || + _lastRange.hasDataRow() || _lastRange.hasShadowRow()) { // We have skipped or/and return data, otherwise we cannot return HASMORE TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; diff --git a/arangod/Aql/HashedCollectExecutor.cpp b/arangod/Aql/HashedCollectExecutor.cpp index f7ebd6778a3e..6f8e086fb6f6 100644 --- a/arangod/Aql/HashedCollectExecutor.cpp +++ b/arangod/Aql/HashedCollectExecutor.cpp @@ -26,6 +26,7 @@ #include "HashedCollectExecutor.h" #include "Aql/Aggregator.h" +#include "Aql/AqlCall.h" #include "Aql/AqlValue.h" #include "Aql/ExecutionNode.h" #include "Aql/ExecutorInfos.h" @@ -111,16 +112,13 @@ HashedCollectExecutor::createAggregatorFactories(HashedCollectExecutor::Infos co HashedCollectExecutor::HashedCollectExecutor(Fetcher& fetcher, Infos& infos) : _infos(infos), - _fetcher(fetcher), - _upstreamState(ExecutionState::HASMORE), _lastInitializedInputRow(InputAqlItemRow{CreateInvalidInputRowHint{}}), _allGroups(1024, AqlValueGroupHash(_infos.getTransaction(), _infos.getGroupRegisters().size()), AqlValueGroupEqual(_infos.getTransaction())), _isInitialized(false), - _aggregatorFactories(), - _returnedGroups(0) { + _aggregatorFactories() { _aggregatorFactories = createAggregatorFactories(_infos); _nextGroupValues.reserve(_infos.getGroupRegisters().size()); }; @@ -205,65 +203,109 @@ void HashedCollectExecutor::writeCurrentGroupToOutput(OutputAqlItemRow& output) } } -ExecutionState HashedCollectExecutor::init() { - TRI_ASSERT(!_isInitialized); - - // fetch & consume all input - while (_upstreamState != ExecutionState::DONE) { - InputAqlItemRow input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - std::tie(_upstreamState, input) = _fetcher.fetchRow(); +std::pair HashedCollectExecutor::produceRows(OutputAqlItemRow& output) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - if (_upstreamState == ExecutionState::WAITING) { - TRI_ASSERT(!input.isInitialized()); - return ExecutionState::WAITING; +auto 
HashedCollectExecutor::consumeInputRange(AqlItemBlockInputRange& inputRange) -> bool { + TRI_ASSERT(!_isInitialized); + do { + auto [state, input] = inputRange.nextDataRow(); + if (input) { + consumeInputRow(input); + // We need to retain this + _lastInitializedInputRow = input; } + if (state == ExecutorState::DONE) { + // initialize group iterator for output + _currentGroup = _allGroups.begin(); + return true; + } + } while (inputRange.hasDataRow()); - // !input.isInitialized() => _upstreamState == ExecutionState::DONE - TRI_ASSERT(input.isInitialized() || _upstreamState == ExecutionState::DONE); - - // needed to remember the last valid input aql item row - // NOTE: this might impact the performance - if (input.isInitialized()) { - _lastInitializedInputRow = input; + TRI_ASSERT(inputRange.upstreamState() == ExecutorState::HASMORE); + return false; +} - consumeInputRow(input); - } +auto HashedCollectExecutor::returnState() const -> ExecutorState { + if (!_isInitialized || _currentGroup != _allGroups.end()) { + // We have either not started, or not produce all groups. + return ExecutorState::HASMORE; } - - // initialize group iterator for output - _currentGroup = _allGroups.begin(); - // The values within are not supposed to be used anymore. - _nextGroupValues.clear(); - return ExecutionState::DONE; + return ExecutorState::DONE; } -std::pair HashedCollectExecutor::produceRows(OutputAqlItemRow& output) { +/** + * @brief Produce rows. + * We need to consume all rows from the inputRange, except the + * last Row. This is to indicate that this executor is not yet done. + * Afterwards we write all groups into the output. + * Only if we have written the last group we consume + * the remaining inputRow. This is to indicate that + * this executor cannot produce anymore. + * + * @param inputRange Data from input + * @param output Where to write the output + * @return std::tuple + */ + +auto HashedCollectExecutor::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { TRI_IF_FAILURE("HashedCollectExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - if (!_isInitialized) { - // fetch & consume all input and initialize output cursor - ExecutionState state = init(); - if (state == ExecutionState::WAITING) { - return {state, NoStats{}}; + // Consume the input range + _isInitialized = consumeInputRange(inputRange); + } + + if (_isInitialized) { + while (_currentGroup != _allGroups.end() && !output.isFull()) { + writeCurrentGroupToOutput(output); + ++_currentGroup; + output.advanceRow(); } - TRI_ASSERT(state == ExecutionState::DONE); - _isInitialized = true; } - // produce output - if (_currentGroup != _allGroups.end()) { - writeCurrentGroupToOutput(output); - ++_currentGroup; - ++_returnedGroups; - TRI_ASSERT(_returnedGroups <= _allGroups.size()); + AqlCall upstreamCall{}; + // We cannot forward anything, no skip, no limit. + // Need to request all from upstream. + return {returnState(), NoStats{}, upstreamCall}; +} + +/** + * @brief Skip Rows + * We need to consume all rows from the inputRange, except the + * last Row. This is to indicate that this executor is not yet done. + * Afterwards we skip all groups. + * Only if we have skipped the last group we consume + * the remaining inputRow. This is to indicate that + * this executor cannot produce anymore. 
+ * + * @param inputRange Data from input + * @param call Call from client + * @return std::tuple + */ +auto HashedCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + if (!_isInitialized) { + // Consume the input range + _isInitialized = consumeInputRange(inputRange); } - ExecutionState state = _currentGroup != _allGroups.end() ? ExecutionState::HASMORE - : ExecutionState::DONE; + if (_isInitialized) { + while (_currentGroup != _allGroups.end() && call.needSkipMore()) { + ++_currentGroup; + call.didSkip(1); + } + } - return {state, NoStats{}}; + AqlCall upstreamCall{}; + // We cannot forward anything, no skip, no limit. + // Need to request all from upstream. + return {returnState(), NoStats{}, call.getSkipCount(), upstreamCall}; } // finds the group matching the current row, or emplaces it. in either case, @@ -301,7 +343,8 @@ decltype(HashedCollectExecutor::_allGroups)::iterator HashedCollectExecutor::fin } // note: aggregateValues may be a nullptr! - auto [result, emplaced] = _allGroups.try_emplace(std::move(_nextGroupValues), std::move(aggregateValues)); + auto [result, emplaced] = + _allGroups.try_emplace(std::move(_nextGroupValues), std::move(aggregateValues)); // emplace must not fail TRI_ASSERT(emplaced); @@ -313,24 +356,8 @@ decltype(HashedCollectExecutor::_allGroups)::iterator HashedCollectExecutor::fin }; std::pair HashedCollectExecutor::expectedNumberOfRows(size_t atMost) const { - size_t rowsLeft = 0; - if (!_isInitialized) { - ExecutionState state; - std::tie(state, rowsLeft) = _fetcher.preFetchNumberOfRows(atMost); - if (state == ExecutionState::WAITING) { - TRI_ASSERT(rowsLeft == 0); - return {state, 0}; - } - // Overestimate, we have not grouped! - } else { - // This fetcher nows how exactly many rows are left - // as it knows how many groups is has created and not returned. - rowsLeft = _allGroups.size() - _returnedGroups; - } - if (rowsLeft > 0) { - return {ExecutionState::HASMORE, rowsLeft}; - } - return {ExecutionState::DONE, rowsLeft}; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } const HashedCollectExecutor::Infos& HashedCollectExecutor::infos() const noexcept { diff --git a/arangod/Aql/HashedCollectExecutor.h b/arangod/Aql/HashedCollectExecutor.h index 1381a590e93f..1ff7767d1f74 100644 --- a/arangod/Aql/HashedCollectExecutor.h +++ b/arangod/Aql/HashedCollectExecutor.h @@ -38,6 +38,9 @@ namespace arangodb { namespace aql { + +struct AqlCall; +class AqlItemBlockInputRange; class OutputAqlItemRow; class ExecutorInfos; template @@ -46,6 +49,26 @@ struct Aggregator; class HashedCollectExecutorInfos : public ExecutorInfos { public: + /** + * @brief Construct a new Hashed Collect Executor Infos object + * + * @param nrInputRegisters Number Registers in the input row + * @param nrOutputRegisters Number Registers in the output row + * @param registersToClear Registers that need to be empty after this + * @param registersToKeep Registers that will be copied after this + * @param readableInputRegisters InputRegisters this Executor is allowed to read + * @param writeableOutputRegisters OutputRegisters this Executor is required to write + * @param groupRegisters Registers the grouping is based on. + * If values in the registers are identical, + * the rows are considered as the same group. + * Format: + * @param collectRegister Register to write the GroupingResult to + * (COLLECT ... 
INTO collectRegister) + * @param aggregateTypes Aggregation methods used + * @param aggregateRegisters Input and output Register for Aggregation + * @param trxPtr The AQL transaction, as it might be needed for aggregates + * @param count Flag to enable count, will be written to collectRegister + */ HashedCollectExecutorInfos(RegisterId nrInputRegisters, RegisterId nrOutputRegisters, std::unordered_set registersToClear, std::unordered_set registersToKeep, @@ -117,9 +140,26 @@ class HashedCollectExecutor { * @brief produce the next Row of Aql Values. * * @return ExecutionState, and if successful exactly one new Row of AqlItems. + * @deprecated */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + /** + * @brief skip the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; + /** * @brief This Executor does not know how many distinct rows will be fetched * from upstream, it can only report how many it has found by itself, plus @@ -138,13 +178,21 @@ class HashedCollectExecutor { Infos const& infos() const noexcept; /** - * @brief Shall be executed until it returns DONE, then never again. - * Consumes all input, writes groups and calculates aggregates, and - * initializes _currentGroup to _allGroups.begin(). + * @brief Consumes all rows from upstream + * Every row is collected into one of the groups. * - * @return DONE or WAITING + * @param inputRange Upstream range, will be fully consumed + * @return true We have consumed everything, start output + * @return false We do not have all input ask for more. 
*/ - ExecutionState init(); + auto consumeInputRange(AqlItemBlockInputRange& inputRange) -> bool; + + /** + * @brief State this Executor needs to report + * + * @return ExecutorState + */ + auto returnState() const -> ExecutorState; void destroyAllGroupsAqlValues(); @@ -159,8 +207,6 @@ class HashedCollectExecutor { private: Infos const& _infos; - Fetcher& _fetcher; - ExecutionState _upstreamState; /// @brief We need to save any input row (it really doesn't matter, except for /// when input blocks are freed - thus the last), so we can produce output @@ -175,8 +221,6 @@ class HashedCollectExecutor { std::vector(transaction::Methods*)> const*> _aggregatorFactories; - size_t _returnedGroups; - GroupKeyType _nextGroupValues; }; diff --git a/tests/Aql/ExecutorTestHelper.cpp b/tests/Aql/ExecutorTestHelper.cpp index 17a521230cda..d0e38abac134 100644 --- a/tests/Aql/ExecutorTestHelper.cpp +++ b/tests/Aql/ExecutorTestHelper.cpp @@ -26,26 +26,137 @@ using namespace arangodb::tests::aql; -auto arangodb::tests::aql::ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, - SharedAqlItemBlockPtr expected) -> void { +auto asserthelper::AqlValuesAreIdentical(AqlValue const& lhs, AqlValue const& rhs) -> bool { velocypack::Options vpackOptions; + return AqlValue::Compare(&vpackOptions, lhs, rhs, true) == 0; +} + +auto asserthelper::RowsAreIdentical(SharedAqlItemBlockPtr actual, size_t actualRow, + SharedAqlItemBlockPtr expected, size_t expectedRow, + std::optional> const& onlyCompareRegisters) + -> bool { + if (onlyCompareRegisters) { + if (actual->getNrRegs() < onlyCompareRegisters->size()) { + // Registers do not match + return false; + } + } else { + if (actual->getNrRegs() != expected->getNrRegs()) { + // Registers do not match + return false; + } + } + + for (RegisterId reg = 0; reg < expected->getNrRegs(); ++reg) { + auto const& x = + actual->getValueReference(actualRow, onlyCompareRegisters + ? onlyCompareRegisters->at(reg) + : reg); + auto const& y = expected->getValueReference(expectedRow, reg); + if (!AqlValuesAreIdentical(x, y)) { + // At least one value mismatched + return false; + } + } + // All values match + return true; +} + +auto asserthelper::ValidateAqlValuesAreEqual(SharedAqlItemBlockPtr actual, + size_t actualRow, RegisterId actualRegister, + SharedAqlItemBlockPtr expected, size_t expectedRow, + RegisterId expectedRegister) -> void { + velocypack::Options vpackOptions; + auto const& x = actual->getValueReference(actualRow, actualRegister); + auto const& y = expected->getValueReference(expectedRow, expectedRegister); + EXPECT_TRUE(AqlValuesAreIdentical(x, y)) + << "Row " << actualRow << " Column " << actualRegister << " do not agree. " + << x.slice().toJson(&vpackOptions) << " vs. " << y.slice().toJson(&vpackOptions); +} + +auto asserthelper::ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, + SharedAqlItemBlockPtr expected, + std::optional> const& onlyCompareRegisters) + -> void { ASSERT_NE(expected, nullptr); ASSERT_NE(actual, nullptr); EXPECT_EQ(actual->size(), expected->size()); - EXPECT_EQ(actual->getNrRegs(), 1); - for (size_t i = 0; i < (std::min)(actual->size(), expected->size()); ++i) { - auto const& x = actual->getValueReference(i, 0); - auto const& y = expected->getValueReference(i, 0); - EXPECT_TRUE(AqlValue::Compare(&vpackOptions, x, y, true) == 0) - << "Row " << i << " Column " << 0 << " do not agree. " - << x.slice().toJson(&vpackOptions) << " vs. 
" - << y.slice().toJson(&vpackOptions); + RegisterId outRegs = (std::min)(actual->getNrRegs(), expected->getNrRegs()); + if (onlyCompareRegisters) { + outRegs = onlyCompareRegisters->size(); + ASSERT_GE(actual->getNrRegs(), outRegs); + } else { + EXPECT_EQ(actual->getNrRegs(), expected->getNrRegs()); + } + + for (size_t row = 0; row < (std::min)(actual->size(), expected->size()); ++row) { + for (RegisterId reg = 0; reg < outRegs; ++reg) { + RegisterId actualRegister = + onlyCompareRegisters ? onlyCompareRegisters->at(reg) : reg; + ValidateAqlValuesAreEqual(actual, row, actualRegister, expected, row, reg); + } + } +} + +auto asserthelper::ValidateBlocksAreEqualUnordered( + SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected, std::size_t numRowsNotContained, + std::optional> const& onlyCompareRegisters) -> void { + std::unordered_set matchedRows{}; + return ValidateBlocksAreEqualUnordered(actual, expected, matchedRows, + numRowsNotContained, onlyCompareRegisters); +} + +auto asserthelper::ValidateBlocksAreEqualUnordered( + SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected, + std::unordered_set& matchedRows, std::size_t numRowsNotContained, + std::optional> const& onlyCompareRegisters) -> void { + ASSERT_NE(expected, nullptr); + ASSERT_NE(actual, nullptr); + EXPECT_EQ(actual->size() + numRowsNotContained, expected->size()); + + RegisterId outRegs = (std::min)(actual->getNrRegs(), expected->getNrRegs()); + if (onlyCompareRegisters) { + outRegs = onlyCompareRegisters->size(); + ASSERT_GE(actual->getNrRegs(), outRegs); + } else { + EXPECT_EQ(actual->getNrRegs(), expected->getNrRegs()); + } + + matchedRows.clear(); + + for (size_t expectedRow = 0; expectedRow < expected->size(); ++expectedRow) { + for (size_t actualRow = 0; actualRow < actual->size(); ++actualRow) { + if (RowsAreIdentical(actual, actualRow, expected, expectedRow, onlyCompareRegisters)) { + auto const& [unused, inserted] = matchedRows.emplace(expectedRow); + if (inserted) { + // one is enough, but do not match the same rows twice + break; + } + } + } + } + + if (matchedRows.size() + numRowsNotContained < expected->size()) { + // Did not find all rows. 
+ // This is for reporting only: + for (size_t expectedRow = 0; expectedRow < expected->size(); ++expectedRow) { + if (matchedRows.find(expectedRow) == matchedRows.end()) { + InputAqlItemRow missing(expected, expectedRow); + velocypack::Options vpackOptions; + VPackBuilder rowBuilder; + missing.toSimpleVelocyPack(&vpackOptions, rowBuilder); + VPackBuilder blockBuilder; + actual->toSimpleVPack(&vpackOptions, blockBuilder); + EXPECT_TRUE(false) << "Did not find row: " << rowBuilder.toJson() + << " in " << blockBuilder.toJson(); + } + } } } template AqlExecutorTestCase::AqlExecutorTestCase() - : _server{}, fakedQuery{_server.createFakeQuery(enableQueryTrace)} { + : fakedQuery{_server->createFakeQuery(enableQueryTrace)} { auto engine = std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); fakedQuery->setEngine(engine.release()); } diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 907f9201458d..0edc6c4992a8 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -44,9 +44,45 @@ namespace arangodb { namespace tests { namespace aql { - -auto ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected) - -> void; +/** + * @brief Static helper class just offers helper methods + * Do never instantiate + * + */ +class asserthelper { + private: + asserthelper() {} + + public: + static auto AqlValuesAreIdentical(AqlValue const& lhs, AqlValue const& rhs) -> bool; + + static auto RowsAreIdentical(SharedAqlItemBlockPtr actual, size_t actualRow, + SharedAqlItemBlockPtr expected, size_t expectedRow, + std::optional> const& onlyCompareRegisters = std::nullopt) + -> bool; + + static auto ValidateAqlValuesAreEqual(SharedAqlItemBlockPtr actual, + size_t actualRow, RegisterId actualRegister, + SharedAqlItemBlockPtr expected, size_t expectedRow, + RegisterId expectedRegister) -> void; + + static auto ValidateBlocksAreEqual( + SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected, + std::optional> const& onlyCompareRegisters = std::nullopt) + -> void; + + static auto ValidateBlocksAreEqualUnordered( + SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected, + std::size_t numRowsNotContained = 0, + std::optional> const& onlyCompareRegisters = std::nullopt) + -> void; + + static auto ValidateBlocksAreEqualUnordered( + SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr expected, + std::unordered_set& matchedRows, std::size_t numRowsNotContained = 0, + std::optional> const& onlyCompareRegisters = std::nullopt) + -> void; +}; /** * @brief Base class for ExecutorTests in Aql. @@ -57,7 +93,16 @@ auto ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, SharedAqlItemBlockPtr * @tparam enableQueryTrace Enable Aql Profile Trace logging */ template -class AqlExecutorTestCase { +class AqlExecutorTestCase : public ::testing::Test { + public: + // Creating a server instance costs a lot of time, so do it only once. 
+ // Note that newer version of gtest call these SetUpTestSuite/TearDownTestSuite + static void SetUpTestCase() { + _server = std::make_unique(); + } + + static void TearDownTestCase() { _server.reset(); } + protected: AqlExecutorTestCase(); virtual ~AqlExecutorTestCase() = default; @@ -74,7 +119,7 @@ class AqlExecutorTestCase { auto manager() const -> AqlItemBlockManager&; private: - mocks::MockAqlServer _server; + static inline std::unique_ptr _server; std::vector> _execNodes; protected: @@ -84,6 +129,16 @@ class AqlExecutorTestCase { std::unique_ptr fakedQuery; }; +/** + * @brief Shortcut handle for parameterized AqlExecutorTestCases with param + * + * @tparam T The Test Parameter used for gtest. + * @tparam enableQueryTrace Enable Aql Profile Trace logging + */ +template +class AqlExecutorTestCaseWithParam : public AqlExecutorTestCase, + public ::testing::WithParamInterface {}; + template struct ExecutorTestHelper { using SplitType = std::variant, std::size_t, std::monostate>; @@ -94,6 +149,9 @@ struct ExecutorTestHelper { : _expectedSkip{0}, _expectedState{ExecutionState::HASMORE}, _testStats{false}, + _unorderedOutput{false}, + _appendEmptyBlock{false}, + _unorderedSkippedRows{0}, _query(query), _dummyNode{std::make_unique(_query.plan(), 42)} {} @@ -135,7 +193,7 @@ struct ExecutorTestHelper { return *this; } - auto expectOutput(std::array const& regs, + auto expectOutput(std::array const& regs, MatrixBuilder const& out) -> ExecutorTestHelper& { _outputRegisters = regs; _output = out; @@ -166,6 +224,26 @@ struct ExecutorTestHelper { return *this; }; + auto allowAnyOutputOrder(bool expected, size_t skippedRows = 0) -> ExecutorTestHelper& { + _unorderedOutput = expected; + _unorderedSkippedRows = skippedRows; + return *this; + } + + /** + * @brief This appends an empty block after the input fully created. + * It simulates a situation where the Producer lies about the + * the last input with HASMORE, but it actually is not able + * to produce more. 
+ * + * @param append If this should be enabled or not + * @return ExecutorTestHelper& this for chaining + */ + auto appendEmptyBlock(bool append) -> ExecutorTestHelper& { + _appendEmptyBlock = append; + return *this; + } + auto run(typename E::Infos infos) -> void { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); @@ -182,10 +260,23 @@ struct ExecutorTestHelper { EXPECT_EQ(skipped, _expectedSkip); EXPECT_EQ(state, _expectedState); + if (result == nullptr) { + // Empty output, possible if we skip all + EXPECT_EQ(_output.size(), 0) + << "Executor does not yield output, although it is expected"; + } else { + SharedAqlItemBlockPtr expectedOutputBlock = + buildBlock(itemBlockManager, std::move(_output)); + std::vector outRegVector(_outputRegisters.begin(), + _outputRegisters.end()); + if (_unorderedOutput) { + asserthelper::ValidateBlocksAreEqualUnordered(result, expectedOutputBlock, + _unorderedSkippedRows, outRegVector); + } else { + asserthelper::ValidateBlocksAreEqual(result, expectedOutputBlock, outRegVector); + } + } - SharedAqlItemBlockPtr expectedOutputBlock = - buildBlock(itemBlockManager, std::move(_output)); - testOutputBlock(result, expectedOutputBlock); if (_testStats) { auto actualStats = _query.engine()->getStats(); EXPECT_EQ(actualStats, _expectedStats); @@ -193,23 +284,6 @@ struct ExecutorTestHelper { }; private: - void testOutputBlock(SharedAqlItemBlockPtr const& outputBlock, - SharedAqlItemBlockPtr const& expectedOutputBlock) { - velocypack::Options vpackOptions; - - EXPECT_EQ(outputBlock->size(), expectedOutputBlock->size()); - for (size_t i = 0; i < outputBlock->size(); i++) { - for (size_t j = 0; j < outputColumns; j++) { - AqlValue const& x = outputBlock->getValueReference(i, _outputRegisters[j]); - AqlValue const& y = expectedOutputBlock->getValueReference(i, j); - - EXPECT_TRUE(AqlValue::Compare(&vpackOptions, x, y, true) == 0) - << "Row " << i << " Column " << j << " (Reg " << _outputRegisters[j] - << ") do not agree"; - } - } - } - auto generateInputRanges(AqlItemBlockManager& itemBlockManager) -> std::unique_ptr { using VectorSizeT = std::vector; @@ -255,6 +329,9 @@ struct ExecutorTestHelper { buildBlock(itemBlockManager, std::move(matrix)); blockDeque.emplace_back(inputBlock); } + if (_appendEmptyBlock) { + blockDeque.emplace_back(nullptr); + } return std::make_unique( _query.engine(), _dummyNode.get(), std::move(blockDeque), @@ -264,11 +341,14 @@ struct ExecutorTestHelper { AqlCall _call; MatrixBuilder _input; MatrixBuilder _output; - std::array _outputRegisters; - size_t _expectedSkip; + std::array _outputRegisters; + std::size_t _expectedSkip; ExecutionState _expectedState; ExecutionStats _expectedStats; bool _testStats; + bool _unorderedOutput; + bool _appendEmptyBlock; + std::size_t _unorderedSkippedRows; SplitType _inputSplit = {std::monostate()}; SplitType _outputSplit = {std::monostate()}; diff --git a/tests/Aql/HashedCollectExecutorTest.cpp b/tests/Aql/HashedCollectExecutorTest.cpp index 7009caa5243c..0c01634af57b 100644 --- a/tests/Aql/HashedCollectExecutorTest.cpp +++ b/tests/Aql/HashedCollectExecutorTest.cpp @@ -25,15 +25,19 @@ #include "gtest/gtest.h" +#include "AqlItemBlockHelper.h" +#include "ExecutorTestHelper.h" #include "RowFetcherHelper.h" +#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/Collection.h" #include "Aql/ExecutionEngine.h" #include "Aql/HashedCollectExecutor.h" #include "Aql/OutputAqlItemRow.h" -#include "Aql/SingleRowFetcher.h" #include "Aql/Query.h" 
+#include "Aql/RegisterPlan.h" +#include "Aql/SingleRowFetcher.h" #include "Mocks/Servers.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" @@ -49,674 +53,573 @@ namespace arangodb { namespace tests { namespace aql { -class HashedCollectExecutorTestNoRows : public ::testing::Test { +// This is only to get a split-type. The Type is independent of actual template parameters +using HashedCollectTestHelper = ExecutorTestHelper; +using HashedCollectSplitType = HashedCollectTestHelper::SplitType; +using HashedCollectInputParam = std::tuple; + +class HashedCollectExecutorTest + : public AqlExecutorTestCaseWithParam { protected: - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - - mocks::MockAqlServer server; - std::unique_ptr fakedQuery; - arangodb::transaction::Methods* trx; - - std::unordered_set const regToClear; - std::unordered_set const regToKeep; - std::vector> groupRegisters; - - std::vector aggregateTypes; - - std::vector> aggregateRegisters; - - // if count = true, then we need to set a countRegister - RegisterId collectRegister; - bool count; - - std::unordered_set readableInputRegisters; - std::unordered_set writeableOutputRegisters; - - HashedCollectExecutorInfos infos; - - SharedAqlItemBlockPtr block; - VPackBuilder input; - NoStats stats; - - HashedCollectExecutorTestNoRows() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - fakedQuery(server.createFakeQuery()), - trx(fakedQuery->trx()), - groupRegisters{std::make_pair(1, 2)}, - collectRegister(0), - count(false), - infos(2 /*nrIn*/, 2 /*nrOut*/, regToClear, regToKeep, - std::move(readableInputRegisters), - std::move(writeableOutputRegisters), std::move(groupRegisters), 0, - std::move(aggregateTypes), std::move(aggregateRegisters), trx, false), - block(new AqlItemBlock(itemBlockManager, 1000, 2)) {} + auto getSplit() -> HashedCollectSplitType { + auto [split, empty] = GetParam(); + return split; + } + + auto appendEmpty() -> bool { + auto [split, empty] = GetParam(); + return empty; + } + + auto buildInfos(RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::vector> groupRegisters, + RegisterId collectRegister = RegisterPlan::MaxRegisterId, + std::vector aggregateTypes = {}, + std::vector> aggregateRegisters = {}) + -> HashedCollectExecutorInfos { + std::unordered_set registersToClear{}; + std::unordered_set registersToKeep{}; + std::unordered_set readableInputRegisters{}; + std::unordered_set writeableOutputRegisters{}; + + for (RegisterId i = 0; i < nrInputRegisters; ++i) { + // All registers need to be invalidated! 
+ registersToClear.emplace(i); + } + + for (auto const& [out, in] : groupRegisters) { + readableInputRegisters.emplace(in); + writeableOutputRegisters.emplace(out); + } + + // It seems that count <=> collectRegister exists + bool count = false; + if (collectRegister != RegisterPlan::MaxRegisterId) { + writeableOutputRegisters.emplace(collectRegister); + count = true; + } + TRI_ASSERT(aggregateTypes.size() == aggregateRegisters.size()); + for (auto const& [out, in] : aggregateRegisters) { + if (in != RegisterPlan::MaxRegisterId) { + readableInputRegisters.emplace(in); + } + writeableOutputRegisters.emplace(out); + } + + return HashedCollectExecutorInfos{nrInputRegisters, + nrOutputRegisters, + registersToClear, + registersToKeep, + std::move(readableInputRegisters), + std::move(writeableOutputRegisters), + std::move(groupRegisters), + collectRegister, + std::move(aggregateTypes), + std::move(aggregateRegisters), + fakedQuery->trx(), + count}; + }; }; -TEST_F(HashedCollectExecutorTestNoRows, the_producer_doesnt_wait) { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +template +const HashedCollectSplitType splitIntoBlocks = + HashedCollectSplitType{std::vector{vs...}}; +template +const HashedCollectSplitType splitStep = HashedCollectSplitType{step}; + +INSTANTIATE_TEST_CASE_P(HashedCollect, HashedCollectExecutorTest, + ::testing::Combine(::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, + splitStep<1>, splitStep<2>), + ::testing::Bool())); + +// Collect with only one group value +TEST_P(HashedCollectExecutorTest, collect_only) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {2}, {6}, {R"("1")"}}) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); } -TEST_F(HashedCollectExecutorTestNoRows, the_producer_waits) { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +// Collect skip all +TEST_P(HashedCollectExecutorTest, skip_all) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.offset = 1000; // skip all + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {}) + .allowAnyOutputOrder(true) + .expectSkipped(4) + 
.expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); } -class HashedCollectExecutorTestRowsNoCount : public ::testing::Test { - protected: - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - - mocks::MockAqlServer server; - std::unique_ptr fakedQuery; - arangodb::transaction::Methods* trx; - - std::unordered_set regToClear; - std::unordered_set regToKeep; - std::vector> groupRegisters; - - std::unordered_set readableInputRegisters; - - std::unordered_set writeableOutputRegisters; - - RegisterId nrOutputRegister; - - std::vector> aggregateRegisters; - std::vector aggregateTypes; - - // if count = true, then we need to set a valid countRegister - RegisterId collectRegister; - bool count; - - HashedCollectExecutorInfos infos; - - SharedAqlItemBlockPtr block; - NoStats stats; - - HashedCollectExecutorTestRowsNoCount() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - fakedQuery(server.createFakeQuery()), - trx(fakedQuery->trx()), - groupRegisters{std::make_pair(1, 0)}, - readableInputRegisters{0}, - writeableOutputRegisters{1}, - nrOutputRegister(2), - collectRegister(0), - count(false), - infos(1, nrOutputRegister, regToClear, regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), std::move(groupRegisters), 0, - std::move(aggregateTypes), std::move(aggregateRegisters), trx, false), - block(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)) {} -}; - -TEST_F(HashedCollectExecutorTestRowsNoCount, the_producer_doesnt_wait_1) { - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myNumbers; - auto block = result.stealBlock(); - - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); - - AqlValue z = block->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - myNumbers.emplace_back(z.slice().getInt()); - - // now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); +// Collect fullCount all +TEST_P(HashedCollectExecutorTest, fullcount_all) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.hardLimit = 0; // HardLimit + call.fullCount = true; // count all + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {}) + .allowAnyOutputOrder(true) + .expectSkipped(4) + .expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); } -TEST_F(HashedCollectExecutorTestRowsNoCount, 
the_producer_doesnt_wait_2) { - auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myNumbers; - auto block = result.stealBlock(); - - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); - - AqlValue y = block->getValue(1, 1); - ASSERT_TRUE(y.isNumber()); - myNumbers.emplace_back(y.slice().getInt()); - - AqlValue z = block->getValue(2, 1); - ASSERT_TRUE(z.isNumber()); - myNumbers.emplace_back(z.slice().getInt()); - - // now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); - ASSERT_EQ(myNumbers.at(2), 3); +// Collect get some +TEST_P(HashedCollectExecutorTest, collect_only_soft_less) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.softLimit = 2; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {2}, {6}, {R"("1")"}}) + .allowAnyOutputOrder(true, 2) + .expectSkipped(0) + .expectedState(ExecutionState::HASMORE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); } -TEST_F(HashedCollectExecutorTestRowsNoCount, the_producer_doesnt_wait_3) { - auto input = VPackParser::fromJson("[ [1], [2], [3], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myNumbers; - auto block = result.stealBlock(); - - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); - - AqlValue y = block->getValue(1, 1); - ASSERT_TRUE(y.isNumber()); - 
myNumbers.emplace_back(y.slice().getInt()); - - AqlValue z = block->getValue(2, 1); - ASSERT_TRUE(z.isNumber()); - myNumbers.emplace_back(z.slice().getInt()); - - // now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); - ASSERT_EQ(myNumbers.at(2), 3); +// Collect get some multiple calls +TEST_P(HashedCollectExecutorTest, collect_only_soft_less_second_call) { + auto infos = buildInfos(1, 2, {{1, 0}}); + + // TODO maybe we should add this to the test framework as well. + + std::deque blockDeque; + { + auto data = + buildBlock<2>(manager(), {{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}); + blockDeque.emplace_back(data); + } + + auto inputBlock = std::make_unique( + fakedQuery->engine(), generateNodeDummy(), std::move(blockDeque), + WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + ExecutionBlockImpl testee{fakedQuery->engine(), + generateNodeDummy(), std::move(infos)}; + testee.addDependency(inputBlock.get()); + // First Call + std::unordered_set matchedRows{}; + auto buildExpectedOutput = [&]() -> SharedAqlItemBlockPtr { + MatrixBuilder<1> matrix; + if (matchedRows.find(0) == matchedRows.end()) { + matrix.emplace_back(RowBuilder<1>{1}); + } + if (matchedRows.find(1) == matchedRows.end()) { + matrix.emplace_back(RowBuilder<1>{2}); + } + if (matchedRows.find(2) == matchedRows.end()) { + matrix.emplace_back(RowBuilder<1>{6}); + } + if (matchedRows.find(3) == matchedRows.end()) { + matrix.emplace_back(RowBuilder<1>{R"("1")"}); + } + return buildBlock<1>(manager(), std::move(matrix)); + }; + std::vector registersToTest{1}; + { + AqlCall call{}; + call.softLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, result] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::HASMORE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(result, nullptr); + asserthelper::ValidateBlocksAreEqualUnordered(result, buildExpectedOutput(), + matchedRows, 2, registersToTest); + } + + // Second call + { + AqlCall call{}; + call.softLimit = 2; + AqlCallStack stack{call}; + auto const [state, skipped, result] = testee.execute(stack); + EXPECT_EQ(state, ExecutionState::DONE); + EXPECT_EQ(skipped, 0); + ASSERT_NE(result, nullptr); + asserthelper::ValidateBlocksAreEqualUnordered(result, buildExpectedOutput(), + matchedRows, 0, registersToTest); + } } -TEST_F(HashedCollectExecutorTestRowsNoCount, the_producer_doesnt_wait_4) { - auto input = VPackParser::fromJson("[ [1], [2], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myNumbers; - auto block = result.stealBlock(); - - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); - - AqlValue y = block->getValue(1, 1); - ASSERT_TRUE(y.isNumber()); - 
myNumbers.emplace_back(y.slice().getInt()); - - // now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); +// Collect get some +TEST_P(HashedCollectExecutorTest, collect_only_hard_less) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.hardLimit = 2; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {2}, {6}, {R"("1")"}}) + .allowAnyOutputOrder(true, 2) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); } -TEST_F(HashedCollectExecutorTestRowsNoCount, the_producer_waits) { - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); +// Collect skip some +TEST_P(HashedCollectExecutorTest, skip_some) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.offset = 2; // skip some + call.softLimit = 0; // 0 limit + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {}) + .allowAnyOutputOrder(true) + .expectSkipped(2) + .expectedState(ExecutionState::HASMORE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); +} - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); +// Collect skip and get +TEST_P(HashedCollectExecutorTest, skip_and_get) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.offset = 2; // skip some + call.softLimit = 1000; // high limit + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {2}, {6}, {R"("1")"}}) + .allowAnyOutputOrder(true, 2) + .expectSkipped(2) + .expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); +} - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); +// Collect skip and hardLimit +TEST_P(HashedCollectExecutorTest, skip_and_hardLimit) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.offset = 2; // skip some + call.hardLimit = 1; // hard limit + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {2}, {6}, {R"("1")"}}) + .allowAnyOutputOrder(true, 3) 
+ .expectSkipped(2) + .expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); +} - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +// Collect skip and fullCount +TEST_P(HashedCollectExecutorTest, skip_and_fullCount) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; + call.offset = 1; // skip some + call.hardLimit = 2; // hard limit + call.fullCount = true; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {2}, {6}, {R"("1")"}}) + .allowAnyOutputOrder(true, 2) + .expectSkipped(2) + .expectedState(ExecutionState::DONE) + .appendEmptyBlock(appendEmpty()) + // .expectedStats(stats) + .run(std::move(infos)); +} - std::vector myNumbers; - auto block = result.stealBlock(); +// Collect with more then one group value +TEST_P(HashedCollectExecutorTest, collect_only_multiple_values) { + auto infos = buildInfos(2, 4, {{2, 0}, {3, 1}}); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, + RowBuilder<2>{2, 2}, RowBuilder<2>{1, 5}, + RowBuilder<2>{6, 1}, RowBuilder<2>{2, 2}, + RowBuilder<2>{R"("1")", 1}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({2, 3}, MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, + RowBuilder<2>{2, 2}, RowBuilder<2>{6, 1}, + RowBuilder<2>{R"("1")", 1}}) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + // .expectedStats(stats) + .run(std::move(infos)); +} - // check for types - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); +// Collect with one group value and count +TEST_P(HashedCollectExecutorTest, count) { + auto infos = buildInfos(1, 3, {{1, 0}}, 2); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1, 2}, {{1, 3}, {2, 2}, {6, 1}, {R"("1")", 1}}) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + // .expectedStats(stats) + .run(std::move(infos)); +} - AqlValue z = block->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - myNumbers.emplace_back(z.slice().getInt()); +// Collect with multiple aggregators +TEST_P(HashedCollectExecutorTest, many_aggregators) { + auto infos = + buildInfos(2, 5, {{2, 0}}, RegisterPlan::MaxRegisterId, {"LENGTH", "SUM"}, + {{3, RegisterPlan::MaxRegisterId}, {4, 1}}); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, + RowBuilder<2>{2, 2}, RowBuilder<2>{1, 5}, + RowBuilder<2>{6, 1}, RowBuilder<2>{2, 2}, + RowBuilder<2>{3, 1}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({2, 3, 4}, + MatrixBuilder<3>{RowBuilder<3>{1, 3, 11}, RowBuilder<3>{2, 2, 4}, + RowBuilder<3>{6, 1, 1}, RowBuilder<3>{3, 1, 1}}) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + // .expectedStats(stats) + .run(std::move(infos)); +} - // 
now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); +// Collect based on equal arrays. +TEST_P(HashedCollectExecutorTest, collect_arrays) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1,1,1])"}}, + {{1}}, + {{R"([1,1,1,1])"}}, + {{R"([2,1,1])"}}, + {{R"([1,1,1])"}}, + {{R"([2,1,1])"}}, + {{R"([1,1,1])"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, {R"([1,1,1])"}, {R"([1,1,1,1])"}, {R"([2,1,1])"}}) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + // .expectedStats(stats) + .run(std::move(infos)); } -TEST(HashedCollectExecutorTestRowsCount, the_producer_doesnt_wait) { - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; - - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - arangodb::transaction::Methods* trx = fakedQuery->trx(); - - std::unordered_set regToClear; - std::unordered_set regToKeep; - std::vector> groupRegisters; - groupRegisters.emplace_back(std::make_pair(1, 0)); - - std::unordered_set readableInputRegisters; - readableInputRegisters.insert(0); - - std::unordered_set writeableOutputRegisters; - writeableOutputRegisters.insert(1); - - RegisterId nrOutputRegister = 3; - - std::vector> aggregateRegisters; - aggregateRegisters.emplace_back(std::make_pair(1, 0)); - - std::vector aggregateTypes; - aggregateTypes.emplace_back("SUM"); - - // if count = true, then we need to set a valid countRegister - bool count = true; - RegisterId collectRegister = 2; - writeableOutputRegisters.insert(2); - - HashedCollectExecutorInfos infos(1, nrOutputRegister, regToClear, regToKeep, - std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), collectRegister, - std::move(aggregateTypes), - std::move(aggregateRegisters), trx, count); - - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; - NoStats stats{}; - - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myNumbers; - std::vector myCountNumbers; - auto newBlock = result.stealBlock(); - - // check for types - AqlValue x = newBlock->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); - - // Check the count register - AqlValue xx = newBlock->getValue(0, 2); - ASSERT_TRUE(xx.isNumber()); - myCountNumbers.emplace_back(xx.slice().getDouble()); - - AqlValue z = newBlock->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - 
myNumbers.emplace_back(z.slice().getInt()); - - // Check the count register - AqlValue zz = newBlock->getValue(1, 2); - ASSERT_TRUE(zz.isNumber()); - myCountNumbers.emplace_back(zz.slice().getDouble()); - - // now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - - std::sort(myCountNumbers.begin(), myCountNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); - ASSERT_EQ(myCountNumbers.at(0), 1); - ASSERT_EQ(myCountNumbers.at(1), 2); +// Collect based on equal objects. +TEST_P(HashedCollectExecutorTest, collect_objects) { + auto infos = buildInfos(1, 2, {{1, 0}}); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"({"a": 1, "b": 1})"}}, + {{1}}, + {{R"({"a": 1, "b": 1, "c": 1})"}}, + {{R"({"a": 2, "b": 1})"}}, + {{R"({"b": 1, "a": 1})"}}, + {{R"({"b": 1, "c": 1, "a": 1})"}}, + {{R"([1,1,1])"}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {{1}, + {R"([1,1,1])"}, + {R"({"a": 1, "b": 1})"}, + {R"({"a": 1, "b": 1, "c": 1})"}, + {R"({"a": 2, "b": 1})"}}) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + // .expectedStats(stats) + .run(std::move(infos)); } -TEST(HashedCollectExecutorTestRowsCountNumbers, the_producer_doesnt_wait) { - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; +/** + * @section + * + * Test suite for aggregate functions. + * We only do full produce here, to make sure that aggregate + * functions work as expected. + * The register / call implementation is tested in the other suite + */ + +struct AggregateInput { + std::string name; + RegisterId inReg; + MatrixBuilder<2> expectedOutput; +}; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - arangodb::transaction::Methods* trx = fakedQuery->trx(); - - std::unordered_set regToClear; - std::unordered_set regToKeep; - std::vector> groupRegisters; - groupRegisters.emplace_back(std::make_pair(1, 0)); - - std::unordered_set readableInputRegisters; - readableInputRegisters.insert(0); - - std::unordered_set writeableOutputRegisters; - writeableOutputRegisters.insert(1); - - RegisterId nrOutputRegister = 3; - - std::vector> aggregateRegisters; - aggregateRegisters.emplace_back(std::make_pair(1, 0)); - - std::vector aggregateTypes; - aggregateTypes.emplace_back("LENGTH"); - - // if count = true, then we need to set a valid countRegister - bool count = true; - RegisterId collectRegister = 2; - writeableOutputRegisters.insert(2); - - HashedCollectExecutorInfos infos(1, nrOutputRegister, regToClear, regToKeep, - std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), collectRegister, - std::move(aggregateTypes), - std::move(aggregateRegisters), trx, count); - - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; - NoStats stats{}; - - auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - 
result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myNumbers; - std::vector myCountNumbers; - auto newBlock = result.stealBlock(); - - // check for types - AqlValue x = newBlock->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - myNumbers.emplace_back(x.slice().getInt()); - - // Check the count register - AqlValue xx = newBlock->getValue(0, 2); - ASSERT_TRUE(xx.isNumber()); - myCountNumbers.emplace_back(xx.slice().getInt()); - - AqlValue z = newBlock->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - myNumbers.emplace_back(z.slice().getInt()); - - // Check the count register - AqlValue zz = newBlock->getValue(1, 2); - ASSERT_TRUE(zz.isNumber()); - myCountNumbers.emplace_back(zz.slice().getInt()); - - AqlValue y = newBlock->getValue(2, 1); - ASSERT_TRUE(y.isNumber()); - myNumbers.emplace_back(y.slice().getInt()); - - // Check the count register - AqlValue yy = newBlock->getValue(2, 2); - ASSERT_TRUE(yy.isNumber()); - myCountNumbers.emplace_back(yy.slice().getInt()); - - // now sort vector and check for appearances - std::sort(myNumbers.begin(), myNumbers.end()); - - std::sort(myCountNumbers.begin(), myCountNumbers.end()); - ASSERT_EQ(myNumbers.at(0), 1); - ASSERT_EQ(myNumbers.at(1), 2); - ASSERT_EQ(myNumbers.at(2), 3); - ASSERT_EQ(myCountNumbers.at(0), 1); - ASSERT_EQ(myCountNumbers.at(1), 1); - ASSERT_EQ(myCountNumbers.at(2), 1); +std::ostream& operator<<(std::ostream& out, AggregateInput const& agg) { + out << agg.name; + if (agg.inReg != RegisterPlan::MaxRegisterId) { + out << " reg: " << agg.inReg; + } + return out; } -TEST(HashedCollectExecutorTestRowsCountStrings, the_producer_doesnt_wait) { - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; +using HashedCollectAggregateInputParam = std::tuple; + +class HashedCollectExecutorTestAggregate + : public AqlExecutorTestCaseWithParam { + protected: + auto getSplit() -> HashedCollectSplitType { + auto [split, unused] = GetParam(); + return split; + } + + auto getAggregator() -> AggregateInput { + auto [unused, info] = GetParam(); + return info; + } + + auto buildInfos(RegisterId nrInputRegisters, RegisterId nrOutputRegisters, + std::vector> groupRegisters) + -> HashedCollectExecutorInfos { + std::unordered_set registersToClear{}; + std::unordered_set registersToKeep{}; + std::unordered_set readableInputRegisters{}; + std::unordered_set writeableOutputRegisters{}; + + for (RegisterId i = 0; i < nrInputRegisters; ++i) { + // All registers need to be invalidated! 
+ registersToClear.emplace(i); + } + + for (auto const& [out, in] : groupRegisters) { + readableInputRegisters.emplace(in); + writeableOutputRegisters.emplace(out); + } + + bool count = false; + RegisterId collectRegister = RegisterPlan::MaxRegisterId; + + auto agg = getAggregator(); + std::vector aggregateTypes{agg.name}; + std::vector> aggregateRegisters{{3, agg.inReg}}; + if (agg.inReg != RegisterPlan::MaxRegisterId) { + readableInputRegisters.emplace(agg.inReg); + } + + writeableOutputRegisters.emplace(3); + + return HashedCollectExecutorInfos{nrInputRegisters, + nrOutputRegisters, + registersToClear, + registersToKeep, + std::move(readableInputRegisters), + std::move(writeableOutputRegisters), + std::move(groupRegisters), + collectRegister, + std::move(aggregateTypes), + std::move(aggregateRegisters), + fakedQuery->trx(), + count}; + }; +}; - mocks::MockAqlServer server{}; - std::unique_ptr fakedQuery = server.createFakeQuery(); - arangodb::transaction::Methods* trx = fakedQuery->trx(); - - std::unordered_set regToClear; - std::unordered_set regToKeep; - std::vector> groupRegisters; - groupRegisters.emplace_back(std::make_pair(1, 0)); - - std::unordered_set readableInputRegisters; - readableInputRegisters.insert(0); - - std::unordered_set writeableOutputRegisters; - writeableOutputRegisters.insert(1); - - RegisterId nrOutputRegister = 3; - - std::vector> aggregateRegisters; - aggregateRegisters.emplace_back(std::make_pair(1, 0)); - - std::vector aggregateTypes; - aggregateTypes.emplace_back("LENGTH"); - - // if count = true, then we need to set a valid countRegister - bool count = true; - RegisterId collectRegister = 2; - writeableOutputRegisters.insert(2); - - HashedCollectExecutorInfos infos(1, nrOutputRegister, regToClear, regToKeep, - std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), collectRegister, - std::move(aggregateTypes), - std::move(aggregateRegisters), trx, count); - - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; - NoStats stats{}; - - auto input = VPackParser::fromJson("[ [\"a\"], [\"aa\"], [\"aaa\"] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - HashedCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::vector myStrings; - std::vector myCountNumbers; - auto newBlock = result.stealBlock(); - - // check for types - AqlValue x = newBlock->getValue(0, 1); - ASSERT_TRUE(x.isString()); - myStrings.emplace_back(x.slice().copyString()); - - // Check the count register - AqlValue xx = newBlock->getValue(0, 2); - ASSERT_TRUE(xx.isNumber()); - myCountNumbers.emplace_back(xx.slice().getInt()); - - AqlValue z = newBlock->getValue(1, 1); - ASSERT_TRUE(z.isString()); - 
myStrings.emplace_back(z.slice().copyString()); - - // Check the count register - AqlValue zz = newBlock->getValue(1, 2); - ASSERT_TRUE(zz.isNumber()); - myCountNumbers.emplace_back(zz.slice().getInt()); - - AqlValue y = newBlock->getValue(2, 1); - ASSERT_TRUE(y.isString()); - myStrings.emplace_back(y.slice().copyString()); - - // Check the count register - AqlValue yy = newBlock->getValue(2, 2); - ASSERT_TRUE(yy.isNumber()); - myCountNumbers.emplace_back(yy.slice().getInt()); - - // now sort vector and check for appearances - std::sort(myStrings.begin(), myStrings.end()); - - std::sort(myCountNumbers.begin(), myCountNumbers.end()); - ASSERT_EQ(myStrings.at(0), "a"); - ASSERT_EQ(myStrings.at(1), "aa"); - ASSERT_EQ(myStrings.at(2), "aaa"); - ASSERT_EQ(myCountNumbers.at(0), 1); - ASSERT_EQ(myCountNumbers.at(1), 1); - ASSERT_EQ(myCountNumbers.at(2), 1); +/** + * Input used: + * + * [ + * [1, 5] + * [1, 1] + * [2, 2] + * [1, 5] + * [6, 1] + * [2, 2] + * [3, 1] + * ] + * We join on the first value. + * The second value can be used by aggregate + */ + +/** + * TODO: + * [] Add tests for all aggregate functions + */ + +auto AggregateInputs = + ::testing::Values(AggregateInput{"LENGTH", + RegisterPlan::MaxRegisterId, + {{1, 3}, {2, 2}, {6, 1}, {3, 1}}}, + AggregateInput{"SUM", 0, {{1, 3}, {2, 4}, {6, 6}, {3, 3}}}, + AggregateInput{"SUM", 1, {{1, 11}, {2, 4}, {6, 1}, {3, 1}}}); + +INSTANTIATE_TEST_CASE_P(HashedCollectAggregate, HashedCollectExecutorTestAggregate, + ::testing::Combine(::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, + splitStep<1>, splitStep<2>), + AggregateInputs)); + +TEST_P(HashedCollectExecutorTestAggregate, run) { + auto infos = buildInfos(2, 4, {{2, 0}}); + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, + RowBuilder<2>{2, 2}, RowBuilder<2>{1, 5}, + RowBuilder<2>{6, 1}, RowBuilder<2>{2, 2}, + RowBuilder<2>{3, 1}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({2, 3}, getAggregator().expectedOutput) + .allowAnyOutputOrder(true) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + // .expectedStats(stats) + .run(std::move(infos)); } } // namespace aql diff --git a/tests/Aql/IdExecutorTest.cpp b/tests/Aql/IdExecutorTest.cpp index 32f0a51bf6ce..e9c2d3df07d7 100644 --- a/tests/Aql/IdExecutorTest.cpp +++ b/tests/Aql/IdExecutorTest.cpp @@ -53,8 +53,7 @@ using TestParam = std::tuple, // The input data OutputAqlItemRow::CopyRowBehavior // How the data is handled within outputRow >; -class IdExecutorTestCombiner : public AqlExecutorTestCase<>, - public ::testing::TestWithParam { +class IdExecutorTestCombiner : public AqlExecutorTestCaseWithParam { protected: IdExecutorTestCombiner() {} @@ -203,7 +202,7 @@ auto copyBehaviours = testing::Values(OutputAqlItemRow::CopyRowBehavior::CopyInp INSTANTIATE_TEST_CASE_P(IdExecutorTest, IdExecutorTestCombiner, ::testing::Combine(inputs, upstreamStates, clientCalls, copyBehaviours)); -class IdExecutionBlockTest : public AqlExecutorTestCase<>, public ::testing::Test {}; +class IdExecutionBlockTest : public AqlExecutorTestCase<> {}; // The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl TEST_F(IdExecutionBlockTest, test_initialize_cursor_get) { @@ -347,8 +346,7 @@ TEST_F(IdExecutionBlockTest, test_hardlimit_single_row_fetcher) { * Used in ScatterExecutor logic. 
* param: useFullCount */ -class BlockOverloadTest : public AqlExecutorTestCase<>, - public ::testing::TestWithParam { +class BlockOverloadTest : public AqlExecutorTestCaseWithParam { protected: auto getTestee() -> ExecutionBlockImpl> { IdExecutorInfos infos{1, {0}, {}}; @@ -385,7 +383,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher) { EXPECT_EQ(skipped, 0); } - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { // Validate that additional upstream-rows are gone. @@ -422,7 +420,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_at_end) { } else { EXPECT_EQ(skipped, 0); } - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { // Validate that additional upstream-rows are gone. @@ -459,7 +457,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_in_between) { } else { EXPECT_EQ(skipped, 0); } - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { // Validate that next call will give remaining rows @@ -470,7 +468,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_in_between) { auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); EXPECT_EQ(skipped, 0); - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } } @@ -500,7 +498,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) } else { EXPECT_EQ(skipped, 0); } - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { // Second call will only find a single ShadowRow @@ -512,7 +510,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); EXPECT_EQ(skipped, 0); - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { // Third call will only find a single ShadowRow @@ -524,7 +522,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); EXPECT_EQ(skipped, 0); - ValidateBlocksAreEqual(block, expectedOutputBlock); + asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { // Validate that additional upstream-rows are gone. 
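Editor's note: the BlockOverloadTest cases above all lean on the same accounting rule for a hard limit combined with fullCount — rows up to the hard limit are materialized, and only when fullCount is requested are the remaining upstream rows reported through the skipped counter. Below is a minimal, self-contained sketch of that rule; Call, Result, runWithHardLimit and the 10-row/limit-4 numbers are illustrative stand-ins, not the real AqlCall or ExecutionBlockImpl::execute API.

// Minimal stand-in model of hardLimit/fullCount accounting (illustrative only,
// not the real arangodb::aql types).
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Call {
  std::size_t hardLimit;  // rows to materialize at most
  bool fullCount;         // if true, rows beyond the limit are counted as skipped
};

struct Result {
  std::vector<int> rows;  // rows actually produced
  std::size_t skipped;    // rows consumed but not returned (fullCount phase)
};

Result runWithHardLimit(std::vector<int> const& input, Call const& call) {
  Result res{{}, 0};
  std::size_t produce = std::min(call.hardLimit, input.size());
  res.rows.assign(input.begin(), input.begin() + produce);
  if (call.fullCount) {
    // fullCount: everything after the hard limit is skipped, not returned.
    res.skipped = input.size() - produce;
  }
  return res;
}

int main() {
  std::vector<int> upstream{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  auto withFullCount = runWithHardLimit(upstream, Call{4, true});
  assert(withFullCount.rows.size() == 4);
  assert(withFullCount.skipped == 6);

  auto withoutFullCount = runWithHardLimit(upstream, Call{4, false});
  assert(withoutFullCount.rows.size() == 4);
  assert(withoutFullCount.skipped == 0);
  return 0;
}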
diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index 95acf6b94d2c..e580e9b45f24 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -928,8 +928,7 @@ using SortedCollectTestHelper = ExecutorTestHelper; using SortedCollectSplitType = SortedCollectTestHelper::SplitType; class SortedCollectExecutorTestSplit - : public AqlExecutorTestCase, - public ::testing::TestWithParam> { + : public AqlExecutorTestCaseWithParam> { protected: arangodb::transaction::Methods* trx; diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp index 4e1f9233ca7a..74000072c520 100644 --- a/tests/Aql/WaitingExecutionBlockMock.cpp +++ b/tests/Aql/WaitingExecutionBlockMock.cpp @@ -64,7 +64,7 @@ std::pair WaitingExecutionBlockMock::shut } std::pair WaitingExecutionBlockMock::getSome(size_t atMost) { - if (!_hasWaited) { + if (_variant != WaitingBehaviour::NEVER && !_hasWaited) { _hasWaited = true; if (_returnedDone) { return {ExecutionState::DONE, nullptr}; @@ -91,7 +91,7 @@ std::pair WaitingExecution std::pair WaitingExecutionBlockMock::skipSome(size_t atMost) { traceSkipSomeBegin(atMost); - if (!_hasWaited) { + if (_variant != WaitingBehaviour::NEVER && !_hasWaited) { _hasWaited = true; return traceSkipSomeEnd(ExecutionState::WAITING, 0); } @@ -136,7 +136,23 @@ std::tuple WaitingExecutionBlockM } size_t skipped = 0; SharedAqlItemBlockPtr result = nullptr; + if (_data.front() == nullptr) { + dropBlock(); + } while (!_data.empty()) { + if (_data.front() == nullptr) { + if (myCall.getOffset() > 0 || myCall.getLimit() > 0) { + TRI_ASSERT(skipped > 0 || result != nullptr); + // This is a specific break point return now. + // Sorry we can only return one block. + // This means we have prepared the first block. + // But still need more data. + return {ExecutionState::HASMORE, skipped, result}; + } else { + dropBlock(); + continue; + } + } if (_data.front()->size() <= _inflight) { dropBlock(); continue; From 1a54762068741362a228461377c996b1877b6a61 Mon Sep 17 00:00:00 2001 From: Heiko Date: Tue, 18 Feb 2020 18:43:27 +0100 Subject: [PATCH 068/122] Feature/aql subquery operations stack enumerate list (#10988) * produce inputRange * produce inputRange test * produce inputRange test 2 * apply new api * enable new api * enumeratelistexecutor changes * added hackis shouldSkip method * add new execution style for enumerate lsit * added skip for enumerate list * edit of enumerate list executor * fixed enumlist executor * optimized skip loop * more tests, using new framework * allow empty output checks * added new style tests, removed old tests * removed old produceRows method * fixed tests * added one missing exit case * added another exit in SHADOWROWS mode, as we need to take a look at fullCount there * Revert "added another exit in SHADOWROWS mode, as we need to take a look at fullCount there" This reverts commit e74b7cede78f81b8fece034fc921a969e91dda12. * do not take care of fullCount in executor itself, impl will do * removed obsolete comment * rm _skipped because unused, return upstreamstate * added invalid value test, added border edge tests for softLimit, hardLimit and fullCount * api change * added test for inner states inside the executor * added new tests * fixed assert * removed two not needed ifs * asserted a case in our test helper * Prepare reusability of LOG output for ExecutionBlock. 
* Added first version of DEBUG output to ExecutionBlockImpl * undo this commit before merge * undo this commit before merge * Merged Co-authored-by: Michael Hackstein --- arangod/Aql/EnumerateCollectionExecutor.cpp | 6 +- arangod/Aql/EnumerateListExecutor.cpp | 186 ++++-- arangod/Aql/EnumerateListExecutor.h | 40 +- arangod/Aql/ExecutionBlockImpl.cpp | 16 +- etc/testing/arangod-common.conf | 1 + tests/Aql/EnumerateListExecutorTest.cpp | 543 ++++++++++-------- .../shell-statement-encoding-noncluster.js | 2 +- 7 files changed, 458 insertions(+), 336 deletions(-) diff --git a/arangod/Aql/EnumerateCollectionExecutor.cpp b/arangod/Aql/EnumerateCollectionExecutor.cpp index 446b3d0a949c..15a5db47ef83 100644 --- a/arangod/Aql/EnumerateCollectionExecutor.cpp +++ b/arangod/Aql/EnumerateCollectionExecutor.cpp @@ -42,7 +42,7 @@ using namespace arangodb; using namespace arangodb::aql; - + namespace { std::vector const emptyAttributePositions; } @@ -55,7 +55,7 @@ EnumerateCollectionExecutorInfos::EnumerateCollectionExecutorInfos( std::unordered_set registersToKeep, ExecutionEngine* engine, Collection const* collection, Variable const* outVariable, bool produceResult, Expression* filter, - std::vector const& projections, + std::vector const& projections, bool useRawDocumentPointers, bool random) : ExecutorInfos(make_shared_unordered_set(), make_shared_unordered_set({outputRegister}), @@ -119,7 +119,7 @@ EnumerateCollectionExecutor::EnumerateCollectionExecutor(Fetcher& fetcher, Infos _documentProducingFunctionContext(_input, nullptr, _infos.getOutputRegisterId(), _infos.getProduceResult(), _infos.getQuery(), _infos.getFilter(), - _infos.getProjections(), + _infos.getProjections(), ::emptyAttributePositions, true, _infos.getUseRawDocumentPointers(), false), _state(ExecutionState::HASMORE), diff --git a/arangod/Aql/EnumerateListExecutor.cpp b/arangod/Aql/EnumerateListExecutor.cpp index 3160ccc8ef92..2840dbf3339c 100644 --- a/arangod/Aql/EnumerateListExecutor.cpp +++ b/arangod/Aql/EnumerateListExecutor.cpp @@ -25,6 +25,8 @@ #include "EnumerateListExecutor.h" +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -45,7 +47,7 @@ void throwArrayExpectedException(AqlValue const& value) { " as operand to FOR loop; you provided a value of type '") + value.getTypeString() + std::string("'")); } -} // namespace +} // namespace EnumerateListExecutorInfos::EnumerateListExecutorInfos( RegisterId inputRegister, RegisterId outputRegister, @@ -70,84 +72,142 @@ RegisterId EnumerateListExecutorInfos::getOutputRegister() const noexcept { } EnumerateListExecutor::EnumerateListExecutor(Fetcher& fetcher, EnumerateListExecutorInfos& infos) - : _infos(infos), - _fetcher(fetcher), - _currentRow{CreateInvalidInputRowHint{}}, - _rowState(ExecutionState::HASMORE), - _inputArrayPosition(0), - _inputArrayLength(0) {} + : _infos(infos), _currentRow{CreateInvalidInputRowHint{}}, _inputArrayPosition(0), _inputArrayLength(0) {} std::pair EnumerateListExecutor::produceRows(OutputAqlItemRow& output) { - while (true) { - // HIT in first run, because pos and length are initiliazed - // both with 0 - - if (_inputArrayPosition == _inputArrayLength) { - // we need to set position back to zero - // because we finished iterating over existing array - // element and need to refetch another row - // _inputArrayPosition = 0; - if (_rowState == ExecutionState::DONE) { - return {_rowState, NoStats{}}; - } - initialize(); - std::tie(_rowState, 
_currentRow) = _fetcher.fetchRow(); - if (_rowState == ExecutionState::WAITING) { - return {_rowState, NoStats{}}; - } - } + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - if (!_currentRow.isInitialized()) { - TRI_ASSERT(_rowState == ExecutionState::DONE); - return {_rowState, NoStats{}}; - } +void EnumerateListExecutor::initializeNewRow(AqlItemBlockInputRange& inputRange) { + if (_currentRow) { + std::ignore = inputRange.nextDataRow(); + } + std::tie(_currentRowState, _currentRow) = inputRange.peekDataRow(); + if (!_currentRow) { + return; + } - AqlValue const& inputList = _currentRow.getValue(_infos.getInputRegister()); + // fetch new row, put it in local state + AqlValue const& inputList = _currentRow.getValue(_infos.getInputRegister()); - if (_inputArrayPosition == 0) { - // store the length into a local variable - // so we don't need to calculate length every time - if (inputList.isDocvec()) { - _inputArrayLength = inputList.docvecSize(); - } else { - if (!inputList.isArray()) { - throwArrayExpectedException(inputList); - } - _inputArrayLength = inputList.length(); - } + // store the length into a local variable + // so we don't need to calculate length every time + if (inputList.isDocvec()) { + _inputArrayLength = inputList.docvecSize(); + } else { + if (!inputList.isArray()) { + throwArrayExpectedException(inputList); } + _inputArrayLength = inputList.length(); + } - if (_inputArrayLength == 0) { + _inputArrayPosition = 0; +} + +void EnumerateListExecutor::processArrayElement(OutputAqlItemRow& output) { + bool mustDestroy; + AqlValue const& inputList = _currentRow.getValue(_infos.getInputRegister()); + AqlValue innerValue = getAqlValue(inputList, _inputArrayPosition, mustDestroy); + AqlValueGuard guard(innerValue, mustDestroy); + + TRI_IF_FAILURE("EnumerateListBlock::getSome") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + output.moveValueInto(_infos.getOutputRegister(), _currentRow, guard); + output.advanceRow(); + + // set position to +1 for next iteration after new fetchRow + _inputArrayPosition++; +} + +size_t EnumerateListExecutor::skipArrayElement(size_t toSkip) { + size_t skipped = 0; + + if (toSkip <= _inputArrayLength - _inputArrayPosition) { + // if we're skipping less or exact the amount of elements we can skip with toSkip + _inputArrayPosition += toSkip; + skipped = toSkip; + } else if (toSkip > _inputArrayLength - _inputArrayPosition) { + // we can only skip the max amount of values we've in our array + skipped = _inputArrayLength - _inputArrayPosition; + _inputArrayPosition = _inputArrayLength; + } + return skipped; +} + +std::tuple EnumerateListExecutor::produceRows( + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + AqlCall upstreamCall{}; + upstreamCall.fullCount = output.getClientCall().fullCount; + + while (inputRange.hasDataRow() && !output.isFull()) { + if (_inputArrayLength == _inputArrayPosition) { + // we reached either the end of an array + // or are in our first loop iteration + initializeNewRow(inputRange); continue; - } else if (_inputArrayLength == _inputArrayPosition) { - // we reached the end, forget all state - initialize(); + } - if (_rowState == ExecutionState::HASMORE) { - continue; - } else { - return {_rowState, NoStats{}}; - } - } else { - bool mustDestroy; - AqlValue innerValue = getAqlValue(inputList, _inputArrayPosition, mustDestroy); - AqlValueGuard guard(innerValue, mustDestroy); + TRI_ASSERT(_inputArrayPosition < _inputArrayLength); + processArrayElement(output); + } - 
TRI_IF_FAILURE("EnumerateListBlock::getSome") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } + if (_inputArrayLength == _inputArrayPosition) { + // we reached either the end of an array + // or are in our first loop iteration + initializeNewRow(inputRange); + } - output.moveValueInto(_infos.getOutputRegister(), _currentRow, guard); + return {inputRange.upstreamState(), NoStats{}, upstreamCall}; +} + +std::tuple EnumerateListExecutor::skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call) { + AqlCall upstreamCall{}; - // set position to +1 for next iteration after new fetchRow - _inputArrayPosition++; + if (!inputRange.hasDataRow()) { + return {inputRange.upstreamState(), NoStats{}, 0, upstreamCall}; + } - if (_inputArrayPosition < _inputArrayLength || _rowState == ExecutionState::HASMORE) { - return {ExecutionState::HASMORE, NoStats{}}; + InputAqlItemRow input{CreateInvalidInputRowHint{}}; + size_t skipped = 0; + bool offsetPhase = (call.getOffset() > 0); + + while (inputRange.hasDataRow() && call.shouldSkip()) { + if (_inputArrayLength == _inputArrayPosition) { + // we reached either the end of an array + // or are in our first loop iteration + initializeNewRow(inputRange); + continue; + } + // auto const& [state, input] = inputRange.peekDataRow(); + + TRI_ASSERT(_inputArrayPosition < _inputArrayLength); + // if offset is > 0, we're in offset skip phase + if (offsetPhase) { + if (skipped < call.getOffset()) { + // we still need to skip offset entries + skipped += skipArrayElement(call.getOffset() - skipped); + } else { + // we skipped enough in our offset phase + break; } - return {ExecutionState::DONE, NoStats{}}; + } else { + // fullCount phase - skippen bis zum ende + skipped += skipArrayElement(_inputArrayLength - _inputArrayPosition); } } + call.didSkip(skipped); + + upstreamCall.softLimit = call.getOffset(); + if (_inputArrayPosition < _inputArrayLength) { + // fullCount will always skip the complete array + TRI_ASSERT(offsetPhase); + return {ExecutorState::HASMORE, NoStats{}, skipped, upstreamCall}; + } + return {inputRange.upstreamState(), NoStats{}, skipped, upstreamCall}; } void EnumerateListExecutor::initialize() { diff --git a/arangod/Aql/EnumerateListExecutor.h b/arangod/Aql/EnumerateListExecutor.h index 34cfc7957092..70916e2a7464 100644 --- a/arangod/Aql/EnumerateListExecutor.h +++ b/arangod/Aql/EnumerateListExecutor.h @@ -41,6 +41,8 @@ class Methods; namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; class ExecutorInfos; class OutputAqlItemRow; class NoStats; @@ -85,7 +87,7 @@ class EnumerateListExecutor { using Infos = EnumerateListExecutorInfos; using Stats = NoStats; - EnumerateListExecutor(Fetcher& fetcher, EnumerateListExecutorInfos&); + EnumerateListExecutor(Fetcher&, EnumerateListExecutorInfos&); ~EnumerateListExecutor() = default; /** @@ -95,15 +97,47 @@ class EnumerateListExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief Will fetch a new InputRow if necessary and store their local state + * + * @return bool done in case we do not have any input and upstreamState is done + */ + void initializeNewRow(AqlItemBlockInputRange& inputRange); + + /** + * @brief Will process an found array element + */ + void processArrayElement(OutputAqlItemRow& output); + + /** + * @brief Will skip a maximum of n-elements inside the current array + */ + size_t skipArrayElement(size_t skip); + + /** + * @brief produce the next Row of Aql Values. 
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple produceRows( + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output); + + /** + * @brief skip the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call); + private: AqlValue getAqlValue(AqlValue const& inVarReg, size_t const& pos, bool& mustDestroy); void initialize(); private: EnumerateListExecutorInfos& _infos; - Fetcher& _fetcher; InputAqlItemRow _currentRow; - ExecutionState _rowState; + ExecutorState _currentRowState; size_t _inputArrayPosition; size_t _inputArrayLength; }; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index acb04789adb7..1cb86283cf7d 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -141,7 +141,7 @@ constexpr bool isNewStyleExecutor = TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - ShortestPathExecutor>; + ShortestPathExecutor, EnumerateListExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -1060,7 +1060,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif - SortedCollectExecutor>), + EnumerateListExecutor, SortedCollectExecutor>), + "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! @@ -1381,16 +1382,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if (_outputItemRow->allRowsUsed()) { _execState = ExecState::DONE; } else if (state == ExecutorState::DONE) { - if (_lastRange.hasDataRow()) { - // TODO this state is invalid, and can just show up now if we exclude SKIP - _execState = ExecState::PRODUCE; - } else { - // Right now we cannot support to have more than one set of - // ShadowRows inside of a Range. - // We do not know how to continue with the above executor after a shadowrow. 
- TRI_ASSERT(!_lastRange.hasDataRow()); - _execState = ExecState::DONE; - } + _execState = ExecState::DONE; } } else { _execState = ExecState::DONE; diff --git a/etc/testing/arangod-common.conf b/etc/testing/arangod-common.conf index b664b6edb9d2..89fd87728211 100644 --- a/etc/testing/arangod-common.conf +++ b/etc/testing/arangod-common.conf @@ -3,6 +3,7 @@ line-number = false force-direct = false level = info level = replication=warn +level = queries=debug role = true [database] diff --git a/tests/Aql/EnumerateListExecutorTest.cpp b/tests/Aql/EnumerateListExecutorTest.cpp index 44bb6c416f6b..6132aad39bb1 100644 --- a/tests/Aql/EnumerateListExecutorTest.cpp +++ b/tests/Aql/EnumerateListExecutorTest.cpp @@ -25,15 +25,19 @@ #include "gtest/gtest.h" +#include "ExecutorTestHelper.h" #include "RowFetcherHelper.h" -#include "fakeit.hpp" +#include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" #include "Aql/EnumerateListExecutor.h" #include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutionEngine.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/ResourceUsage.h" #include "Aql/Stats.h" +#include "AqlItemBlockHelper.h" +#include "Mocks/Servers.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" @@ -47,296 +51,327 @@ namespace arangodb { namespace tests { namespace aql { +// test inner executor behaviour class EnumerateListExecutorTest : public ::testing::Test { protected: ExecutionState state; + NoStats stats; + AqlCall call; + ResourceMonitor monitor; AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; - EnumerateListExecutorTest() : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS) {} }; -TEST_F(EnumerateListExecutorTest, there_are_no_rows_upstream_the_producer_does_not_wait) { - EnumerateListExecutorInfos infos(0, 1, 1, 2, {}, {0}); - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - VPackBuilder input; +TEST_F(EnumerateListExecutorTest, test_check_state_first_row_border) { + // old styled test, to test the inner step-states of our executor - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - EnumerateListExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} -TEST_F(EnumerateListExecutorTest, there_are_no_rows_upstream_the_producer_waits) { - EnumerateListExecutorInfos infos(0, 1, 1, 2, {}, {0}); - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - VPackBuilder input; - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); - EnumerateListExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). 
- NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); -TEST_F(EnumerateListExecutorTest, there_is_one_row_in_the_upstream_the_producer_waits) { - EnumerateListExecutorInfos infos(3, 4, 4, 5, {}, {0, 1, 2, 3}); + // This is the relevant part of the test SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 5)}; - auto input = VPackParser::fromJson("[ [1, 2, 3, [true, true, true]] ]"); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - EnumerateListExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - - /* - * Here we are not waiting after every row produce, because the fetcher - * does not need to refetch a new row to produce the next one. - * 1. produce => WAIT RES1 - due true flag in - * SingleRowFetcherHelper - * 2. produce => HASMORE RES1 - return a row - * 3. produce => HASMORE RES2 - return a row - * 4. produce => HASMORE RES3 - return a row - * 5. 
produce => DONE RES4 - DONE - do not return a row - */ - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - block = result.stealBlock(); - AqlValue v = block->getValue(0, 0); - ASSERT_TRUE(v.isNumber()); - int64_t number = v.toInt64(); - ASSERT_EQ(number, 1); - - v = block->getValue(1, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 1); - - v = block->getValue(1, 1); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 2); - - v = block->getValue(1, 2); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 3); - - bool mustDestroy = false; - v = block->getValue(1, 3); - ASSERT_TRUE(v.isArray()); - ASSERT_TRUE(v.at(0, mustDestroy, false).toBoolean()); - ASSERT_TRUE(v.at(1, mustDestroy, false).toBoolean()); - ASSERT_TRUE(v.at(2, mustDestroy, false).toBoolean()); - - v = block->getValue(1, 4); - ASSERT_TRUE(v.isBoolean()); - ASSERT_TRUE(v.toBoolean()); -} - -TEST_F(EnumerateListExecutorTest, there_is_one_empty_array_row_in_the_upstream_the_producer_waits) { EnumerateListExecutorInfos infos(3, 4, 4, 5, {}, {0, 1, 2, 3}); - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 5)}; - auto input = VPackParser::fromJson("[ [1, 2, 3, [] ] ]"); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); EnumerateListExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); + SharedAqlItemBlockPtr inBlock = + buildBlock<4>(itemBlockManager, {{{{1}, {2}, {3}, {R"([true, 1, 2])"}}}, + {{{1}, {2}, {3}, {R"([true, 1, 2])"}}}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + + // receive first 3 of 6 results in total + AqlCall myCall{0, AqlCall::Infinity{}, 3, false}; + + output.setCall(std::move(myCall)); + EXPECT_EQ(output.numRowsWritten(), 0); + { + // reach the end (edge) of our first row, check that we do not return DONE here! 
+ auto const [state, stats, call] = testee.produceRows(input, output); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(output.numRowsWritten(), 3); + } +} - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +TEST_F(EnumerateListExecutorTest, test_check_state_second_row_border) { + // old styled test, to test the inner step-states of our executor - block = result.stealBlock(); - ASSERT_EQ(block, nullptr); -} + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); -TEST_F(EnumerateListExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { - EnumerateListExecutorInfos infos(3, 4, 4, 5, {}, {0, 1, 2, 3}); + // This is the relevant part of the test SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 5)}; - auto input = VPackParser::fromJson( - "[ [1, 2, 3, [true, true, true]], [1, 2, 3, [true, true, true]] ]"); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + EnumerateListExecutorInfos infos(3, 4, 4, 5, {}, {0, 1, 2, 3}); EnumerateListExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - - // like the test above, except now two rows of input - // are available - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); + SharedAqlItemBlockPtr inBlock = + buildBlock<4>(itemBlockManager, {{{{1}, {2}, {3}, {R"([true, 1, 2])"}}}, + {{{1}, {2}, {3}, {R"([true, 1, 2])"}}}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + + // receive 6 of 6 results in total + AqlCall myCall{0, AqlCall::Infinity{}, 6, false}; + + output.setCall(std::move(myCall)); + EXPECT_EQ(output.numRowsWritten(), 0); + { + // reach the end (edge) of our second row, check that we do not return DONE here! 
+ auto const [state, stats, call] = testee.produceRows(input, output); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(output.numRowsWritten(), 6); + } +} - result.advanceRow(); +// new framework tests +using EnumerateListTestHelper = ExecutorTestHelper; +using EnumerateListSplitType = EnumerateListTestHelper::SplitType; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); +class EnumerateListExecutorTestProduce + : public ::testing::TestWithParam> { + protected: + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager; + + mocks::MockAqlServer server; + std::unique_ptr fakedQuery; + EnumerateListExecutorInfos infos; + + SharedAqlItemBlockPtr block; + NoStats stats; + + EnumerateListExecutorTestProduce() + : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), + fakedQuery(server.createFakeQuery()), + infos(0, 1, 1, 2, {}, {0}) { + auto engine = + std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); + fakedQuery->setEngine(engine.release()); + } + + auto makeInfos(RegisterId inputRegister = 0, RegisterId outputRegister = 1, + RegisterId nrInputRegister = 1, RegisterId nrOutputRegister = 2, + std::unordered_set regToClear = {}, + std::unordered_set regToKeep = {0}) -> EnumerateListExecutorInfos { + EnumerateListExecutorInfos infos{inputRegister, outputRegister, + nrInputRegister, nrOutputRegister, + regToClear, regToKeep}; + block = SharedAqlItemBlockPtr{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; + return infos; + } +}; - result.advanceRow(); +TEST_P(EnumerateListExecutorTestProduce, empty_array_1) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([])"}}}) + .setInputSplitType(split) + .expectOutput({}, {}) + .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); +TEST_P(EnumerateListExecutorTestProduce, invalid_value_1) { + auto [split] = GetParam(); + + try { + ExecutorTestHelper(*fakedQuery) + .setInputValue({{1}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + .expectOutput({}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); + FAIL(); + } catch (const arangodb::basics::Exception& e) { + ASSERT_EQ(e.code(), 1563); + } +} - result.advanceRow(); +TEST_P(EnumerateListExecutorTestProduce, default_1) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1, 1, 2])"}}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + .expectOutput({1}, {{1}, {1}, {2}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +TEST_P(EnumerateListExecutorTestProduce, offset_1) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) + .setInputSplitType(split) + .setCall(AqlCall{5, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + .expectOutput({1}, {{6}, {7}, {8}, {9}, {10}}) + .expectSkipped(5) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - block = result.stealBlock(); - bool mustDestroy = 
false; +TEST_P(EnumerateListExecutorTestProduce, offset_2) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) + .setInputSplitType(split) + .setCall(AqlCall{3, AqlCall::Infinity{}, 2, false}) + .expectOutput({1}, {{4}, {5}}) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - // first row - AqlValue v = block->getValue(0, 0); - ASSERT_TRUE(v.isNumber()); - int64_t number = v.toInt64(); - ASSERT_EQ(number, 1); +TEST_P(EnumerateListExecutorTestProduce, offset_3) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) + .setInputSplitType(split) + .setCall(AqlCall{7, AqlCall::Infinity{}, 3, false}) + .expectOutput({1}, {{8}, {9}, {10}}) + .expectSkipped(7) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - v = block->getValue(1, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 1); +TEST_P(EnumerateListExecutorTestProduce, offset_4) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) + .setInputSplitType(split) + .setCall(AqlCall{5, AqlCall::Infinity{}, 2, true}) + .expectOutput({1}, {{6}, {7}}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - v = block->getValue(1, 1); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 2); +TEST_P(EnumerateListExecutorTestProduce, offset_5) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) + .setInputSplitType(split) + .setCall(AqlCall{7, AqlCall::Infinity{}, 3, true}) + .expectOutput({1}, {{8}, {9}, {10}}) + .expectSkipped(7) + .expectedState(ExecutionState::DONE) + .run(makeInfos()); +} - v = block->getValue(1, 2); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 3); +TEST_P(EnumerateListExecutorTestProduce, default_multiple_1) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({{1, 2, 3, R"([1, 2, 3])"}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + .expectOutput({0, 1, 2, 3, 4}, {{RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 1}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 2}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); +} - v = block->getValue(1, 3); - ASSERT_TRUE(v.isArray()); - ASSERT_TRUE(v.at(0, mustDestroy, false).toBoolean()); - ASSERT_TRUE(v.at(1, mustDestroy, false).toBoolean()); - ASSERT_TRUE(v.at(2, mustDestroy, false).toBoolean()); +TEST_P(EnumerateListExecutorTestProduce, default_multiple_2) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, + RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + .expectOutput({0, 1, 2, 3, 4}, {{RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 1}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 2}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}, + RowBuilder<5>{1, 2, 3, R"([4, 5, 6])", 4}, + RowBuilder<5>{1, 2, 3, R"([4, 5, 6])", 5}, + RowBuilder<5>{1, 2, 3, R"([4, 5, 6])", 6}}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); +} - v = block->getValue(1, 4); - 
ASSERT_TRUE(v.isBoolean()); - ASSERT_TRUE(v.toBoolean()); +TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_soft) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, + RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) + .setInputSplitType(split) + .setCall(AqlCall{0, 3, AqlCall::Infinity{}, false}) + .expectOutput({0, 1, 2, 3, 4}, {{RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 1}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 2}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) + .expectSkipped(0) + .expectedState(ExecutionState::HASMORE) // hasmore because of softLimit + .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); +} - // second row - v = block->getValue(2, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 1); +TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_hard) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, + RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, 3, false}) + .expectOutput({0, 1, 2, 3, 4}, {{RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 1}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 2}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) // done because of hardLimit + .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); +} - v = block->getValue(2, 1); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 2); +TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_hard_fullcount) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, + RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, 3, true}) + .expectOutput({0, 1, 2, 3, 4}, {{RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 1}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 2}, + RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) + .expectSkipped(3) // skipped amount of 3 in the fullCount phase + .expectedState(ExecutionState::DONE) + .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); +} - v = block->getValue(2, 2); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 3); +template +const EnumerateListSplitType splitIntoBlocks = + EnumerateListSplitType{std::vector{vs...}}; +template +const EnumerateListSplitType splitStep = EnumerateListSplitType{step}; - v = block->getValue(2, 3); - ASSERT_TRUE(v.isArray()); - ASSERT_TRUE(v.at(0, mustDestroy, false).toBoolean()); - ASSERT_TRUE(v.at(1, mustDestroy, false).toBoolean()); - ASSERT_TRUE(v.at(2, mustDestroy, false).toBoolean()); +INSTANTIATE_TEST_CASE_P(EnumerateListExecutor, EnumerateListExecutorTestProduce, + ::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, splitStep<2>)); - v = block->getValue(2, 4); - ASSERT_TRUE(v.isBoolean()); - ASSERT_TRUE(v.toBoolean()); -} +// namespace aql } // namespace aql } // namespace tests diff --git a/tests/js/common/shell/shell-statement-encoding-noncluster.js b/tests/js/common/shell/shell-statement-encoding-noncluster.js index 6687d0acaa01..ed7fe34325e0 100644 --- a/tests/js/common/shell/shell-statement-encoding-noncluster.js +++ b/tests/js/common/shell/shell-statement-encoding-noncluster.js @@ -96,7 +96,7 @@ function StatementResultEncodingSuite () { testQueryAttribute : function () { var results = db._query("FOR t IN UnitTestsShellStatement " + "LET l = (FOR x IN 
t.alternateName FILTER x.`@language` == 'bn' RETURN x) " + - "RETURN { name: t.name, alternateName: l }").toArray(); + "RETURN { name: t.name, alternateName: l }", {}, {profile: 3}).toArray(); var map = { }; results.forEach(function(result) { delete result._key; From 6d6edc984b106c53ca4d66893901034a439ac066 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 20 Feb 2020 10:42:24 +0100 Subject: [PATCH 069/122] Feature/aql subquery execution block impl execute implementation inject stack into fetcher (#11124) * Added api to pass through the stack to the dependencyProxy. THis is just temporary and doomed to be removed as soon as API is completed. * Let depdendency Proxy forward the Stack. This may not work in all cases. * Default implement Assignment constructor. * Removed Query jenkins debug output again * Fixed forwarding of stack in skipSome --- arangod/Aql/AllRowsFetcher.cpp | 5 ++ arangod/Aql/AllRowsFetcher.h | 6 +- arangod/Aql/AqlCallStack.h | 2 + arangod/Aql/ConstFetcher.h | 3 + arangod/Aql/DependencyProxy.cpp | 80 ++++++++++++++++--- arangod/Aql/DependencyProxy.h | 10 ++- arangod/Aql/ExecutionBlockImpl.cpp | 1 + .../Aql/MultiDependencySingleRowFetcher.cpp | 8 +- arangod/Aql/MultiDependencySingleRowFetcher.h | 8 +- arangod/Aql/SingleRowFetcher.cpp | 6 ++ arangod/Aql/SingleRowFetcher.h | 3 + etc/testing/arangod-common.conf | 1 - 12 files changed, 114 insertions(+), 19 deletions(-) diff --git a/arangod/Aql/AllRowsFetcher.cpp b/arangod/Aql/AllRowsFetcher.cpp index 06daba611938..117deb8ae799 100644 --- a/arangod/Aql/AllRowsFetcher.cpp +++ b/arangod/Aql/AllRowsFetcher.cpp @@ -254,3 +254,8 @@ std::pair AllRowsFetcher::fetchShadowRow(size_ return {state, row}; } + +//@deprecated +auto AllRowsFetcher::useStack(AqlCallStack const& stack) -> void { + _dependencyProxy->useStack(stack); +} \ No newline at end of file diff --git a/arangod/Aql/AllRowsFetcher.h b/arangod/Aql/AllRowsFetcher.h index 06051e477595..adfe5a402817 100644 --- a/arangod/Aql/AllRowsFetcher.h +++ b/arangod/Aql/AllRowsFetcher.h @@ -179,8 +179,10 @@ class AllRowsFetcher { ExecutionState upstreamState(); // NOLINTNEXTLINE google-default-arguments - std::pair fetchShadowRow( - size_t atMost = ExecutionBlock::DefaultBatchSize); + std::pair fetchShadowRow(size_t atMost = ExecutionBlock::DefaultBatchSize); + + //@deprecated + auto useStack(AqlCallStack const& stack) -> void; private: DependencyProxy* _dependencyProxy; diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 214de85d1e4d..b4af03eec652 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -39,6 +39,8 @@ class AqlCallStack { // Used to pass between blocks AqlCallStack(AqlCallStack const& other); + AqlCallStack& operator=(AqlCallStack const& other) = default; + // Quick test is this CallStack is of local relevance, or it is sufficient to pass it through bool isRelevant() const; diff --git a/arangod/Aql/ConstFetcher.h b/arangod/Aql/ConstFetcher.h index 46b9ec73667d..70fe73fa4c12 100644 --- a/arangod/Aql/ConstFetcher.h +++ b/arangod/Aql/ConstFetcher.h @@ -107,6 +107,9 @@ class ConstFetcher { // NOLINTNEXTLINE google-default-arguments std::pair fetchShadowRow(size_t atMost = 1) const; + //@deprecated + auto useStack(AqlCallStack const& stack) -> void{}; + private: /** * @brief Input block currently in use. 
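Editor's note: the useStack() shims added to AllRowsFetcher and ConstFetcher above, together with the DependencyProxy changes below, forward one and the same AqlCallStack — a stack that holds one AqlCall per (sub)query nesting level, where a block pops the call meant for itself and passes the rest upstream. The following is a simplified, self-contained model of that idea; Call and CallStack are illustrative stand-ins, not the real arangodb::aql::AqlCall/AqlCallStack.

// Simplified stand-in for the call-stack forwarding idea (illustrative only).
#include <cassert>
#include <cstddef>
#include <vector>

struct Call {
  std::size_t offset{0};  // rows to skip before producing
  std::size_t limit{0};   // rows to produce afterwards
};

class CallStack {
 public:
  explicit CallStack(Call call) : _calls{call} {}

  void pushCall(Call call) { _calls.push_back(call); }

  Call popCall() {
    assert(!_calls.empty());
    Call top = _calls.back();
    _calls.pop_back();
    return top;
  }

  std::size_t depth() const { return _calls.size(); }

 private:
  // one entry per (sub)query nesting level, innermost last
  std::vector<Call> _calls;
};

int main() {
  // Outer query asks for 10 rows, a nested subquery for 5 with an offset of 2.
  CallStack stack{Call{0, 10}};
  stack.pushCall(Call{2, 5});

  // The innermost block consumes its own call ...
  Call mine = stack.popCall();
  assert(mine.offset == 2 && mine.limit == 5);

  // ... and forwards the remaining stack to its dependency.
  assert(stack.depth() == 1);
  return 0;
}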
Used for memory management by the diff --git a/arangod/Aql/DependencyProxy.cpp b/arangod/Aql/DependencyProxy.cpp index 63f6ba51d140..5d16d05dd4e6 100644 --- a/arangod/Aql/DependencyProxy.cpp +++ b/arangod/Aql/DependencyProxy.cpp @@ -82,18 +82,29 @@ ExecutionState DependencyProxy::prefetchBlock(size_t atMost) { TRI_ASSERT(atMost > 0); ExecutionState state; SharedAqlItemBlockPtr block; + + // Temporary. + // Just do a copy of the stack here to not mess with it. + AqlCallStack stack = _injectedStack; + stack.pushCall(AqlCall::SimulateGetSome(atMost)); + // Also temporary, will not be used here. + size_t skipped = 0; do { // Note: upstreamBlock will return next dependency // if we need to loop here if (_distributeId.empty()) { - std::tie(state, block) = upstreamBlock().getSome(atMost); + std::tie(state, skipped, block) = upstreamBlock().execute(stack); } else { auto upstreamWithClient = dynamic_cast(&upstreamBlock()); - std::tie(state, block) = upstreamWithClient->getSomeForShard(atMost, _distributeId); + std::tie(state, skipped, block) = + upstreamWithClient->executeForClient(stack, _distributeId); } TRI_IF_FAILURE("ExecutionBlock::getBlock") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } + // Cannot do skipping here + // Temporary! + TRI_ASSERT(skipped == 0); if (state == ExecutionState::WAITING) { TRI_ASSERT(block == nullptr); @@ -162,13 +173,22 @@ DependencyProxy::fetchBlockForDependency(size_t dependency, si TRI_ASSERT(atMost > 0); ExecutionState state; SharedAqlItemBlockPtr block; + + // Temporary. + // Just do a copy of the stack here to not mess with it. + AqlCallStack stack = _injectedStack; + stack.pushCall(AqlCall::SimulateGetSome(atMost)); + // Also temporary, will not be used here. + size_t skipped = 0; + if (_distributeId.empty()) { - std::tie(state, block) = upstream.getSome(atMost); + std::tie(state, skipped, block) = upstream.execute(stack); } else { auto upstreamWithClient = dynamic_cast(&upstream); - std::tie(state, block) = upstreamWithClient->getSomeForShard(atMost, _distributeId); + std::tie(state, skipped, block) = + upstreamWithClient->executeForClient(stack, _distributeId); } - + TRI_ASSERT(skipped == 0); TRI_IF_FAILURE("ExecutionBlock::getBlock") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -205,14 +225,31 @@ std::pair DependencyProxy::skipSomeFor ExecutionState state = ExecutionState::HASMORE; + // Temporary. + // Just do a copy of the stack here to not mess with it. + AqlCallStack stack = _injectedStack; + stack.pushCall(AqlCall::SimulateSkipSome(atMost)); + // Also temporary, will not be used here. + SharedAqlItemBlockPtr block; + while (state == ExecutionState::HASMORE && _skipped < atMost) { size_t skippedNow; TRI_ASSERT(_skipped <= atMost); - std::tie(state, skippedNow) = upstream.skipSome(atMost - _skipped); + { + // Make sure we call with the correct offset + // This is just a temporary dance until execute is implemented everywhere. + auto tmpCall = stack.popCall(); + tmpCall.offset = atMost - _skipped; + stack.pushCall(std::move(tmpCall)); + } + std::tie(state, skippedNow, block) = upstream.execute(stack); if (state == ExecutionState::WAITING) { TRI_ASSERT(skippedNow == 0); return {state, 0}; } + // Temporary. + // If we return a block here it will be lost + TRI_ASSERT(block == nullptr); _skipped += skippedNow; TRI_ASSERT(_skipped <= atMost); @@ -234,17 +271,31 @@ std::pair DependencyProxy::skipSome(si TRI_ASSERT(_skipped <= toSkip); ExecutionState state = ExecutionState::HASMORE; + // Temporary. + // Just do a copy of the stack here to not mess with it. 
+ AqlCallStack stack = _injectedStack; + stack.pushCall(AqlCall::SimulateSkipSome(toSkip)); + // Also temporary, will not be used here. + SharedAqlItemBlockPtr block; + while (_skipped < toSkip) { size_t skippedNow; // Note: upstreamBlock will return next dependency // if we need to loop here TRI_ASSERT(_skipped <= toSkip); + { + // Make sure we call with the correct offset + // This is just a temporary dance until execute is implemented everywhere. + auto tmpCall = stack.popCall(); + tmpCall.offset = toSkip - _skipped; + stack.pushCall(std::move(tmpCall)); + } if (_distributeId.empty()) { - std::tie(state, skippedNow) = upstreamBlock().skipSome(toSkip - _skipped); + std::tie(state, skippedNow, block) = upstreamBlock().execute(stack); } else { auto upstreamWithClient = dynamic_cast(&upstreamBlock()); - std::tie(state, skippedNow) = - upstreamWithClient->skipSomeForShard(toSkip - _skipped, _distributeId); + std::tie(state, skippedNow, block) = + upstreamWithClient->executeForClient(stack, _distributeId); } TRI_ASSERT(skippedNow <= toSkip - _skipped); @@ -254,6 +305,10 @@ std::pair DependencyProxy::skipSome(si return {state, 0}; } + // Temporary. + // If we return a block here it will be lost + TRI_ASSERT(block == nullptr); + _skipped += skippedNow; // When the current dependency is done, advance. @@ -312,7 +367,12 @@ DependencyProxy::DependencyProxy( _blockPassThroughQueue(), _currentDependency(0), _skipped(0), - _vpackOptions(options) {} + _vpackOptions(options), + _injectedStack(AqlCall{}) { + // Make the default stack usable, for tests only. + // This needs to be removed soon. + _injectedStack.popCall(); +} template RegisterId DependencyProxy::getNrInputRegisters() const { diff --git a/arangod/Aql/DependencyProxy.h b/arangod/Aql/DependencyProxy.h index 7779c922ef41..c66011845512 100644 --- a/arangod/Aql/DependencyProxy.h +++ b/arangod/Aql/DependencyProxy.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_BLOCK_FETCHER_H #define ARANGOD_AQL_BLOCK_FETCHER_H +#include "Aql/AqlCallStack.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionState.h" #include "Aql/SharedAqlItemBlockPtr.h" @@ -68,8 +69,7 @@ class DependencyProxy { DependencyProxy(std::vector const& dependencies, AqlItemBlockManager& itemBlockManager, std::shared_ptr const> inputRegisters, - RegisterId nrInputRegisters, - velocypack::Options const*); + RegisterId nrInputRegisters, velocypack::Options const*); TEST_VIRTUAL ~DependencyProxy() = default; @@ -115,6 +115,9 @@ class DependencyProxy { [[nodiscard]] velocypack::Options const* velocypackOptions() const noexcept; + //@deprecated + auto useStack(AqlCallStack stack) -> void { _injectedStack = stack; } + protected: [[nodiscard]] AqlItemBlockManager& itemBlockManager(); [[nodiscard]] AqlItemBlockManager const& itemBlockManager() const; @@ -141,6 +144,9 @@ class DependencyProxy { size_t _currentDependency; size_t _skipped; velocypack::Options const* const _vpackOptions; + + // @deprecated + AqlCallStack _injectedStack; }; } // namespace arangodb::aql diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 1cb86283cf7d..031907416678 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -619,6 +619,7 @@ std::tuple ExecutionBlockImpl MultiDependencySingleRowFetcher::preFetchNumberOfRowsForDependency( @@ -354,3 +355,8 @@ bool MultiDependencySingleRowFetcher::fetchBlockIfNecessary(size_t const depende } return true; } + +//@deprecated +auto MultiDependencySingleRowFetcher::useStack(AqlCallStack const& stack) -> void 
{ + _dependencyProxy->useStack(stack); +} \ No newline at end of file diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.h b/arangod/Aql/MultiDependencySingleRowFetcher.h index 6889e7060fa4..fd17efc44f6d 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.h +++ b/arangod/Aql/MultiDependencySingleRowFetcher.h @@ -129,8 +129,10 @@ class MultiDependencySingleRowFetcher { std::pair skipRowsForDependency(size_t dependency, size_t atMost); - std::pair fetchShadowRow( - size_t atMost = ExecutionBlock::DefaultBatchSize); + std::pair fetchShadowRow(size_t atMost = ExecutionBlock::DefaultBatchSize); + + //@deprecated + auto useStack(AqlCallStack const& stack) -> void; private: DependencyProxy* _dependencyProxy; @@ -165,7 +167,7 @@ class MultiDependencySingleRowFetcher { * subquery level. If it returns false, there may or may not be more. */ bool noMoreDataRows(DependencyInfo const& info) const; - + bool isAtShadowRow(DependencyInfo const& info) const; bool fetchBlockIfNecessary(const size_t dependency, const size_t atMost); diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index db6f14d7ba65..0043efd86771 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -278,5 +278,11 @@ template std::pair SingleRowFetcher::fetchBlockForPassthrough(size_t atMost); #endif +//@deprecated +template +auto SingleRowFetcher::useStack(AqlCallStack const& stack) -> void { + _dependencyProxy->useStack(stack); +} + template class ::arangodb::aql::SingleRowFetcher; template class ::arangodb::aql::SingleRowFetcher; diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index ef98004b4084..e33717eff395 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -138,6 +138,9 @@ class SingleRowFetcher { [[nodiscard]] bool isAtShadowRow() const; #endif + //@deprecated + auto useStack(AqlCallStack const& stack) -> void; + private: DependencyProxy* _dependencyProxy; diff --git a/etc/testing/arangod-common.conf b/etc/testing/arangod-common.conf index 89fd87728211..b664b6edb9d2 100644 --- a/etc/testing/arangod-common.conf +++ b/etc/testing/arangod-common.conf @@ -3,7 +3,6 @@ line-number = false force-direct = false level = info level = replication=warn -level = queries=debug role = true [database] From 207795831f7458d5437cea81323646651bd8e1cc Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Thu, 20 Feb 2020 15:51:47 +0000 Subject: [PATCH 070/122] Tweak ExecutorTestHelper to test pipelines (#11079) * Tweak ExecutorTestHelper to test pipelines * Modifications * Fixup tests * Address some comments * Fix. 
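Editor's note: the reworked ExecutorTestHelper is essentially a fluent builder — a test declares the input block, the AqlCall and the expected output, skip count and final state, and run() executes the block(s) and performs the assertions. Below is a stripped-down sketch of that builder shape; MiniExecutorTest and its members are illustrative stand-ins, not the real helper from tests/Aql/ExecutorTestHelper.h. The pipeline tweak in this patch keeps the same declarative shape while allowing several execution blocks to be chained in front of the assertions.

// Stripped-down sketch of the fluent test-builder pattern (illustrative only).
#include <cassert>
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

class MiniExecutorTest {
 public:
  using Rows = std::vector<int>;
  // The "executor" under test: consumes input, returns produced rows + skipped count.
  using Executor = std::function<std::pair<Rows, std::size_t>(Rows const&)>;

  MiniExecutorTest& setInputValue(Rows input) {
    _input = std::move(input);
    return *this;
  }
  MiniExecutorTest& expectOutput(Rows expected) {
    _expectedOutput = std::move(expected);
    return *this;
  }
  MiniExecutorTest& expectSkipped(std::size_t expected) {
    _expectedSkipped = expected;
    return *this;
  }
  void run(Executor exec) const {
    auto [output, skipped] = exec(_input);
    assert(output == _expectedOutput);
    assert(skipped == _expectedSkipped);
  }

 private:
  Rows _input;
  Rows _expectedOutput;
  std::size_t _expectedSkipped{0};
};

int main() {
  // "Executor": skip the first two rows, return the rest.
  auto skipTwo = [](MiniExecutorTest::Rows const& in) {
    MiniExecutorTest::Rows out(in.begin() + 2, in.end());
    return std::make_pair(out, std::size_t{2});
  };

  MiniExecutorTest{}
      .setInputValue({1, 2, 3, 4, 5})
      .expectOutput({3, 4, 5})
      .expectSkipped(2)
      .run(skipTwo);
  return 0;
}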
--- tests/Aql/EnumerateListExecutorTest.cpp | 67 ++++++++++++-------- tests/Aql/ExecutorTestHelper.h | 83 ++++++++++++++++++++++--- tests/Aql/HashedCollectExecutorTest.cpp | 77 ++++++++++++++--------- tests/Aql/IdExecutorTest.cpp | 5 +- tests/Aql/ReturnExecutorTest.cpp | 32 ++++++---- tests/Aql/SortedCollectExecutorTest.cpp | 17 ++--- 6 files changed, 192 insertions(+), 89 deletions(-) diff --git a/tests/Aql/EnumerateListExecutorTest.cpp b/tests/Aql/EnumerateListExecutorTest.cpp index 6132aad39bb1..32048fc2a8b7 100644 --- a/tests/Aql/EnumerateListExecutorTest.cpp +++ b/tests/Aql/EnumerateListExecutorTest.cpp @@ -133,7 +133,7 @@ TEST_F(EnumerateListExecutorTest, test_check_state_second_row_border) { } // new framework tests -using EnumerateListTestHelper = ExecutorTestHelper; +using EnumerateListTestHelper = ExecutorTestHelper<1, 1>; using EnumerateListSplitType = EnumerateListTestHelper::SplitType; class EnumerateListExecutorTestProduce @@ -173,28 +173,30 @@ class EnumerateListExecutorTestProduce TEST_P(EnumerateListExecutorTestProduce, empty_array_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{{R"([])"}}}) .setInputSplitType(split) .expectOutput({}, {}) .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) .expectSkipped(0) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, invalid_value_1) { auto [split] = GetParam(); try { - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{1}}) .setInputSplitType(split) .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) .expectOutput({}, {}) .expectSkipped(0) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); FAIL(); } catch (const arangodb::basics::Exception& e) { ASSERT_EQ(e.code(), 1563); @@ -204,85 +206,92 @@ TEST_P(EnumerateListExecutorTestProduce, invalid_value_1) { TEST_P(EnumerateListExecutorTestProduce, default_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{{R"([1, 1, 2])"}}}) .setInputSplitType(split) .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) .expectOutput({1}, {{1}, {1}, {2}}) .expectSkipped(0) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, offset_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) .setInputSplitType(split) .setCall(AqlCall{5, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) .expectOutput({1}, {{6}, {7}, {8}, {9}, {10}}) .expectSkipped(5) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, offset_2) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) .setInputSplitType(split) .setCall(AqlCall{3, AqlCall::Infinity{}, 2, false}) .expectOutput({1}, {{4}, {5}}) .expectSkipped(3) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, offset_3) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos()) 
.setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) .setInputSplitType(split) .setCall(AqlCall{7, AqlCall::Infinity{}, 3, false}) .expectOutput({1}, {{8}, {9}, {10}}) .expectSkipped(7) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, offset_4) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) .setInputSplitType(split) .setCall(AqlCall{5, AqlCall::Infinity{}, 2, true}) .expectOutput({1}, {{6}, {7}}) .expectSkipped(8) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, offset_5) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos()) .setInputValue({{{R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])"}}}) .setInputSplitType(split) .setCall(AqlCall{7, AqlCall::Infinity{}, 3, true}) .expectOutput({1}, {{8}, {9}, {10}}) .expectSkipped(7) .expectedState(ExecutionState::DONE) - .run(makeInfos()); + .run(); } TEST_P(EnumerateListExecutorTestProduce, default_multiple_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<4, 5>(*fakedQuery) + .setExecBlock(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})) .setInputValue({{1, 2, 3, R"([1, 2, 3])"}}) .setInputSplitType(split) .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) @@ -291,13 +300,14 @@ TEST_P(EnumerateListExecutorTestProduce, default_multiple_1) { RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) .expectSkipped(0) .expectedState(ExecutionState::DONE) - .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); + .run(); } TEST_P(EnumerateListExecutorTestProduce, default_multiple_2) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<4, 5>(*fakedQuery) + .setExecBlock(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})) .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) .setInputSplitType(split) @@ -310,13 +320,14 @@ TEST_P(EnumerateListExecutorTestProduce, default_multiple_2) { RowBuilder<5>{1, 2, 3, R"([4, 5, 6])", 6}}}) .expectSkipped(0) .expectedState(ExecutionState::DONE) - .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); + .run(); } TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_soft) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<4, 5>(*fakedQuery) + .setExecBlock(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})) .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) .setInputSplitType(split) @@ -326,13 +337,14 @@ TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_soft) { RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) .expectSkipped(0) .expectedState(ExecutionState::HASMORE) // hasmore because of softLimit - .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); + .run(); } TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_hard) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<4, 5>(*fakedQuery) + .setExecBlock(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})) .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) .setInputSplitType(split) @@ -342,13 +354,14 @@ TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_hard) { RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) .expectSkipped(0) .expectedState(ExecutionState::DONE) // done because of 
hardLimit - .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); + .run(); } TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_hard_fullcount) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<4, 5>(*fakedQuery) + .setExecBlock(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})) .setInputValue({RowBuilder<4>{1, 2, 3, R"([1, 2, 3])"}, RowBuilder<4>{1, 2, 3, R"([4, 5, 6])"}}) .setInputSplitType(split) @@ -358,7 +371,7 @@ TEST_P(EnumerateListExecutorTestProduce, default_border_first_array_hard_fullcou RowBuilder<5>{1, 2, 3, R"([1, 2, 3])", 3}}}) .expectSkipped(3) // skipped amount of 3 in the fullCount phase .expectedState(ExecutionState::DONE) - .run(makeInfos(3, 4, 4, 5, {}, {0, 1, 2, 3})); + .run(); } template diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 0edc6c4992a8..8c421efc8d92 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -33,6 +33,7 @@ #include "Aql/AqlCallStack.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutionEngine.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutionStats.h" #include "Aql/OutputAqlItemRow.h" @@ -139,7 +140,49 @@ template class AqlExecutorTestCaseWithParam : public AqlExecutorTestCase, public ::testing::WithParamInterface {}; -template +using ExecBlock = std::unique_ptr; + +struct Pipeline { + using PipelineStorage = std::deque; + + Pipeline() : _pipeline{} {}; + Pipeline(ExecBlock&& init) { _pipeline.emplace_back(std::move(init)); } + Pipeline(std::deque&& init) : _pipeline(std::move(init)){}; + Pipeline(Pipeline& other) = delete; + Pipeline(Pipeline&& other) : _pipeline(std::move(other._pipeline)){}; + + Pipeline& operator=(Pipeline&& other) { + _pipeline = std::move(other._pipeline); + return *this; + } + + ~Pipeline() { + for (auto&& b : _pipeline) { + b.release(); + } + }; + + bool empty() const { return _pipeline.empty(); } + void reset() { _pipeline.clear(); } + + std::deque const& get() const { return _pipeline; }; + std::deque& get() { return _pipeline; }; + + private: + PipelineStorage _pipeline; +}; + +inline auto concatPipelines(Pipeline&& bottom, Pipeline&& top) -> Pipeline { + if (!bottom.empty()) { + bottom.get().back()->addDependency(top.get().begin()->get()); + } + bottom.get().insert(std::end(bottom.get()), std::make_move_iterator(top.get().begin()), + std::make_move_iterator(top.get().end())); + + return std::move(bottom); +} + +template struct ExecutorTestHelper { using SplitType = std::variant, std::size_t, std::monostate>; @@ -222,7 +265,29 @@ struct ExecutorTestHelper { _expectedStats = stats; _testStats = true; return *this; - }; + } + + template + auto setExecBlock(typename E::Infos infos) -> ExecutorTestHelper& { + auto& testeeNode = _execNodes.emplace_back(std::move( + std::make_unique(_query.plan(), _execNodes.size()))); + setPipeline(Pipeline{std::make_unique>(_query.engine(), + testeeNode.get(), std::move(infos))}); + return *this; + } + + template + auto createExecBlock(typename E::Infos infos) -> ExecBlock { + auto& testeeNode = _execNodes.emplace_back(std::move( + std::make_unique(_query.plan(), _execNodes.size()))); + return std::make_unique>(_query.engine(), testeeNode.get(), + std::move(infos)); + } + + auto setPipeline(Pipeline&& pipeline) -> ExecutorTestHelper& { + _pipeline = std::move(pipeline); + return *this; + } auto allowAnyOutputOrder(bool expected, size_t skippedRows = 0) -> ExecutorTestHelper& { _unorderedOutput = expected; @@ -244,19 +309,17 @@ struct 
ExecutorTestHelper { return *this; } - auto run(typename E::Infos infos) -> void { + auto run() -> void { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); auto inputBlock = generateInputRanges(itemBlockManager); - auto testeeNode = std::make_unique(_query.plan(), 1); - - ExecutionBlockImpl testee{_query.engine(), testeeNode.get(), std::move(infos)}; - testee.addDependency(inputBlock.get()); + TRI_ASSERT(!_pipeline.empty()); + _pipeline.get().back()->addDependency(inputBlock.get()); AqlCallStack stack{_call}; - auto const [state, skipped, result] = testee.execute(stack); + auto const [state, skipped, result] = _pipeline.get().front()->execute(stack); EXPECT_EQ(skipped, _expectedSkip); EXPECT_EQ(state, _expectedState); @@ -355,6 +418,8 @@ struct ExecutorTestHelper { arangodb::aql::Query& _query; std::unique_ptr _dummyNode; + Pipeline _pipeline; + std::vector> _execNodes; }; enum class ExecutorCall { @@ -376,7 +441,7 @@ using ExecutorStepResult = std::tuple +template std::tuple, arangodb::aql::ExecutionStats> runExecutor(arangodb::aql::AqlItemBlockManager& manager, Executor& executor, arangodb::aql::OutputAqlItemRow& outputRow, size_t const numSkip, diff --git a/tests/Aql/HashedCollectExecutorTest.cpp b/tests/Aql/HashedCollectExecutorTest.cpp index 0c01634af57b..6aca1b82d975 100644 --- a/tests/Aql/HashedCollectExecutorTest.cpp +++ b/tests/Aql/HashedCollectExecutorTest.cpp @@ -54,7 +54,7 @@ namespace tests { namespace aql { // This is only to get a split-type. The Type is independent of actual template parameters -using HashedCollectTestHelper = ExecutorTestHelper; +using HashedCollectTestHelper = ExecutorTestHelper<1, 1>; using HashedCollectSplitType = HashedCollectTestHelper::SplitType; using HashedCollectInputParam = std::tuple; @@ -138,7 +138,8 @@ TEST_P(HashedCollectExecutorTest, collect_only) { auto infos = buildInfos(1, 2, {{1, 0}}); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -148,7 +149,7 @@ TEST_P(HashedCollectExecutorTest, collect_only) { .expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect skip all @@ -157,7 +158,8 @@ TEST_P(HashedCollectExecutorTest, skip_all) { AqlCall call{}; call.offset = 1000; // skip all ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -167,7 +169,7 @@ TEST_P(HashedCollectExecutorTest, skip_all) { .expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect fullCount all @@ -177,7 +179,8 @@ TEST_P(HashedCollectExecutorTest, fullcount_all) { call.hardLimit = 0; // HardLimit call.fullCount = true; // count all ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -187,7 +190,7 @@ TEST_P(HashedCollectExecutorTest, fullcount_all) { 
.expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect get some @@ -196,7 +199,8 @@ TEST_P(HashedCollectExecutorTest, collect_only_soft_less) { AqlCall call{}; call.softLimit = 2; ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -206,7 +210,7 @@ TEST_P(HashedCollectExecutorTest, collect_only_soft_less) { .expectedState(ExecutionState::HASMORE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect get some multiple calls @@ -279,7 +283,8 @@ TEST_P(HashedCollectExecutorTest, collect_only_hard_less) { AqlCall call{}; call.hardLimit = 2; ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -289,7 +294,7 @@ TEST_P(HashedCollectExecutorTest, collect_only_hard_less) { .expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect skip some @@ -299,7 +304,8 @@ TEST_P(HashedCollectExecutorTest, skip_some) { call.offset = 2; // skip some call.softLimit = 0; // 0 limit ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -309,7 +315,7 @@ TEST_P(HashedCollectExecutorTest, skip_some) { .expectedState(ExecutionState::HASMORE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect skip and get @@ -319,7 +325,8 @@ TEST_P(HashedCollectExecutorTest, skip_and_get) { call.offset = 2; // skip some call.softLimit = 1000; // high limit ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -329,7 +336,7 @@ TEST_P(HashedCollectExecutorTest, skip_and_get) { .expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect skip and hardLimit @@ -339,7 +346,8 @@ TEST_P(HashedCollectExecutorTest, skip_and_hardLimit) { call.offset = 2; // skip some call.hardLimit = 1; // hard limit ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -349,7 +357,7 @@ TEST_P(HashedCollectExecutorTest, skip_and_hardLimit) { .expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect skip and fullCount @@ -360,7 +368,8 @@ TEST_P(HashedCollectExecutorTest, skip_and_fullCount) { call.hardLimit = 2; // hard limit call.fullCount = true; ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + 
.setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -370,7 +379,7 @@ TEST_P(HashedCollectExecutorTest, skip_and_fullCount) { .expectedState(ExecutionState::DONE) .appendEmptyBlock(appendEmpty()) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect with more then one group value @@ -378,7 +387,8 @@ TEST_P(HashedCollectExecutorTest, collect_only_multiple_values) { auto infos = buildInfos(2, 4, {{2, 0}, {3, 1}}); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, RowBuilder<2>{2, 2}, RowBuilder<2>{1, 5}, RowBuilder<2>{6, 1}, RowBuilder<2>{2, 2}, @@ -392,7 +402,7 @@ TEST_P(HashedCollectExecutorTest, collect_only_multiple_values) { .expectSkipped(0) .expectedState(ExecutionState::DONE) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect with one group value and count @@ -400,7 +410,8 @@ TEST_P(HashedCollectExecutorTest, count) { auto infos = buildInfos(1, 3, {{1, 0}}, 2); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<1, 2>(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{1}}, {{1}}, {{2}}, {{1}}, {{6}}, {{2}}, {{R"("1")"}}}) .setInputSplitType(getSplit()) .setCall(call) @@ -409,7 +420,7 @@ TEST_P(HashedCollectExecutorTest, count) { .expectSkipped(0) .expectedState(ExecutionState::DONE) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect with multiple aggregators @@ -419,7 +430,8 @@ TEST_P(HashedCollectExecutorTest, many_aggregators) { {{3, RegisterPlan::MaxRegisterId}, {4, 1}}); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<2, 3>(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, RowBuilder<2>{2, 2}, RowBuilder<2>{1, 5}, RowBuilder<2>{6, 1}, RowBuilder<2>{2, 2}, @@ -433,7 +445,7 @@ TEST_P(HashedCollectExecutorTest, many_aggregators) { .expectSkipped(0) .expectedState(ExecutionState::DONE) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect based on equal arrays. @@ -441,7 +453,8 @@ TEST_P(HashedCollectExecutorTest, collect_arrays) { auto infos = buildInfos(1, 2, {{1, 0}}); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{R"([1,1,1])"}}, {{1}}, {{R"([1,1,1,1])"}}, @@ -456,7 +469,7 @@ TEST_P(HashedCollectExecutorTest, collect_arrays) { .expectSkipped(0) .expectedState(ExecutionState::DONE) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } // Collect based on equal objects. 
@@ -464,7 +477,8 @@ TEST_P(HashedCollectExecutorTest, collect_objects) { auto infos = buildInfos(1, 2, {{1, 0}}); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{{R"({"a": 1, "b": 1})"}}, {{1}}, {{R"({"a": 1, "b": 1, "c": 1})"}}, @@ -483,7 +497,7 @@ TEST_P(HashedCollectExecutorTest, collect_objects) { .expectSkipped(0) .expectedState(ExecutionState::DONE) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } /** @@ -607,7 +621,8 @@ TEST_P(HashedCollectExecutorTestAggregate, run) { auto infos = buildInfos(2, 4, {{2, 0}}); AqlCall call{}; // unlimited produce ExecutionStats stats{}; // No stats here - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 5}, RowBuilder<2>{1, 1}, RowBuilder<2>{2, 2}, RowBuilder<2>{1, 5}, RowBuilder<2>{6, 1}, RowBuilder<2>{2, 2}, @@ -619,7 +634,7 @@ TEST_P(HashedCollectExecutorTestAggregate, run) { .expectSkipped(0) .expectedState(ExecutionState::DONE) // .expectedStats(stats) - .run(std::move(infos)); + .run(); } } // namespace aql diff --git a/tests/Aql/IdExecutorTest.cpp b/tests/Aql/IdExecutorTest.cpp index e9c2d3df07d7..37f9ce13ac61 100644 --- a/tests/Aql/IdExecutorTest.cpp +++ b/tests/Aql/IdExecutorTest.cpp @@ -330,13 +330,14 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_fullCount) { TEST_F(IdExecutionBlockTest, test_hardlimit_single_row_fetcher) { IdExecutorInfos infos{1, {0}, {}}; - ExecutorTestHelper>>(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock>>(std::move(infos)) .setInputValueList(1, 2, 3, 4, 5, 6) .setCall(AqlCall{0, AqlCall::Infinity{}, 2, false}) .expectOutput({0}, {{1}, {2}}) .expectSkipped(0) .expectedState(ExecutionState::DONE) - .run(std::move(infos)); + .run(); } /** diff --git a/tests/Aql/ReturnExecutorTest.cpp b/tests/Aql/ReturnExecutorTest.cpp index bf8dc3608fa3..7fb106efd822 100644 --- a/tests/Aql/ReturnExecutorTest.cpp +++ b/tests/Aql/ReturnExecutorTest.cpp @@ -47,7 +47,7 @@ namespace tests { namespace aql { // This is only to get a split-type. 
The Type is independent of actual template parameters -using ReturnExecutorTestHelper = ExecutorTestHelper; +using ReturnExecutorTestHelper = ExecutorTestHelper<1, 1>; using ReturnExecutorSplitType = ReturnExecutorTestHelper::SplitType; using ReturnExecutorParamType = std::tuple; @@ -113,7 +113,8 @@ INSTANTIATE_TEST_CASE_P(ReturnExecutor, ReturnExecutorTest, TEST_P(ReturnExecutorTest, returns_all_from_upstream) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; // unlimited produce - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -121,14 +122,15 @@ TEST_P(ReturnExecutorTest, returns_all_from_upstream) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(8)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_soft_limit) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.softLimit = 3; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -136,14 +138,15 @@ TEST_P(ReturnExecutorTest, handle_soft_limit) { .expectSkipped(0) .expectedState(ExecutionState::HASMORE) .expectedStats(getCountStats(3)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_hard_limit) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.hardLimit = 5; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -151,14 +154,15 @@ TEST_P(ReturnExecutorTest, handle_hard_limit) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(5)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_offset) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.offset = 4; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -166,7 +170,7 @@ TEST_P(ReturnExecutorTest, handle_offset) { .expectSkipped(4) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(4)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_fullcount) { @@ -174,7 +178,8 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { AqlCall call{}; call.hardLimit = 2; call.fullCount = true; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -182,14 +187,15 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { .expectSkipped(6) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(2)) - .run(std::move(infos)); + .run(); } TEST_P(ReturnExecutorTest, handle_other_inputRegister) { ReturnExecutorInfos infos(1 /*input register*/, 2 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; call.hardLimit = 5; - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper<2, 1>(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValue({{R"("invalid")", 1}, {R"("invalid")", 2}, {R"("invalid")", 5}, @@ -204,7 +210,7 @@ 
TEST_P(ReturnExecutorTest, handle_other_inputRegister) { .expectSkipped(0) .expectedState(ExecutionState::DONE) .expectedStats(getCountStats(5)) - .run(std::move(infos)); + .run(); } } // namespace aql } // namespace tests diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index e580e9b45f24..fa8ce07e644c 100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -924,7 +924,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_5) { } } -using SortedCollectTestHelper = ExecutorTestHelper; +using SortedCollectTestHelper = ExecutorTestHelper<1, 1>; using SortedCollectSplitType = SortedCollectTestHelper::SplitType; class SortedCollectExecutorTestSplit @@ -975,40 +975,43 @@ class SortedCollectExecutorTestSplit TEST_P(SortedCollectExecutorTestSplit, split_1) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) .setInputSplitType(split) .setCall(AqlCall{2, AqlCall::Infinity{}, 2, true}) .expectOutputValueList(3, 4) .expectSkipped(3) .expectedState(ExecutionState::DONE) - .run(std::move(infos)); + .run(); } TEST_P(SortedCollectExecutorTestSplit, split_2) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) .setInputSplitType(split) .setCall(AqlCall{2, 2, AqlCall::Infinity{}, false}) .expectOutputValueList(3, 4) .expectSkipped(2) .expectedState(ExecutionState::HASMORE) - .run(std::move(infos)); + .run(); } TEST_P(SortedCollectExecutorTestSplit, split_3) { auto [split] = GetParam(); - ExecutorTestHelper(*fakedQuery) + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) .setInputValueList(1, 2, 3, 4, 5) .setInputSplitType(split) .setCall(AqlCall{1, AqlCall::Infinity{}, 10, true}) .expectOutputValueList(2, 3, 4, 5) .expectSkipped(1) .expectedState(ExecutionState::DONE) - .run(std::move(infos)); + .run(); } template From 2e2e2144fbd4bf195e83120cf4b68ea3beb3c7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Fri, 21 Feb 2020 16:06:13 +0100 Subject: [PATCH 071/122] AQL execute in LimitExecutor (#10886) * Added a generic Lambda Executor, this can be used in ExecutionBlockImplTests to have finegrained control over the action happening within the Executor * Added first test using the LambdaExecutor. * Added some tests around Execute. GetSome seems to be okayish. Skip not. Namely skipped numbers are not reported correctly. * Made the first ExecutionBlock Execute integration test pass. Still other tests are failing * Simplified the IsSkipSome test, this allows the C++ tests for Mixed Calls to pass. * Added a skip test and fixed the producing executor to recreate the list once for every input line. * More tests. Also added a custom AqlCall printer function for GTest. Tests still red, need to fix fullCount and hardLimit. * Implemented ostream operator for AQLCall. * Properly implemented fullCount incl. UnitTest * Added test for Callforwarding. They still have some todos, but these can only be solved by upgrading the OutputRow. Which should be part of separate PR * Added another test for CallForwarding in passthrough state * Added a Test Implementation for an Executor that uses a dynamic skip implementation. * Fixed skip with HARDLIMIT. * Startet to implement call forwarding test. 
However we need to improve the OutputRow first; this will be done in a separate branch * Removed designated initializers. Thanks for not supporting it, MSVC! * Fixed merge * Update tests/Aql/ExecutionBlockImplTest.cpp Co-Authored-By: Markus Pfeiffer * Reverted accidental change * Started to add implementation of passthrough block allocation * Added a comparator to AqlCall. Mostly for tests * Fixed an issue in skip-passthrough version. Updated the tests. * Allow to 'overSkip' if we do fullCount * Enabled the first set of tests now. Only one set to go * Applied all fixes to get Integration testsuite green * Added some comments on the TestCases executed in the ExecutionBlockImpl * Added test descriptions and removed a duplicate test * Added some comments on LambdaExecutors * Added description of ExecutionBlockImpl execute logic * Assert no input rows in skip * Began writing a test framework for LimitExecutor * Removed unused code * Continued test framework * Applied review comments, thanks to reviewers * Fixed modulo 2 off-by-one error * Renamed getLambda() => getProduceLambda() in test code, as we have produce and skip * Make AqlItemBlockInputRange hold a skipped count * Added == for LimitStats, moved code to .cpp file * Added Stats return value to skipRowsRange * WIP: Implement LimitExecutor test framework * Fixed namespace * Added missing advanceRow call * Added parametrized test cases * Dump * Made test output more readable * Fixed merge conflicts * Set engine in createFakeQuery() * [WIP] Dump: not working * Fixed merge conflicts * Switch from boost::variant to std::variant * Fix comparison of differently sized blocks * Fixed usage of temporary * Fixed skip handling in ExecutionBlockImpl * Add upstream fullCount in LimitExecutor * Fix assertion that was too narrow * Again, fixed skip handling in ExecutionBlockImpl * Extended ExecutorTestHelper, mainly a loop to handle passthrough Executors * Fixed expected skipped count * Fixed bugs in LimitExecutor * Another bugfix in LimitExecutor * Fixed expected output calculation * Removed some comments * Improve test speed by not recreating MockAqlServer for every case * Allow additional variants in ExecutorTestHelper * Tried to fix a bug in LimitExecutor, not quite working * A little cleanup * Toggle lieAboutHasMore * Reverted broken fix, fixed WaitingExecutionBlockMock * Fixed merge conflicts * Add LimitStats to expected checks in tests * Fixed another leftover merge conflict * Fixed fullCount stats * Removed old code from LimitExecutor * Updated ExecutionBlockImpl to allow removal of old executor methods * Removed old tests * Simplified test code * Added comments and constructors to AqlCall * Instantiate test cases with multiple different calls * Fixed a bug in run(), made looping optional * Fixed a bug in DependencyProxy * Fixed a bug and an assertion in LimitExecutor * Added assertions and a fix in WaitingExecutionBlockMock * Enabled stats tests again * Made the executor compile under Mac * Let AqlHelper compare profiled stats as well * Prepare reusability of LOG output for ExecutionBlock. * Added first version of DEBUG output to ExecutionBlockImpl * Modified skip logic in LimitExecutor to simplify what has to be reported downstream and what is handled by internal skip. Also allow to do skip and fullCount in one go, if the limit is 0 * Adapted the expected skipped output in the Limit test * Forward fullCount from client through LIMIT. Simplified the produce logic.
* Reduced the amount of tests in LimitExecutor. Need to decide on a suitable subset * Removed lieAboutHasMore by simply adding an empty block to the end that is blocking return in the WaitingExecutor * Fixed SingleRowFetcher to allow a Range with only skip information. * Intermediate version of ExecutionBlockImpl. Needs to be continued * Allow hardLimit to pass the getSome check * Intermediate commit * Unified the FastForward and FullCount code paths * Produce and Skip go to FastForward if the Executor reports done and the output row is full. This way we can handle more FullCount cases straight away. * Fixed the expected value for FullCount stats in test. This is rather awkward to automatically compute... * Fixed lying of the ExecutionBlockMock. Did not lie correctly if the limit was reached by softLimit. * Make the rules for supporting the old API more relaxed. * Added a mismatch of outputColumns and internal output columns in TestHelper * Fixed AqlItemBlockRangeTests * Implemented unreachable code, otherwise our Jenkins does not compile it ;( * Fixed SortedCollect, it might count the last group twice on fullCount, if produce ends exactly in this group * Use AqlTestCase in ReturnExecutorTest * Return Executor needs to report skipped from upstream properly * Fixed fullCount call behaviour * Fixed expected numberSkipped in LimitExecutor test * Added some DEBUG level log messages in FastForward * Fixed reporting of skip in the skipSome case and removed dead code * Fixed Impl test, we cannot let the WaitingBlockMock do counting on ShadowRows, it is not implemented there. * Removed unused function * Pointless comment * Fixed skipSome of Filter * Avoid endless loop in IndexExecutor fullCount, where it tried to skip 0 documents forever * Fixed return state of IndexExecutor * Fake DONE on hardLimit for the old API, apply only on top level. Improve shortcutting behaviour to jump into FastForward every time the hard limit is reached, even if output is full. * Added an assertion that multiple UPSTREAM call rounds do not mess up the callStack by leaving the _upstreamCall on top. Now the executor itself pops off the call; this is subject to change if we manage the Stack differently * What is skip allowed to do? Adjusted tests to honor that we can do skip and limit now in one go, for all executors that are updated already (see the sketch after this list) * Fixed ConstFetcher to not modify the CallStack; it only has a reference, whereas the other Fetchers copy it to the Executors and modify it there * Do not overfetch on passthrough if the block is full. * We cannot create DONE on oldStyle if we need fullCount... * Unfortunately we temporarily need more calls in the fullCount case, as long as one of the Executors above is not yet on execute * Hmm, gcc and clang do not agree on which types are equal... * Fixed compile issue.
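
As a small sketch of the call semantics these changes are about (the concrete values are only illustrative; the members and helpers are the ones touched in AqlCall.h below):

    // Illustration only: a client call that skips 10 rows, then wants at most
    // 5 rows (hard limit), and afterwards a full count of everything left over.
    AqlCall call{10, true, 5, AqlCall::LimitType::HARD};
    TRI_ASSERT(call.getOffset() == 10);  // drives the SKIP state first
    TRI_ASSERT(call.hasHardLimit() && !call.hasSoftLimit());
    TRI_ASSERT(call.needsFullCount());
    // While the offset is unconsumed, shouldSkip() is true; once the executor
    // has accounted for the skipped and produced rows and getLimit() reaches 0,
    // fullCount keeps shouldSkip() true, which drives the FASTFORWARD phase.
    TRI_ASSERT(call.shouldSkip());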
Co-authored-by: Michael Hackstein Co-authored-by: Markus Pfeiffer --- arangod/Aql/AqlCall.h | 72 +- arangod/Aql/AqlCallStack.h | 14 +- arangod/Aql/AqlItemBlock.cpp | 2 +- arangod/Aql/AqlItemBlock.h | 2 +- arangod/Aql/AqlItemBlockInputRange.cpp | 40 +- arangod/Aql/AqlItemBlockInputRange.h | 35 +- arangod/Aql/ConstFetcher.cpp | 8 +- arangod/Aql/DependencyProxy.cpp | 7 +- arangod/Aql/ExecutionBlockImpl.cpp | 497 +++--- arangod/Aql/ExecutionBlockImpl.h | 9 +- arangod/Aql/FilterExecutor.cpp | 16 +- arangod/Aql/IndexExecutor.cpp | 56 +- arangod/Aql/IndexExecutor.h | 2 + arangod/Aql/LimitExecutor.cpp | 381 ++--- arangod/Aql/LimitExecutor.h | 87 +- arangod/Aql/LimitStats.cpp | 66 + arangod/Aql/LimitStats.h | 50 +- arangod/Aql/Query.cpp | 5 + arangod/Aql/Query.h | 1 + arangod/Aql/ReturnExecutor.cpp | 13 +- arangod/Aql/SingleRowFetcher.cpp | 10 +- arangod/Aql/SortedCollectExecutor.cpp | 10 +- arangod/CMakeLists.txt | 1 + tests/Aql/AqlHelper.cpp | 12 +- tests/Aql/AqlItemBlockHelper.h | 28 +- tests/Aql/AqlItemBlockInputRangeTest.cpp | 3 +- .../EngineInfoContainerCoordinatorTest.cpp | 57 +- tests/Aql/EnumerateListExecutorTest.cpp | 4 +- tests/Aql/ExecutionBlockImplTest.cpp | 8 +- tests/Aql/ExecutorTestHelper.h | 104 +- tests/Aql/FilterExecutorTest.cpp | 10 +- tests/Aql/IdExecutorTest.cpp | 3 +- tests/Aql/LimitExecutorTest.cpp | 1376 +++++------------ tests/Aql/MockTypedNode.cpp | 56 + tests/Aql/MockTypedNode.h | 60 + tests/Aql/ReturnExecutorTest.cpp | 29 +- tests/Aql/ShortestPathExecutorTest.cpp | 7 +- tests/Aql/SingleRowFetcherTest.cpp | 4 +- tests/Aql/SortedCollectExecutorTest.cpp | 36 +- tests/Aql/WaitingExecutionBlockMock.cpp | 25 +- tests/Aql/WaitingExecutionBlockMock.h | 4 +- tests/CMakeLists.txt | 1 + tests/Mocks/Servers.cpp | 6 + tests/js/server/aql/aql-profiler.js | 6 +- 44 files changed, 1459 insertions(+), 1764 deletions(-) create mode 100644 arangod/Aql/LimitStats.cpp create mode 100644 tests/Aql/MockTypedNode.cpp create mode 100644 tests/Aql/MockTypedNode.h diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 63fbf31b5883..75d2c7048b7a 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -29,12 +29,35 @@ #include #include -namespace arangodb { -namespace aql { +namespace arangodb::aql { + struct AqlCall { + // TODO We currently have softLimit and hardLimit, where both can be a number + // or Infinity - but not both may be non-infinite at the same time. + // In addition, a soft limit does only make sense together with a hard + // limit. + // The data structures and APIs should reflect that. E.g.: + // Infinity | SoftLimit { count : Int } | HardLimit { count : Int, fullCount : Bool } + // On a less important case, softLimit = 0 and offset = 0 do not occur together, + // but it's probably not worth implementing that in terms of data structures. class Infinity {}; using Limit = std::variant; + AqlCall() = default; + // Replacements for struct initialization + explicit AqlCall(size_t offset, Limit softLimit = Infinity{}, + Limit hardLimit = Infinity{}, bool fullCount = false) + : offset{offset}, softLimit{softLimit}, hardLimit{hardLimit}, fullCount{fullCount} {} + + enum class LimitType { SOFT, HARD }; + AqlCall(size_t offset, bool fullCount, Infinity) + : offset{offset}, softLimit{Infinity{}}, hardLimit{Infinity{}}, fullCount{fullCount} {} + AqlCall(size_t offset, bool fullCount, size_t limit, LimitType limitType) + : offset{offset}, + softLimit{limitType == LimitType::SOFT ? Limit{limit} : Limit{Infinity{}}}, + hardLimit{limitType == LimitType::HARD ? 
Limit{limit} : Limit{Infinity{}}}, + fullCount{fullCount} {} + // TODO Remove me, this will not be necessary later static AqlCall SimulateSkipSome(std::size_t toSkip) { AqlCall call; @@ -57,12 +80,12 @@ struct AqlCall { // TODO Remove me, this will not be necessary later static bool IsSkipSomeCall(AqlCall const& call) { - return !call.hasHardLimit() && call.getOffset() > 0; + return call.getOffset() > 0; } // TODO Remove me, this will not be necessary later static bool IsGetSomeCall(AqlCall const& call) { - return !call.hasHardLimit() && call.getLimit() > 0 && call.getOffset() == 0; + return call.getLimit() > 0 && call.getOffset() == 0; } // TODO Remove me, this will not be necessary later @@ -71,8 +94,18 @@ struct AqlCall { call.getOffset() == 0 && call.needsFullCount(); } + static bool IsFastForwardCall(AqlCall const& call) { + return call.hasHardLimit() && call.getLimit() == 0 && + call.getOffset() == 0 && !call.needsFullCount(); + } + std::size_t offset{0}; // TODO: The defaultBatchSize function could move into this file instead + // TODO We must guarantee that at most one of those is not Infinity. + // To do that, we should replace softLimit and hardLimit with + // Limit limit; + // bool isHardLimit; + // . Limit softLimit{Infinity{}}; Limit hardLimit{Infinity{}}; bool fullCount{false}; @@ -80,6 +113,9 @@ struct AqlCall { std::size_t getOffset() const { return offset; } + // TODO I think this should return the actual limit without regards to the batch size, + // so we can use it to calculate upstream calls. The batch size should be applied + // when allocating blocks only! std::size_t getLimit() const { return clampToLimit(ExecutionBlock::DefaultBatchSize); } @@ -132,18 +168,14 @@ struct AqlCall { return !std::holds_alternative(hardLimit); } + bool hasSoftLimit() const { + return !std::holds_alternative(softLimit); + } + bool needsFullCount() const { return fullCount; } bool shouldSkip() const { - if (getOffset() > 0) { - // Still need to skip. - return true; - } - if (getLimit() > 0) { - // Still need to produce. 
- return false; - } - return needsFullCount(); + return getOffset() > 0 || (getLimit() == 0 && needsFullCount()); } }; @@ -154,10 +186,7 @@ constexpr bool operator<(AqlCall::Limit const& a, AqlCall::Limit const& b) { if (std::holds_alternative(b)) { return true; } - if (std::get(a) < std::get(b)) { - return true; - } - return false; + return std::get(a) < std::get(b); } constexpr AqlCall::Limit operator+(AqlCall::Limit const& a, size_t n) { @@ -210,12 +239,11 @@ inline std::ostream& operator<<(std::ostream& out, } inline std::ostream& operator<<(std::ostream& out, const arangodb::aql::AqlCall& call) { - return out << "skip: " << call.getOffset() << " softLimit: " << call.softLimit - << " hardLimit: " << call.hardLimit - << " fullCount: " << std::boolalpha << call.fullCount; + return out << "{ skip: " << call.getOffset() << ", softLimit: " << call.softLimit + << ", hardLimit: " << call.hardLimit + << ", fullCount: " << std::boolalpha << call.fullCount << " }"; } -} // namespace aql -} // namespace arangodb +} // namespace arangodb::aql #endif diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index b4af03eec652..0566b2340cb6 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -33,7 +33,7 @@ namespace aql { class AqlCallStack { public: // Initial - AqlCallStack(AqlCall call); + explicit AqlCallStack(AqlCall call); // Used in subquery AqlCallStack(AqlCallStack const& other, AqlCall call); // Used to pass between blocks @@ -73,6 +73,16 @@ class AqlCallStack { // This is used to bypass all executors until we reach the next subquery start. void increaseSubqueryDepth(); + // TODO: Remove me again, only used to fake DONE + // @deprecated + auto empty() const noexcept -> bool { + return _operations.empty() && _depth == 0; + } + + auto subqueryLevel() const noexcept -> size_t { + return _operations.size() + _depth; + } + private: // The list of operations, stacked by depth (e.g. bottom element is from main query) std::stack _operations; @@ -87,4 +97,4 @@ class AqlCallStack { } // namespace aql } // namespace arangodb -#endif \ No newline at end of file +#endif diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index 0238f9a646f1..82edf0f715e3 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -956,7 +956,7 @@ RegisterId AqlItemBlock::getNrRegs() const noexcept { return _nrRegs; } size_t AqlItemBlock::size() const noexcept { return _nrItems; } -std::tuple AqlItemBlock::getRelevantRange() { +std::tuple AqlItemBlock::getRelevantRange() const { // NOTE: // Right now we can only support a range of datarows, that ends // In a range of ShadowRows. diff --git a/arangod/Aql/AqlItemBlock.h b/arangod/Aql/AqlItemBlock.h index 6badb45b8c5d..e765ba3ac2ad 100644 --- a/arangod/Aql/AqlItemBlock.h +++ b/arangod/Aql/AqlItemBlock.h @@ -167,7 +167,7 @@ class AqlItemBlock { size_t size() const noexcept; /// @brief get the relevant consumable range of the block - std::tuple getRelevantRange(); + std::tuple getRelevantRange() const; /// @brief Number of entries in the matrix. If this changes, the memory usage /// must be / in- or decreased appropriately as well. 
diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index fa171c5a7243..4a0156247621 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -30,22 +30,22 @@ using namespace arangodb; using namespace arangodb::aql; -AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state) - : _block(nullptr), _rowIndex(0), _endIndex(0), _finalState(state) { +AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, std::size_t skipped) + : _finalState{state}, _skipped{skipped} { TRI_ASSERT(!hasDataRow()); } -AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, - SharedAqlItemBlockPtr const& block, - std::size_t index, std::size_t) - : _block{block}, _rowIndex{index}, _endIndex(_block->size()), _finalState{state} { +AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, std::size_t skipped, + arangodb::aql::SharedAqlItemBlockPtr const& block, + std::size_t index) + : _block{block}, _rowIndex{index}, _finalState{state}, _skipped{skipped} { TRI_ASSERT(index <= _block->size()); } -AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, - SharedAqlItemBlockPtr&& block, - std::size_t index, std::size_t) noexcept - : _block{std::move(block)}, _rowIndex{index}, _endIndex(_block->size()), _finalState{state} { +AqlItemBlockInputRange::AqlItemBlockInputRange(ExecutorState state, std::size_t skipped, + arangodb::aql::SharedAqlItemBlockPtr&& block, + std::size_t index) noexcept + : _block{std::move(block)}, _rowIndex{index}, _finalState{state}, _skipped{skipped} { TRI_ASSERT(index <= _block->size()); } @@ -57,7 +57,7 @@ bool AqlItemBlockInputRange::hasDataRow() const noexcept { return isIndexValid(_rowIndex) && !isShadowRowAtIndex(_rowIndex); } -std::pair AqlItemBlockInputRange::peekDataRow() { +std::pair AqlItemBlockInputRange::peekDataRow() const { if (hasDataRow()) { return std::make_pair(nextState(), InputAqlItemRow{_block, _rowIndex}); @@ -95,7 +95,7 @@ bool AqlItemBlockInputRange::isShadowRowAtIndex(std::size_t index) const noexcep return _block->isShadowRow(index); } -std::pair AqlItemBlockInputRange::peekShadowRow() { +std::pair AqlItemBlockInputRange::peekShadowRow() const { if (hasShadowRow()) { return std::make_pair(nextState(), ShadowAqlItemRow{_block, _rowIndex}); @@ -146,3 +146,19 @@ ExecutorState AqlItemBlockInputRange::nextState() const noexcept { return ExecutorState::DONE; } } + +auto AqlItemBlockInputRange::skip(std::size_t const toSkip) noexcept -> std::size_t { + auto const skipCount = std::min(_skipped, toSkip); + _skipped -= skipCount; + return skipCount; +} + +auto AqlItemBlockInputRange::skippedInFlight() const noexcept -> std::size_t { + return _skipped; +} + +auto AqlItemBlockInputRange::skipAll() noexcept -> std::size_t { + auto const skipped = _skipped; + _skipped = 0; + return skipped; +} diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 42de49fbb586..79461c8b66f4 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -33,12 +33,14 @@ class ShadowAqlItemRow; class AqlItemBlockInputRange { public: - explicit AqlItemBlockInputRange(ExecutorState state); + explicit AqlItemBlockInputRange(ExecutorState state, std::size_t skipped = 0); - AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr const&, - std::size_t startIndex, std::size_t endIndex); - AqlItemBlockInputRange(ExecutorState, arangodb::aql::SharedAqlItemBlockPtr&&, - std::size_t 
startIndex, std::size_t endIndex) noexcept; + AqlItemBlockInputRange(ExecutorState, std::size_t skipped, + arangodb::aql::SharedAqlItemBlockPtr const&, std::size_t startIndex); + + AqlItemBlockInputRange(ExecutorState, std::size_t skipped, + arangodb::aql::SharedAqlItemBlockPtr&&, + std::size_t startIndex) noexcept; arangodb::aql::SharedAqlItemBlockPtr getBlock() const noexcept; @@ -47,18 +49,26 @@ class AqlItemBlockInputRange { bool hasDataRow() const noexcept; - std::pair peekDataRow(); + std::pair peekDataRow() const; std::pair nextDataRow(); - std::size_t getRowIndex() noexcept { return _rowIndex; }; + std::size_t getRowIndex() const noexcept { return _rowIndex; }; bool hasShadowRow() const noexcept; - std::pair peekShadowRow(); + std::pair peekShadowRow() const; std::pair nextShadowRow(); + // Subtract up to this many rows from the local `_skipped` state; return + // the number actually skipped. Does not skip data rows. + [[nodiscard]] auto skip(std::size_t) noexcept -> std::size_t; + + [[nodiscard]] auto skipAll() noexcept -> std::size_t; + + [[nodiscard]] auto skippedInFlight() const noexcept -> std::size_t; + private: bool isIndexValid(std::size_t index) const noexcept; @@ -70,10 +80,11 @@ class AqlItemBlockInputRange { ExecutorState nextState() const noexcept; private: - arangodb::aql::SharedAqlItemBlockPtr _block; - std::size_t _rowIndex; - std::size_t _endIndex; - ExecutorState _finalState; + arangodb::aql::SharedAqlItemBlockPtr _block{nullptr}; + std::size_t _rowIndex{}; + ExecutorState _finalState{ExecutorState::HASMORE}; + // How many rows were skipped upstream + std::size_t _skipped{}; }; } // namespace arangodb::aql diff --git a/arangod/Aql/ConstFetcher.cpp b/arangod/Aql/ConstFetcher.cpp index 16c011228246..bef0a172bef9 100644 --- a/arangod/Aql/ConstFetcher.cpp +++ b/arangod/Aql/ConstFetcher.cpp @@ -40,7 +40,9 @@ auto ConstFetcher::execute(AqlCallStack& stack) -> std::tuple { // Note this fetcher can only be executed on top level (it is the singleton, or test) TRI_ASSERT(stack.isRelevant()); - auto call = stack.popCall(); + // We only peek the call here, as we do not take over ownership. + // We can replace this by pop again if all executors also only take a reference to the stack. + auto call = stack.peek(); if (_blockForPassThrough == nullptr) { // we are done, nothing to move arround here. return {ExecutionState::DONE, 0, AqlItemBlockInputRange{ExecutorState::DONE}}; @@ -151,7 +153,7 @@ auto ConstFetcher::execute(AqlCallStack& stack) _blockForPassThrough.reset(nullptr); _rowIndex = 0; return {ExecutionState::DONE, call.getSkipCount(), - DataRange{ExecutorState::DONE, resultBlock, 0, resultBlock->size()}}; + DataRange{ExecutorState::DONE, call.getSkipCount(), resultBlock, 0}}; } SharedAqlItemBlockPtr resultBlock = _blockForPassThrough; @@ -185,7 +187,7 @@ auto ConstFetcher::execute(AqlCallStack& stack) resultBlock = resultBlock->slice(sliceIndexes); return {resState, call.getSkipCount(), - DataRange{rangeState, resultBlock, 0, resultBlock->size()}}; + DataRange{rangeState, call.getSkipCount(), resultBlock, 0}}; } void ConstFetcher::injectBlock(SharedAqlItemBlockPtr block) { diff --git a/arangod/Aql/DependencyProxy.cpp b/arangod/Aql/DependencyProxy.cpp index 5d16d05dd4e6..b7828fea8ec4 100644 --- a/arangod/Aql/DependencyProxy.cpp +++ b/arangod/Aql/DependencyProxy.cpp @@ -65,15 +65,16 @@ DependencyProxy::execute(AqlCallStack& stack) { break; } - if (block == nullptr) { - // We're not waiting and didn't get a block, so we have to be done. 
+ if (skipped == 0 && block == nullptr) { + // We're not waiting and didn't get any input, so we have to be done. TRI_ASSERT(state == ExecutionState::DONE); if (!advanceDependency()) { break; } } - } while (block == nullptr); + } while (skipped == 0 && block == nullptr); + return {state, skipped, block}; } diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 031907416678..7a0333327d6f 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -141,7 +141,7 @@ constexpr bool isNewStyleExecutor = TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - ShortestPathExecutor, EnumerateListExecutor>; + ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -186,134 +186,137 @@ std::pair ExecutionBlockImpl::g template std::pair ExecutionBlockImpl::getSomeWithoutTrace(size_t atMost) { - TRI_ASSERT(atMost <= ExecutionBlock::DefaultBatchSize); - // silence tests -- we need to introduce new failure tests for fetchers - TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome1") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome2") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome3") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - - if (getQuery().killed()) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); - } + if constexpr (isNewStyleExecutor) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); + } else { + TRI_ASSERT(atMost <= ExecutionBlock::DefaultBatchSize); + // silence tests -- we need to introduce new failure tests for fetchers + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome1") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome2") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome3") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } - if (_state == InternalState::DONE) { - // We are done, so we stay done - return {ExecutionState::DONE, nullptr}; - } + if (getQuery().killed()) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); + } - if (!_outputItemRow) { - ExecutionState state; - SharedAqlItemBlockPtr newBlock; - std::tie(state, newBlock) = - requestWrappedBlock(atMost, _infos.numberOfOutputRegisters()); - if (state == ExecutionState::WAITING) { - TRI_ASSERT(newBlock == nullptr); - return {state, nullptr}; + if (_state == InternalState::DONE) { + // We are done, so we stay done + return {ExecutionState::DONE, nullptr}; } - if (newBlock == nullptr) { - TRI_ASSERT(state == ExecutionState::DONE); - _state = InternalState::DONE; - // _rowFetcher must be DONE now already - return {state, nullptr}; + + if (!_outputItemRow) { + ExecutionState state; + SharedAqlItemBlockPtr newBlock; + std::tie(state, newBlock) = + requestWrappedBlock(atMost, _infos.numberOfOutputRegisters()); + if (state == ExecutionState::WAITING) { + TRI_ASSERT(newBlock == nullptr); + return {state, nullptr}; + } + if (newBlock == nullptr) { + TRI_ASSERT(state == ExecutionState::DONE); + _state = InternalState::DONE; + // _rowFetcher must be DONE now already + return {state, nullptr}; + } + TRI_ASSERT(newBlock != nullptr); + TRI_ASSERT(newBlock->size() > 0); + // We cannot hold this assertion, if we are on a pass-through + // block and the upstream uses execute already. 
+ // TRI_ASSERT(newBlock->size() <= atMost); + _outputItemRow = createOutputRow(newBlock, AqlCall{}); } - TRI_ASSERT(newBlock != nullptr); - TRI_ASSERT(newBlock->size() > 0); - // We cannot hold this assertion, if we are on a pass-through - // block and the upstream uses execute already. - // TRI_ASSERT(newBlock->size() <= atMost); - _outputItemRow = createOutputRow(newBlock, AqlCall{}); - } - ExecutionState state = ExecutionState::HASMORE; - ExecutorStats executorStats{}; - - TRI_ASSERT(atMost > 0); - - if (isInSplicedSubquery()) { - // The loop has to be entered at least once! - TRI_ASSERT(!_outputItemRow->isFull()); - while (!_outputItemRow->isFull() && _state != InternalState::DONE) { - // Assert that write-head is always pointing to a free row - TRI_ASSERT(!_outputItemRow->produced()); - switch (_state) { - case InternalState::FETCH_DATA: { - std::tie(state, executorStats) = _executor.produceRows(*_outputItemRow); - // Count global but executor-specific statistics, like number of - // filtered rows. - _engine->_stats += executorStats; - if (_outputItemRow->produced()) { - _outputItemRow->advanceRow(); - } + ExecutionState state = ExecutionState::HASMORE; + ExecutorStats executorStats{}; + + TRI_ASSERT(atMost > 0); + + if (isInSplicedSubquery()) { + // The loop has to be entered at least once! + TRI_ASSERT(!_outputItemRow->isFull()); + while (!_outputItemRow->isFull() && _state != InternalState::DONE) { + // Assert that write-head is always pointing to a free row + TRI_ASSERT(!_outputItemRow->produced()); + switch (_state) { + case InternalState::FETCH_DATA: { + std::tie(state, executorStats) = _executor.produceRows(*_outputItemRow); + // Count global but executor-specific statistics, like number of + // filtered rows. + _engine->_stats += executorStats; + if (_outputItemRow->produced()) { + _outputItemRow->advanceRow(); + } - if (state == ExecutionState::WAITING) { - return {state, nullptr}; - } + if (state == ExecutionState::WAITING) { + return {state, nullptr}; + } - if (state == ExecutionState::DONE) { - _state = InternalState::FETCH_SHADOWROWS; + if (state == ExecutionState::DONE) { + _state = InternalState::FETCH_SHADOWROWS; + } + break; } - break; - } - case InternalState::FETCH_SHADOWROWS: { - state = fetchShadowRowInternal(); - if (state == ExecutionState::WAITING) { - return {state, nullptr}; + case InternalState::FETCH_SHADOWROWS: { + state = fetchShadowRowInternal(); + if (state == ExecutionState::WAITING) { + return {state, nullptr}; + } + break; + } + case InternalState::DONE: { + TRI_ASSERT(false); // Invalid state } - break; - } - case InternalState::DONE: { - TRI_ASSERT(false); // Invalid state } } - } - // Modify the return state. - // As long as we do still have ShadowRows - // We need to return HASMORE! - if (_state == InternalState::DONE) { - state = ExecutionState::DONE; - } else { - state = ExecutionState::HASMORE; - } - } else { - // The loop has to be entered at least once! - TRI_ASSERT(!_outputItemRow->isFull()); - while (!_outputItemRow->isFull()) { - std::tie(state, executorStats) = _executor.produceRows(*_outputItemRow); - // Count global but executor-specific statistics, like number of filtered - // rows. - _engine->_stats += executorStats; - if (_outputItemRow->produced()) { - _outputItemRow->advanceRow(); + // Modify the return state. + // As long as we do still have ShadowRows + // We need to return HASMORE! 
+ if (_state == InternalState::DONE) { + state = ExecutionState::DONE; + } else { + state = ExecutionState::HASMORE; } + } else { + // The loop has to be entered at least once! + TRI_ASSERT(!_outputItemRow->isFull()); + while (!_outputItemRow->isFull()) { + std::tie(state, executorStats) = _executor.produceRows(*_outputItemRow); + // Count global but executor-specific statistics, like number of filtered rows. + _engine->_stats += executorStats; + if (_outputItemRow->produced()) { + _outputItemRow->advanceRow(); + } - if (state == ExecutionState::WAITING) { - return {state, nullptr}; - } + if (state == ExecutionState::WAITING) { + return {state, nullptr}; + } - if (state == ExecutionState::DONE) { - auto outputBlock = _outputItemRow->stealBlock(); - // This is not strictly necessary here, as we shouldn't be called again - // after DONE. - _outputItemRow.reset(); - return {state, std::move(outputBlock)}; + if (state == ExecutionState::DONE) { + auto outputBlock = _outputItemRow->stealBlock(); + // This is not strictly necessary here, as we shouldn't be called again after DONE. + _outputItemRow.reset(); + return {state, std::move(outputBlock)}; + } } + + TRI_ASSERT(state == ExecutionState::HASMORE); + TRI_ASSERT(_outputItemRow->isFull()); } - TRI_ASSERT(state == ExecutionState::HASMORE); - TRI_ASSERT(_outputItemRow->isFull()); + auto outputBlock = _outputItemRow->stealBlock(); + // we guarantee that we do return a valid pointer in the HASMORE case. + TRI_ASSERT(outputBlock != nullptr || _state == InternalState::DONE); + _outputItemRow.reset(); + return {state, std::move(outputBlock)}; } - - auto outputBlock = _outputItemRow->stealBlock(); - // we guarantee that we do return a valid pointer in the HASMORE case. - TRI_ASSERT(outputBlock != nullptr || _state == InternalState::DONE); - _outputItemRow.reset(); - return {state, std::move(outputBlock)}; } template @@ -402,6 +405,7 @@ struct ExecuteSkipVariant { template static SkipVariants constexpr skipType() { + static_assert(!isNewStyleExecutor); bool constexpr useFetcher = Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable && !std::is_same>::value; @@ -507,30 +511,35 @@ std::pair ExecutionBlockImpl::skipSome(size_t template std::pair ExecutionBlockImpl::skipSomeOnceWithoutTrace(size_t atMost) { - constexpr SkipVariants customSkipType = skipType(); + if constexpr (isNewStyleExecutor) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); + } else { + constexpr SkipVariants customSkipType = skipType(); - if (customSkipType == SkipVariants::GET_SOME) { - atMost = std::min(atMost, DefaultBatchSize); - auto res = getSomeWithoutTrace(atMost); + if constexpr (customSkipType == SkipVariants::GET_SOME) { + atMost = std::min(atMost, DefaultBatchSize); + auto res = getSomeWithoutTrace(atMost); - size_t skipped = 0; - if (res.second != nullptr) { - skipped = res.second->size(); + size_t skipped = 0; + if (res.second != nullptr) { + skipped = res.second->size(); + } + TRI_ASSERT(skipped <= atMost); + + return {res.first, skipped}; } + + ExecutionState state; + typename Executor::Stats stats; + size_t skipped; + std::tie(state, stats, skipped) = + ExecuteSkipVariant::executeSkip(_executor, _rowFetcher, atMost); + _engine->_stats += stats; TRI_ASSERT(skipped <= atMost); - return {res.first, skipped}; + return {state, skipped}; } - - ExecutionState state; - typename Executor::Stats stats; - size_t skipped; - std::tie(state, stats, skipped) = - ExecuteSkipVariant::executeSkip(_executor, _rowFetcher, atMost); - 
_engine->_stats += stats; - TRI_ASSERT(skipped <= atMost); - - return {state, skipped}; } template @@ -617,9 +626,11 @@ std::tuple ExecutionBlockImpl ExecutionBlockImplsize()); + if (myCall.getLimit() == 0) { + return {ExecutionState::DONE, 0, block}; + } + } + return {state, 0, block}; } else if (AqlCall::IsFullCountCall(myCall)) { auto const [state, skipped] = skipSome(ExecutionBlock::SkipAllSize()); @@ -636,6 +655,9 @@ std::tuple ExecutionBlockImpl ExecutionBlockImpl::r "Properties::inputSizeRestrictsOutputSize is true"); constexpr RequestWrappedBlockVariant variant = - Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable - ? RequestWrappedBlockVariant::PASS_THROUGH - : Executor::Properties::inputSizeRestrictsOutputSize - ? RequestWrappedBlockVariant::INPUTRESTRICTED - : RequestWrappedBlockVariant::DEFAULT; + isNewStyleExecutor + ? RequestWrappedBlockVariant::DEFAULT + : Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable + ? RequestWrappedBlockVariant::PASS_THROUGH + : Executor::Properties::inputSizeRestrictsOutputSize + ? RequestWrappedBlockVariant::INPUTRESTRICTED + : RequestWrappedBlockVariant::DEFAULT; // Override for spliced subqueries, this optimization does not work there. if (isInSplicedSubquery() && variant == RequestWrappedBlockVariant::INPUTRESTRICTED) { @@ -1003,10 +1027,6 @@ auto ExecutionBlockImpl::nextState(AqlCall const& call) const -> ExecS // Then produce return ExecState::PRODUCE; } - if (call.needsFullCount()) { - // then fullcount - return ExecState::FULLCOUNT; - } if (call.hardLimit == 0) { // We reached hardLimit, fast forward return ExecState::FASTFORWARD; @@ -1030,8 +1050,6 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, // ahead on the input range, fetching new blocks when necessary // EXECUTOR: the executor has a specialised skipRowsRange method // that will be called to skip -// GET_SOME: we just request rows from the executor and then discard -// them // enum class SkipRowsRangeVariant { FETCHER, EXECUTOR }; @@ -1061,8 +1079,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif - EnumerateListExecutor, SortedCollectExecutor>), - + EnumerateListExecutor, SortedCollectExecutor, LimitExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! @@ -1085,9 +1102,9 @@ template struct dependent_false : std::false_type {}; template -std::tuple -ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& inputRange, - AqlCall& call) { +auto ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& inputRange, + AqlCall& call) + -> std::tuple { if constexpr (isNewStyleExecutor) { call.skippedRows = 0; if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { @@ -1117,6 +1134,66 @@ ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& input return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); } +/** + * @brief Define the variant of FastForward behaviour + * + * FULLCOUNT => Call executeSkipRowsRange and report what has been skipped. + * EXECUTOR => Call executeSkipRowsRange, but do not report what has been skipped. 
+ * (This instance is used to make sure Modifications are performed, or stats are correct) + * FETCHER => Do not bother the Executor, drop all from input, without further reporting + * + */ +enum class FastForwardVariant { FULLCOUNT, EXECUTOR, FETCHER }; + +template +static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwardVariant { + if (call.needsFullCount() && call.getOffset() == 0 && call.getLimit() == 0) { + // Only start fullCount after the original call is fulfilled. Otherwise + // do fast-forward variant + TRI_ASSERT(call.hasHardLimit()); + return FastForwardVariant::FULLCOUNT; + } + // TODO: We only need to do this is the executor actually require to call. + // e.g. Modifications will always need to be called. Limit only if it needs to report fullCount + if constexpr (is_one_of_v) { + return FastForwardVariant::EXECUTOR; + } + return FastForwardVariant::FETCHER; +} + +template +auto ExecutionBlockImpl::executeFastForward(AqlItemBlockInputRange& inputRange, + AqlCall& clientCall) + -> std::tuple { + TRI_ASSERT(isNewStyleExecutor); + auto type = fastForwardType(clientCall, _executor); + switch (type) { + case FastForwardVariant::FULLCOUNT: + case FastForwardVariant::EXECUTOR: { + LOG_QUERY("cb135", DEBUG) << printTypeInfo() << " apply full count."; + auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); + if (type == FastForwardVariant::EXECUTOR) { + // We do not report the skip + skippedLocal = 0; + } + return {state, stats, skippedLocal, call}; + } + case FastForwardVariant::FETCHER: { + LOG_QUERY("fa327", DEBUG) << printTypeInfo() << " bypass unused rows."; + while (inputRange.hasDataRow()) { + auto [state, row] = inputRange.nextDataRow(); + TRI_ASSERT(row.isInitialized()); + } + AqlCall call{}; + call.hardLimit = 0; + return {inputRange.upstreamState(), typename Executor::Stats{}, 0, call}; + } + } + // Unreachable + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); +} + /** * @brief This is the central function of an executor, and it acts like a * coroutine: It can be called multiple times and keeps state across @@ -1124,13 +1201,12 @@ ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& input * * The intended behaviour of this function is best described in terms of * a state machine; the possible states are the ExecStates - * SKIP, PRODUCE, FULLCOUNT, FASTFORWARD, UPSTREAM, SHADOWROWS, DONE + * SKIP, PRODUCE, FASTFORWARD, UPSTREAM, SHADOWROWS, DONE * * SKIP skipping rows. How rows are skipped is determined by * the Executor that is used. See SkipVariants * PRODUCE calls produceRows of the executor - * FULLCOUNT again skipping rows. like skip, but will skip all rows - * FASTFORWARD like fullcount, but does not count skipped rows. + * FASTFORWARD again skipping rows, will count skipped rows, if fullCount is requested. * UPSTREAM fetches rows from the upstream executor(s) to be processed by * our executor. * SHADOWROWS process any shadow rows @@ -1139,12 +1215,12 @@ ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& input * * We progress within the states in the following way: * There is a nextState method that determines the next state based on the call, it can only lead to: - * SKIP, PRODUCE, FULLCOUNT, FASTFORWAD, DONE + * SKIP, PRODUCE, FASTFORWAD, DONE * * On the first call we will use nextState to get to our starting point. - * After any of SKIP, PRODUCE, FULLCOUNT, FASTFORWAD, DONE We either go to - * 1. DONE (if output is full) - * 2. 
SHADOWROWS (if executor is done) + * After any of SKIP, PRODUCE,, FASTFORWAD, DONE We either go to + * 1. FASTFORWARD (if executor is done) + * 2. DONE (if output is full) * 3. UPSTREAM if executor has More, (Invariant: input fully consumed) * 4. NextState (if none of the above applies) * @@ -1177,6 +1253,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { AqlCall clientCall = stack.popCall(); ExecutorState localExecutorState = ExecutorState::DONE; + TRI_ASSERT(!(clientCall.getOffset() == 0 && clientCall.softLimit == AqlCall::Limit{0})); + TRI_ASSERT(!(clientCall.hasSoftLimit() && clientCall.fullCount)); + TRI_ASSERT(!(clientCall.hasSoftLimit() && clientCall.hasHardLimit())); + // We can only have returned the following internal states TRI_ASSERT(_execState == ExecState::CHECKCALL || _execState == ExecState::SHADOWROWS || _execState == ExecState::UPSTREAM); @@ -1195,6 +1275,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { clientCall = _clientRequest; } + auto returnToState = ExecState::CHECKCALL; + LOG_QUERY("007ac", DEBUG) << "starting statemachine of executor " << printBlockInfo(); while (_execState != ExecState::DONE) { switch (_execState) { @@ -1219,8 +1301,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // This means that they have to be removed from clientCall.getOffset() // This has to be done by the Executor calling call.didSkip() // accordingly. - if (canPassFullcount) { - // In htis case we can first skip. But straight after continue with fullCount, so we might skip more + // The LIMIT executor with a LIMIT of 0 can also bypass fullCount + // here, even if callLimit > 0 + if (canPassFullcount || std::is_same_v) { + // In this case we can first skip. But straight after continue with fullCount, so we might skip more TRI_ASSERT(clientCall.getOffset() + skippedLocal >= offsetBefore); if (clientCall.getOffset() + skippedLocal > offsetBefore) { // First need to count down offset. @@ -1235,7 +1319,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _engine->_stats += stats; // The execute might have modified the client call. if (state == ExecutorState::DONE) { - _execState = ExecState::SHADOWROWS; + _execState = ExecState::FASTFORWARD; } else if (clientCall.getOffset() > 0) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more @@ -1253,6 +1337,15 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(clientCall.getLimit() > 0); LOG_QUERY("1f786", DEBUG) << printTypeInfo() << " call produceRows " << clientCall; + if (outputIsFull()) { + // We need to be able to write data + // But maybe the existing block is full here + // Then we need to wake up again. + // However the client might decide on a different + // call, so we do not record this position + _execState = ExecState::DONE; + break; + } ensureOutputBlock(std::move(clientCall)); TRI_ASSERT(_outputItemRow); @@ -1265,10 +1358,13 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // Produce might have modified the clientCall clientCall = _outputItemRow->getClientCall(); - if (_outputItemRow->isInitialized() && _outputItemRow->allRowsUsed()) { + if (state == ExecutorState::DONE) { + _execState = ExecState::FASTFORWARD; + } else if (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable && + outputIsFull()) { + // In pass through variant we need to stop whenever the block is full. 
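A few hunks above, executeFastForward() picks one of three variants via fastForwardType(). The sketch below models that decision with a plain struct instead of the real AqlCall; the needsSideEffects flag is a stand-in for the compile-time is_one_of_v check on the modification executors and LimitExecutor, so the names here are illustrative only.

#include <cassert>
#include <cstddef>

enum class FastForwardVariant { FULLCOUNT, EXECUTOR, FETCHER };

struct CallModel {
  std::size_t offset{0};
  std::size_t limit{0};
  bool fullCount{false};
  bool needsSideEffects{false};  // e.g. a modification executor
};

FastForwardVariant fastForwardTypeFor(CallModel const& call) {
  if (call.fullCount && call.offset == 0 && call.limit == 0) {
    // The original call is satisfied; everything left is only counted.
    return FastForwardVariant::FULLCOUNT;
  }
  if (call.needsSideEffects) {
    // The executor must still see the rows (e.g. to perform writes).
    return FastForwardVariant::EXECUTOR;
  }
  // Nothing to count and no side effects: just drop the remaining input.
  return FastForwardVariant::FETCHER;
}

int main() {
  assert(fastForwardTypeFor({0, 0, true, false}) == FastForwardVariant::FULLCOUNT);
  assert(fastForwardTypeFor({0, 0, false, true}) == FastForwardVariant::EXECUTOR);
  assert(fastForwardTypeFor({0, 0, false, false}) == FastForwardVariant::FETCHER);
}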
_execState = ExecState::DONE; - } else if (state == ExecutorState::DONE) { - _execState = ExecState::SHADOWROWS; + break; } else if (clientCall.getLimit() > 0 && !_lastRange.hasDataRow()) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more @@ -1283,39 +1379,21 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { case ExecState::FASTFORWARD: { LOG_QUERY("96e2c", DEBUG) << printTypeInfo() << " all produced, fast forward to end up (sub-)query."; - // We can either do FASTFORWARD or FULLCOUNT, difference is that - // fullcount counts what is produced now, FASTFORWARD simply drops - TRI_ASSERT(!clientCall.needsFullCount()); - // We need to claim that the Executor was done - localExecutorState = ExecutorState::DONE; - - // We can drop all dataRows from upstream - while (_lastRange.hasDataRow()) { - auto [state, row] = _lastRange.nextDataRow(); - TRI_ASSERT(row.isInitialized()); - } - if (_lastRange.upstreamState() == ExecutorState::DONE) { - _execState = ExecState::SHADOWROWS; - } else { - // We need to request more, simply send hardLimit 0 upstream - _upstreamRequest = AqlCall{}; - _upstreamRequest.hardLimit = 0; - _execState = ExecState::UPSTREAM; - } - break; - } - case ExecState::FULLCOUNT: { - LOG_QUERY("ff258", DEBUG) - << printTypeInfo() - << " all produced, skip to end up (sub-)query, for fullCount."; auto [state, stats, skippedLocal, call] = - executeSkipRowsRange(_lastRange, clientCall); + executeFastForward(_lastRange, clientCall); + _skipped += skippedLocal; _engine->_stats += stats; localExecutorState = state; if (state == ExecutorState::DONE) { - _execState = ExecState::SHADOWROWS; + if (_outputItemRow && _outputItemRow->isInitialized() && + _outputItemRow->allRowsUsed()) { + // We have a block with data, but no more place for a shadow row. + _execState = ExecState::DONE; + } else { + _execState = ExecState::SHADOWROWS; + } } else { // We need to request more _upstreamRequest = call; @@ -1334,8 +1412,18 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(!_lastRange.hasShadowRow()); size_t skippedLocal = 0; auto callCopy = _upstreamRequest; +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + size_t subqueryLevelBefore = stack.subqueryLevel(); +#endif stack.pushCall(std::move(callCopy)); std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); + // As the stack is copied into the fetcher, we need to pop off our call again. + // If we use other datastructures or moving we may hand over ownership of the stack here + // instead and no popCall is necessary. + stack.popCall(); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + TRI_ASSERT(subqueryLevelBefore == stack.subqueryLevel()); +#endif if (_upstreamState == ExecutionState::WAITING) { // We need to persist the old call before we return. // We might have some local accounting to this call. @@ -1343,11 +1431,16 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // We do not return anything in WAITING state, also NOT skipped. return {_upstreamState, 0, nullptr}; } - // We have a new range, passthrough can use this range. - _hasUsedDataRangeBlock = false; - _skipped += skippedLocal; - // We skipped through passthroug, so count that a skip was solved. - clientCall.didSkip(skippedLocal); + if constexpr (Executor::Properties::allowsBlockPassthrough == + BlockPassthrough::Enable) { + // We have a new range, passthrough can use this range. 
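The UPSTREAM case above pushes a copy of _upstreamRequest onto the stack, lets the fetcher run on its own copy, and then pops the call again so the subquery level is unchanged. A minimal model of that round trip, with all types reduced to illustrative stand-ins:

#include <cassert>
#include <cstddef>
#include <vector>

struct CallModel {
  std::size_t offset{0};
  std::size_t softLimit{0};
};

struct CallStackModel {
  std::vector<CallModel> calls;
  void pushCall(CallModel call) { calls.push_back(call); }
  void popCall() { calls.pop_back(); }
  std::size_t subqueryLevel() const { return calls.size(); }
};

// Stand-in for _rowFetcher.execute(stack): it receives the stack by value.
void fetchFromUpstream(CallStackModel stack) { (void)stack; }

void upstreamStep(CallStackModel& stack, CallModel const& upstreamRequest) {
  auto const levelBefore = stack.subqueryLevel();
  stack.pushCall(upstreamRequest);  // our request towards the dependency
  fetchFromUpstream(stack);         // the fetcher sees the pushed call
  stack.popCall();                  // restore our own level afterwards
  // The invariant checked in maintainer mode in the real code:
  assert(levelBefore == stack.subqueryLevel());
}

int main() {
  CallStackModel stack{{CallModel{}}};
  upstreamStep(stack, CallModel{10, 1000});
  assert(stack.subqueryLevel() == 1);
}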
+ _hasUsedDataRangeBlock = false; + } + if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { + _skipped += skippedLocal; + // We skipped through passthrough, so count that a skip was solved. + clientCall.didSkip(skippedLocal); + } _execState = ExecState::CHECKCALL; break; } @@ -1361,6 +1454,14 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // NOTE: I do not think this is an issue, as the Executor will always say that it cannot do anything with // an empty input. Only exception might be COLLECT COUNT. if (_lastRange.hasShadowRow()) { + if (outputIsFull()) { + // We need to be able to write data + // But maybe the existing block is full here + // Then we need to wake up again here. + returnToState = ExecState::SHADOWROWS; + _execState = ExecState::DONE; + break; + } auto const& [state, shadowRow] = _lastRange.nextShadowRow(); TRI_ASSERT(shadowRow.isInitialized()); ensureOutputBlock(std::move(clientCall)); @@ -1393,6 +1494,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { default: // unreachable TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); } } LOG_QUERY("80c24", DEBUG) << printBlockInfo() << " local statemachine done. Return now."; @@ -1401,7 +1503,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { : SharedAqlItemBlockPtr{nullptr}; // We are locally done with our output. // Next time we need to check the client call again - _execState = ExecState::CHECKCALL; + _execState = returnToState; // This is not strictly necessary here, as we shouldn't be called again // after DONE. _outputItemRow.reset(); @@ -1415,6 +1517,9 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; } + // We must return skipped and/or data when reporting HASMORE + TRI_ASSERT(_upstreamState != ExecutionState::HASMORE || + (skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0))); return {_upstreamState, skipped, std::move(outputBlock)}; } else { // TODO this branch must never be taken with an executor that has not been @@ -1461,6 +1566,12 @@ ExecutionState ExecutionBlockImpl::fetchShadowRowInternal() { return state; } +template +auto ExecutionBlockImpl::outputIsFull() const noexcept -> bool { + return _outputItemRow != nullptr && _outputItemRow->isInitialized() && + _outputItemRow->allRowsUsed(); +} + template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 5868ae27739a..fd6a486412f4 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -122,10 +122,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { SKIP, // We are producing rows PRODUCE, - // We are done producing (limit reached) and drop all rows that are unneeded + // We are done producing (limit reached) and drop all rows that are unneeded, might count. 
FASTFORWARD, - // We are done producing (limit reached), but we count all rows that could be used on higher limit - FULLCOUNT, // We need more information from dependency UPSTREAM, // We are done with a subquery, we need to pass forward ShadowRows @@ -233,6 +231,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { std::tuple executeSkipRowsRange( AqlItemBlockInputRange& input, AqlCall& call); + auto executeFastForward(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + -> std::tuple; + /** * @brief Inner getSome() part, without the tracing calls. */ @@ -285,6 +286,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { // Can only be one of Skip/Produce/FullCount/FastForward/Done [[nodiscard]] auto nextState(AqlCall const& call) const -> ExecState; + [[nodiscard]] auto outputIsFull() const noexcept -> bool; + private: /** * @brief Used to allow the row Fetcher to access selected methods of this diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 8c2d66f09032..cb0d06bb60af 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -105,24 +105,28 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at auto FilterExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) -> std::tuple { FilterStats stats{}; - size_t skipped = 0; - while (inputRange.hasDataRow() && skipped < call.getOffset()) { + while (inputRange.hasDataRow() && call.needSkipMore()) { auto const [unused, input] = inputRange.nextDataRow(); if (!input) { TRI_ASSERT(!inputRange.hasDataRow()); break; } if (input.getValue(_infos.getInputRegister()).toBoolean()) { - skipped++; + call.didSkip(1); } else { stats.incrFiltered(); } } - call.didSkip(skipped); AqlCall upstreamCall{}; - upstreamCall.softLimit = call.getOffset(); - return {inputRange.upstreamState(), stats, skipped, upstreamCall}; + if (call.needSkipMore() && call.getLimit() == 0) { + // FullCount case, we need to skip more, but limit is reached. + upstreamCall.softLimit = ExecutionBlock::SkipAllSize(); + } else { + upstreamCall.softLimit = call.getOffset(); + } + + return {inputRange.upstreamState(), stats, call.getSkipCount(), upstreamCall}; } auto FilterExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) diff --git a/arangod/Aql/IndexExecutor.cpp b/arangod/Aql/IndexExecutor.cpp index d862d9b98900..91e6c33ecf47 100644 --- a/arangod/Aql/IndexExecutor.cpp +++ b/arangod/Aql/IndexExecutor.cpp @@ -484,7 +484,8 @@ size_t IndexExecutor::CursorReader::skipIndex(size_t toSkip) { case Type::Document: TRI_ASSERT(_documentSkipper != nullptr); _cursor->nextDocument(_documentSkipper, toSkip); - break;; + break; + ; } skipped = _context.getAndResetNumScanned() - _context.getAndResetNumFiltered(); } else { @@ -735,7 +736,9 @@ auto IndexExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlIte bool more = getCursor().readIndex(output); TRI_ASSERT(more == getCursor().hasMore()); - LOG_DEVEL_IDX << "IndexExecutor::produceRows::innerLoop output.numRowsWritten() == " << output.numRowsWritten(); + LOG_DEVEL_IDX + << "IndexExecutor::produceRows::innerLoop output.numRowsWritten() == " + << output.numRowsWritten(); // NOTE: more => output.isFull() does not hold, if we do uniqness checks. // The index iterator does still count skipped rows for limit. 
// Nevertheless loop here, the cursor has more so we will retigger @@ -744,19 +747,15 @@ auto IndexExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlIte // Or the cursor is done, so we need to advance } - stats.incrScanned(_documentProducingFunctionContext.getAndResetNumScanned()); stats.incrFiltered(_documentProducingFunctionContext.getAndResetNumFiltered()); } - - bool reportDone = _state == ExecutorState::DONE && !_input.isInitialized(); - AqlCall upstreamCall; upstreamCall.fullCount = clientCall.needsFullCount(); - LOG_DEVEL_IDX << "IndexExecutor::produceRows reporting state " << (reportDone ? ExecutorState::DONE : ExecutorState::HASMORE); - return {reportDone ? ExecutorState::DONE : ExecutorState::HASMORE, stats, upstreamCall}; + LOG_DEVEL_IDX << "IndexExecutor::produceRows reporting state " << returnState(); + return {returnState(), stats, upstreamCall}; } auto IndexExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) @@ -775,21 +774,22 @@ auto IndexExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& c IndexStats stats{}; while (clientCall.needSkipMore()) { - LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipped " << _skipped - << " " << clientCall.getOffset(); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipped " << _skipped << " " + << clientCall.getOffset(); // get an input row first, if necessary if (!_input.isInitialized()) { std::tie(_state, _input) = inputRange.peekDataRow(); - LOG_DEVEL_IDX - << "IndexExecutor::skipRowsRange input not initialized, peek next row: " << _state - << " " << std::boolalpha << _input.isInitialized(); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange input not initialized, " + "peek next row: " + << _state << " " << std::boolalpha << _input.isInitialized(); if (_input.isInitialized()) { LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange initIndexes"; initIndexes(_input); if (!advanceCursor()) { - LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange failed to advanceCursor " - "after init"; + LOG_DEVEL_IDX + << "IndexExecutor::skipRowsRange failed to advanceCursor " + "after init"; std::ignore = inputRange.nextDataRow(); _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; // just to validate that after continue we get into retry mode @@ -809,9 +809,14 @@ auto IndexExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& c continue; } - LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipIndex(" - << clientCall.getOffset() << ")"; - size_t skippedNow = getCursor().skipIndex(clientCall.getOffset()); + auto toSkip = clientCall.getOffset(); + if (toSkip == 0) { + TRI_ASSERT(clientCall.needsFullCount()); + toSkip = ExecutionBlock::SkipAllSize(); + } + TRI_ASSERT(toSkip > 0); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipIndex(" << toSkip << ")"; + size_t skippedNow = getCursor().skipIndex(toSkip); LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange skipIndex(...) 
== " << skippedNow; stats.incrScanned(skippedNow); @@ -823,9 +828,18 @@ auto IndexExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& c _skipped = 0; AqlCall upstreamCall; - upstreamCall.fullCount = clientCall.needsFullCount(); + LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange returning " << returnState() + << " " << skipped << " " << upstreamCall; + return {returnState(), stats, skipped, upstreamCall}; +} - LOG_DEVEL_IDX << "IndexExecutor::skipRowsRange returning " << _state << " " << skipped << " " << upstreamCall; - return {_state, stats, skipped, upstreamCall}; +auto IndexExecutor::returnState() const noexcept -> ExecutorState { + if (_input.isInitialized()) { + // We are still working. + // TODO: Potential optimization: We can ask if the cursor has more, or there + // are other cursors. + return ExecutorState::HASMORE; + } + return _state; } diff --git a/arangod/Aql/IndexExecutor.h b/arangod/Aql/IndexExecutor.h index 6dfbf13accdd..618905d7babf 100644 --- a/arangod/Aql/IndexExecutor.h +++ b/arangod/Aql/IndexExecutor.h @@ -248,6 +248,8 @@ class IndexExecutor { bool needsUniquenessCheck() const noexcept; + auto returnState() const noexcept -> ExecutorState; + private: Infos& _infos; DocumentProducingFunctionContext _documentProducingFunctionContext; diff --git a/arangod/Aql/LimitExecutor.cpp b/arangod/Aql/LimitExecutor.cpp index 42a6befce2a9..7d3d3ea6f089 100644 --- a/arangod/Aql/LimitExecutor.cpp +++ b/arangod/Aql/LimitExecutor.cpp @@ -52,238 +52,189 @@ LimitExecutorInfos::LimitExecutorInfos(RegisterId nrInputRegisters, RegisterId n _fullCount(fullCount) {} LimitExecutor::LimitExecutor(Fetcher& fetcher, Infos& infos) - : _infos(infos), - _fetcher(fetcher), - _lastRowToOutput(CreateInvalidInputRowHint{}), - _stateOfLastRowToOutput(ExecutionState::HASMORE) {} -LimitExecutor::~LimitExecutor() = default; - -std::pair LimitExecutor::skipOffset() { - ExecutionState state; - size_t skipped; - std::tie(state, skipped) = _fetcher.skipRows(maxRowsLeftToSkip()); - - // WAITING => skipped == 0 - TRI_ASSERT(state != ExecutionState::WAITING || skipped == 0); - - _counter += skipped; + : _infos(infos), _lastRowToOutput(CreateInvalidInputRowHint{}) {} - LimitStats stats{}; - if (infos().isFullCountEnabled()) { - stats.incrFullCountBy(skipped); - } +LimitExecutor::~LimitExecutor() = default; - return {state, stats}; +auto LimitExecutor::limitFulfilled() const noexcept -> bool { + return remainingOffset() + remainingLimit() == 0; } -std::pair LimitExecutor::skipRestForFullCount() { - ExecutionState state; - size_t skipped; - LimitStats stats{}; - // skip ALL the rows - std::tie(state, skipped) = _fetcher.skipRows(ExecutionBlock::SkipAllSize()); - - if (state == ExecutionState::WAITING) { - TRI_ASSERT(skipped == 0); - return {state, stats}; +auto LimitExecutor::calculateUpstreamCall(AqlCall const& clientCall) const -> AqlCall { + auto upstreamCall = AqlCall{}; + + // Offsets can simply be added. + upstreamCall.offset = clientCall.getOffset() + remainingOffset(); + + // To get the limit for upstream, we must subtract the downstream offset from + // our limit, and take the minimum of this and the downstream limit. + auto const localLimitMinusDownstreamOffset = + remainingLimit() - std::min(remainingLimit(), clientCall.getOffset()); + auto const limit = + std::min(clientCall.getLimit(), localLimitMinusDownstreamOffset); + + // Generally, we create a hard limit. However, if we get a soft limit from + // downstream that is lower than our hard limit, we use that instead. 
+ bool const useSoftLimit = clientCall.hasSoftLimit() && + clientCall.getLimit() < localLimitMinusDownstreamOffset; + + if (useSoftLimit) { + upstreamCall.softLimit = limit; + upstreamCall.fullCount = false; + } else { + upstreamCall.hardLimit = limit; + // We need the fullCount either if we need to report it ourselfes. + // or if the clientCall needs to report it. + upstreamCall.fullCount = infos().isFullCountEnabled() || clientCall.fullCount; } - // We must not update _counter here. It is only used to count until - // offset+limit is reached. - - if (infos().isFullCountEnabled()) { - stats.incrFullCountBy(skipped); - } - - return {state, stats}; + return upstreamCall; } -std::pair LimitExecutor::produceRows(OutputAqlItemRow& output) { - TRI_IF_FAILURE("LimitExecutor::produceRows") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - InputAqlItemRow input{CreateInvalidInputRowHint{}}; - ExecutionState state; - LimitStats stats{}; - - while (LimitState::SKIPPING == currentState()) { - LimitStats tmpStats; - std::tie(state, tmpStats) = skipOffset(); - stats += tmpStats; - if (state == ExecutionState::WAITING || state == ExecutionState::DONE) { - return {state, stats}; - } - } - while (LimitState::RETURNING == currentState()) { - std::tie(state, input) = _fetcher.fetchRow(maxRowsLeftToFetch()); - - if (state == ExecutionState::WAITING) { - return {state, stats}; - } - - // This executor is pass-through. Thus we will never get asked to write an - // output row for which there is no input, as in- and output rows have a - // 1:1 correspondence. - TRI_ASSERT(input.isInitialized()); - - // We've got one input row - _counter++; - - if (infos().isFullCountEnabled()) { - stats.incrFullCount(); - } - - // Return one row - output.copyRow(input); - return {state, stats}; - } - - // This case is special for two reasons. - // First, after this we want to return DONE, regardless of the upstream's - // state. - // Second, when fullCount is enabled, we need to get the fullCount before - // returning the last row, as the count is returned with the stats (and we - // would not be asked again by ExecutionBlockImpl in any case). - if (LimitState::RETURNING_LAST_ROW == currentState()) { - if (_lastRowToOutput.isInitialized()) { - // Use previously saved row iff there is one. We can get here only if - // fullCount is enabled. If it is, we can get here multiple times (until - // we consumed the whole upstream, which might return WAITING repeatedly). - TRI_ASSERT(infos().isFullCountEnabled()); - state = _stateOfLastRowToOutput; - TRI_ASSERT(state != ExecutionState::WAITING); - input = std::move(_lastRowToOutput); - TRI_ASSERT(!_lastRowToOutput.isInitialized()); // rely on the move - } else { - std::tie(state, input) = _fetcher.fetchRow(maxRowsLeftToFetch()); - - if (state == ExecutionState::WAITING) { - return {state, stats}; +auto LimitExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) + -> std::tuple { + // I think this *should* be the case, because we're passthrough. However, + // isFull() ignores shadow rows in the passthrough case, which it probably + // should not. 
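The arithmetic in calculateUpstreamCall() above is worth a worked example. The function below restates it with plain integers (the real code works on the variant-based AqlCall::Limit); the sample values in main() correspond to a LIMIT 10, 100 node whose client asks for an offset of 5 and a soft limit of 20.

#include <algorithm>
#include <cassert>
#include <cstddef>

struct UpstreamCallModel {
  std::size_t offset;
  std::size_t limit;
  bool isHardLimit;
  bool fullCount;
};

UpstreamCallModel calculateUpstream(std::size_t remainingOffset,
                                    std::size_t remainingLimit,
                                    std::size_t clientOffset,
                                    std::size_t clientLimit,
                                    bool clientHasSoftLimit,
                                    bool clientFullCount,
                                    bool ownFullCount) {
  // Offsets simply add up.
  std::size_t const localLimitMinusDownstreamOffset =
      remainingLimit - std::min(remainingLimit, clientOffset);
  // The limit sent upstream is bounded by what this LIMIT still allows and
  // by what the client asked for.
  std::size_t const limit = std::min(clientLimit, localLimitMinusDownstreamOffset);
  bool const useSoftLimit =
      clientHasSoftLimit && clientLimit < localLimitMinusDownstreamOffset;
  return {clientOffset + remainingOffset, limit, !useSoftLimit,
          // fullCount is only meaningful together with a hard limit
          !useSoftLimit && (ownFullCount || clientFullCount)};
}

int main() {
  auto const call = calculateUpstream(10, 100, 5, 20, true, false, false);
  assert(call.offset == 15);         // 5 (client) + 10 (own remaining offset)
  assert(call.limit == 20);          // min(20, 100 - 5)
  assert(call.isHardLimit == false); // the lower soft limit wins
}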
+ // static_assert(Properties::allowsBlockPassthrough == BlockPassthrough::Enable); + // TRI_ASSERT(input.hasDataRow() == !output.isFull()); + + auto const& clientCall = output.getClientCall(); + TRI_ASSERT(clientCall.getOffset() == 0); + + auto stats = LimitStats{}; + + auto call = output.getClientCall(); + TRI_ASSERT(call.getOffset() == 0); + while (inputRange.skippedInFlight() > 0 || inputRange.hasDataRow()) { + if (remainingOffset() > 0) { + // First we skip in the input row until we fullfill our local offset. + auto const didSkip = inputRange.skip(remainingOffset()); + // Need to forward the + _counter += didSkip; + // We do not report this to downstream + // But we report it in fullCount + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(didSkip); } - } - - // This executor is pass-through. Thus we will never get asked to write an - // output row for which there is no input, as in- and output rows have a - // 1:1 correspondence. - TRI_ASSERT(input.isInitialized()); - - if (infos().isFullCountEnabled()) { - // Save the state now. The _stateOfLastRowToOutput will not be used unless - // _lastRowToOutput gets set. - _stateOfLastRowToOutput = state; - LimitStats tmpStats; - std::tie(state, tmpStats) = skipRestForFullCount(); - stats += tmpStats; - if (state == ExecutionState::WAITING) { - // Save the row - _lastRowToOutput = std::move(input); - return {state, stats}; + } else if (!output.isFull()) { + auto numRowsWritten = size_t{0}; + + while (inputRange.hasDataRow()) { + // This block is passhthrough. + static_assert(Properties::allowsBlockPassthrough == BlockPassthrough::Enable, + "For LIMIT with passthrough to work, there must be " + "exactly enough space for all input in the output."); + // So there will always be enough place for all inputRows within + // the output. + TRI_ASSERT(!output.isFull()); + // Also this number can be at most remainingOffset. + TRI_ASSERT(remainingLimit() > numRowsWritten); + output.copyRow(inputRange.nextDataRow().second); + output.advanceRow(); + numRowsWritten++; } + _counter += numRowsWritten; + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(numRowsWritten); + } + } else if (call.needsFullCount()) { + // We are done with producing. + // ExecutionBlockImpl will now call skipSome for the remainder + // There cannot be a dataRow left, as this block is passthrough! + TRI_ASSERT(!inputRange.hasDataRow()); + // There are still skippedInflights; + TRI_ASSERT(inputRange.skippedInFlight() > 0); + break; + } else { + // We are done with producing. + if (infos().isFullCountEnabled()) { + // However we need to report the fullCount from above. + stats.incrFullCountBy(inputRange.skipAll()); + } + // ExecutionBlockImpl will now call skipSome for the remainder + // There cannot be a dataRow left, as this block is passthrough! + TRI_ASSERT(!inputRange.hasDataRow()); + TRI_ASSERT(inputRange.skippedInFlight() == 0); + break; } - - // It's important to increase the counter for the last row only *after* - // skipRestForFullCount() is done, because we need currentState() to stay - // at RETURNING_LAST_ROW until we've actually returned the last row. - _counter++; - if (infos().isFullCountEnabled()) { - stats.incrFullCount(); - } - - output.copyRow(input); - return {ExecutionState::DONE, stats}; } - - // We should never be COUNTING, this must already be done in the - // RETURNING_LAST_ROW-handler. - TRI_ASSERT(LimitState::LIMIT_REACHED == currentState()); - // When fullCount is enabled, the loop may only abort when upstream is done. 
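produceRows() above and skipRowsRange() just below drain rows that upstream has already skipped through skippedInFlight()/skip()/skipAll(). A minimal stand-in for that part of AqlItemBlockInputRange (the real class additionally carries the data block and shadow rows):

#include <cassert>
#include <cstddef>

struct InputRangeModel {
  std::size_t skippedInFlight{0};

  // Consume up to n of the rows that upstream already skipped on our behalf.
  std::size_t skip(std::size_t n) {
    std::size_t const taken = n < skippedInFlight ? n : skippedInFlight;
    skippedInFlight -= taken;
    return taken;
  }
  std::size_t skipAll() { return skip(skippedInFlight); }
};

int main() {
  InputRangeModel range{5};     // upstream already skipped 5 rows for us
  assert(range.skip(3) == 3);   // offset handling consumes part of it
  assert(range.skipAll() == 2); // fullCount reporting takes the rest
  assert(range.skippedInFlight == 0);
}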
- TRI_ASSERT(!infos().isFullCountEnabled()); - - return {ExecutionState::DONE, stats}; + // We're passthrough, we must not have any input left when the limit isfulfilled + TRI_ASSERT(!limitFulfilled() || !inputRange.hasDataRow()); + return {inputRange.upstreamState(), stats, calculateUpstreamCall(call)}; } -std::tuple LimitExecutor::fetchBlockForPassthrough(size_t atMost) { - switch (currentState()) { - case LimitState::LIMIT_REACHED: - // We are done with our rows! - return {ExecutionState::DONE, LimitStats{}, nullptr}; - case LimitState::COUNTING: { - LimitStats stats{}; - while (LimitState::LIMIT_REACHED != currentState()) { - ExecutionState state; - LimitStats tmpStats{}; - std::tie(state, tmpStats) = skipRestForFullCount(); - stats += tmpStats; - - if (state == ExecutionState::WAITING || state == ExecutionState::DONE) { - return {state, stats, nullptr}; - } +auto LimitExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + auto upstreamCall = calculateUpstreamCall(call); + + if (ADB_UNLIKELY(inputRange.skippedInFlight() < upstreamCall.getOffset() && + inputRange.hasDataRow())) { + static_assert(Properties::allowsBlockPassthrough == BlockPassthrough::Enable, + "For LIMIT with passthrough to work, there must no input " + "rows before the offset was skipped."); + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, + "Unexpected input while skipping: got data " + "rows before offset was reached."); + } + auto stats = LimitStats{}; + while (inputRange.skippedInFlight() > 0) { + if (remainingOffset() > 0) { + // First we skip in the input row until we fullfill our local offset. + auto const didSkip = inputRange.skip(remainingOffset()); + // Need to forward the + _counter += didSkip; + // We do not report this to downstream + // But we report it in fullCount + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(didSkip); } - return {ExecutionState::DONE, stats, nullptr}; - } - case LimitState::SKIPPING: { - LimitStats stats{}; - while (LimitState::SKIPPING == currentState()) { - ExecutionState state; - LimitStats tmpStats{}; - std::tie(state, tmpStats) = skipOffset(); - stats += tmpStats; - if (state == ExecutionState::WAITING || state == ExecutionState::DONE) { - return {state, stats, nullptr}; + } else if (remainingLimit() > 0) { + // We do only report to downstream if we have a limit to produce + if (call.getOffset() > 0) { + // Next we skip as many rows as ordered by the client, + // but never more then remainingLimit + auto const didSkip = + inputRange.skip(std::min(remainingLimit(), call.getOffset())); + call.didSkip(didSkip); + _counter += didSkip; + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(didSkip); + } + } else if (call.getLimit() > 0) { + // If we get here we need to break out, and let produce rows be called. + break; + } else if (call.needsFullCount()) { + auto const didSkip = inputRange.skip(remainingLimit()); + call.didSkip(didSkip); + _counter += didSkip; + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(didSkip); + } + } else if (infos().isFullCountEnabled()) { + // Skip the remainder, it does not matter if we need to produce + // anything or not. 
This is only for reporting of the skipped numbers + auto const didSkip = inputRange.skipAll(); + _counter += didSkip; + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(didSkip); } } - - // We should have reached the next state now - TRI_ASSERT(currentState() != LimitState::SKIPPING); - // Now jump to the correct case - auto rv = fetchBlockForPassthrough(atMost); - // Add the stats we collected to the return value - std::get(rv) += stats; - return rv; + } else if (infos().isFullCountEnabled()) { + // Skip the remainder, it does not matter if we need to produce + // anything or not. This is only for reporting of the skipped numbers + auto const didSkip = inputRange.skipAll(); + _counter += didSkip; + if (infos().isFullCountEnabled()) { + stats.incrFullCountBy(didSkip); + } + } else { + // We are done. + // All produced, all skipped, nothing to report + break; } - case LimitState::RETURNING_LAST_ROW: - case LimitState::RETURNING: - auto rv = _fetcher.fetchBlockForPassthrough(std::min(atMost, maxRowsLeftToFetch())); - return {rv.first, LimitStats{}, std::move(rv.second)}; } - // The control flow cannot reach this. It is only here to make MSVC happy, - // which is unable to figure out that the switch above is complete. - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); + return {inputRange.upstreamState(), stats, call.getSkipCount(), + calculateUpstreamCall(call)}; } - -std::tuple LimitExecutor::skipRows(size_t const toSkipRequested) { - // fullCount can only be enabled on the last top-level LIMIT block. Thus - // skip cannot be called on it! If this requirement is changed for some - // reason, the current implementation will not work. - TRI_ASSERT(!infos().isFullCountEnabled()); - - // If we're still skipping ourselves up to offset, this needs to be done first. - size_t const toSkipOffset = - currentState() == LimitState::SKIPPING ? maxRowsLeftToSkip() : 0; - - // We have to skip - // our offset (toSkipOffset or maxRowsLeftToSkip()), - // plus what we were requested to skip (toSkipRequested), - // but not more than our total limit (maxRowsLeftToFetch()). - size_t const toSkipTotal = - std::min(toSkipRequested + toSkipOffset, maxRowsLeftToFetch()); - - ExecutionState state; - size_t skipped; - std::tie(state, skipped) = _fetcher.skipRows(toSkipTotal); - - // WAITING => skipped == 0 - TRI_ASSERT(state != ExecutionState::WAITING || skipped == 0); - - _counter += skipped; - - // Do NOT report the rows we skipped up to the offset, they don't count. - size_t const reportSkipped = toSkipOffset >= skipped ? 0 : skipped - toSkipOffset; - - if (currentState() == LimitState::LIMIT_REACHED) { - state = ExecutionState::DONE; - } - - return std::make_tuple(state, LimitStats{}, reportSkipped); -} \ No newline at end of file diff --git a/arangod/Aql/LimitExecutor.h b/arangod/Aql/LimitExecutor.h index db8071392ee9..aaa40e9b7022 100644 --- a/arangod/Aql/LimitExecutor.h +++ b/arangod/Aql/LimitExecutor.h @@ -95,87 +95,52 @@ class LimitExecutor { ~LimitExecutor(); /** - * @brief produce the next Row of Aql Values. + * @brief produce the next Rows of Aql Values. * - * @return ExecutionState, and if successful exactly one new Row of AqlItems. + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; /** - * @brief Custom skipRows() implementation. This is obligatory to increase - * _counter! 
+ * @brief skip the next Row of Aql Values. * - * Semantically, we first skip until our local offset. We may not report the - * number of rows skipped this way. Second, we skip up to the number of rows - * requested; but at most up to our limit. + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ - std::tuple skipRows(size_t toSkipRequested); - - std::tuple fetchBlockForPassthrough(size_t atMost); + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; private: Infos const& infos() const noexcept { return _infos; }; - size_t maxRowsLeftToFetch() const noexcept { - // counter should never exceed this count! - TRI_ASSERT(infos().getLimitPlusOffset() >= _counter); - return infos().getLimitPlusOffset() - _counter; - } + auto remainingOffset() const noexcept -> size_t { + auto const offset = infos().getOffset(); - size_t maxRowsLeftToSkip() const noexcept { - // should not be called after skipping the offset! - TRI_ASSERT(infos().getOffset() >= _counter); - return infos().getOffset() - _counter; + // Restricted value of _counter in [0, offset] + auto const boundedCounter = std::min(offset, _counter); + TRI_ASSERT(boundedCounter <= offset); + + return offset - boundedCounter; } - enum class LimitState { - // state is SKIPPING until the offset is reached - SKIPPING, - // state is RETURNING until the limit is reached - RETURNING, - // state is RETURNING_LAST_ROW if we've seen the second to last row before - // the limit is reached - RETURNING_LAST_ROW, - // state is COUNTING when the limit is reached and fullcount is enabled - COUNTING, - // state is LIMIT_REACHED only if fullCount is disabled, and we've seen all - // rows up to limit - LIMIT_REACHED, - }; + auto remainingLimit() const noexcept -> size_t { + auto const offset = infos().getOffset(); + auto const limitPlusOffset = infos().getLimitPlusOffset(); - /** - * @brief Returns the current state of the executor, based on _counter (i.e. - * number of lines seen), limit, offset and fullCount. - * @return See LimitState comments for a description. - */ - LimitState currentState() const noexcept { - // Note that not only offset, but also limit can be zero. Thus the order - // of all following checks is important, even the first two! 
- - if (_counter < infos().getOffset()) { - return LimitState::SKIPPING; - } - if (_counter + 1 == infos().getLimitPlusOffset()) { - return LimitState::RETURNING_LAST_ROW; - } - if (_counter < infos().getLimitPlusOffset()) { - return LimitState::RETURNING; - } - if (infos().isFullCountEnabled()) { - return LimitState::COUNTING; - } - - return LimitState::LIMIT_REACHED; + // Restricted value of _counter in [offset, limitPlusOffset] + auto const boundedCounter = std::min(limitPlusOffset, std::max(offset, _counter)); + TRI_ASSERT(offset <= boundedCounter); + TRI_ASSERT(boundedCounter <= limitPlusOffset); + return limitPlusOffset - boundedCounter; } - std::pair skipOffset(); - std::pair skipRestForFullCount(); + [[nodiscard]] auto limitFulfilled() const noexcept -> bool; + + auto calculateUpstreamCall(const AqlCall& clientCall) const -> AqlCall; private: Infos const& _infos; - Fetcher& _fetcher; InputAqlItemRow _lastRowToOutput; - ExecutionState _stateOfLastRowToOutput; // Number of input lines seen size_t _counter = 0; }; diff --git a/arangod/Aql/LimitStats.cpp b/arangod/Aql/LimitStats.cpp new file mode 100644 index 000000000000..dec1b561b3ca --- /dev/null +++ b/arangod/Aql/LimitStats.cpp @@ -0,0 +1,66 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "LimitStats.h" + +using namespace arangodb; +using namespace arangodb::aql; + +LimitStats::LimitStats(LimitStats&& other) noexcept + : _fullCount(other._fullCount) { + // It is relied upon that other._fullcount is zero after the move! 
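Back in LimitExecutor.h, remainingOffset() and remainingLimit() clamp the shared _counter into two adjacent intervals, [0, offset] and [offset, offset + limit]. A small numeric check of that clamping, using a plain struct in place of the executor:

#include <algorithm>
#include <cassert>
#include <cstddef>

struct LimitModel {
  std::size_t offset;
  std::size_t limit;
  std::size_t counter{0};  // rows seen so far (offset plus produced/skipped)

  std::size_t remainingOffset() const {
    return offset - std::min(offset, counter);
  }
  std::size_t remainingLimit() const {
    std::size_t const limitPlusOffset = offset + limit;
    std::size_t const bounded =
        std::min(limitPlusOffset, std::max(offset, counter));
    return limitPlusOffset - bounded;
  }
};

int main() {
  LimitModel lim{3, 10};  // LIMIT 3, 10
  assert(lim.remainingOffset() == 3 && lim.remainingLimit() == 10);
  lim.counter = 5;        // offset done, 2 rows produced
  assert(lim.remainingOffset() == 0 && lim.remainingLimit() == 8);
  lim.counter = 20;       // overshoot, e.g. after fullCount skipping
  assert(lim.remainingOffset() == 0 && lim.remainingLimit() == 0);
}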
+ other._fullCount = 0; +} + +auto LimitStats::operator=(LimitStats&& other) noexcept -> LimitStats& { + _fullCount = other._fullCount; + other._fullCount = 0; + return *this; +} + +void LimitStats::incrFullCount() noexcept { _fullCount++; } + +void LimitStats::incrFullCountBy(size_t amount) noexcept { + _fullCount += amount; +} + +auto LimitStats::getFullCount() const noexcept -> std::size_t { + return _fullCount; +} + +auto aql::operator+=(LimitStats& limitStats, LimitStats const& other) noexcept -> LimitStats& { + limitStats.incrFullCountBy(other.getFullCount()); + return limitStats; +} + +auto aql::operator+=(ExecutionStats& executionStats, LimitStats const& limitStats) noexcept + -> ExecutionStats& { + executionStats.fullCount += limitStats.getFullCount(); + return executionStats; +} + +auto aql::operator==(LimitStats const& left, LimitStats const& right) noexcept -> bool { + static_assert( + sizeof(LimitStats) == sizeof(left.getFullCount()), + "When adding members to LimitStats, remember to update operator==!"); + return left.getFullCount() == right.getFullCount(); +} diff --git a/arangod/Aql/LimitStats.h b/arangod/Aql/LimitStats.h index a791efdb1c7c..c99d29211726 100644 --- a/arangod/Aql/LimitStats.h +++ b/arangod/Aql/LimitStats.h @@ -23,51 +23,39 @@ #ifndef ARANGOD_AQL_LIMIT_STATS_H #define ARANGOD_AQL_LIMIT_STATS_H -#include #include "ExecutionStats.h" +#include -namespace arangodb { -namespace aql { +namespace arangodb::aql { class LimitStats { public: - LimitStats() noexcept : _fullCount(0) {} - + LimitStats() noexcept = default; LimitStats(LimitStats const&) = default; - LimitStats& operator=(LimitStats const&) = default; - // It is relied upon that other._fullcount is zero after the move! - LimitStats(LimitStats&& other) noexcept : _fullCount(other._fullCount) { - other._fullCount = 0; - } - LimitStats& operator=(LimitStats&& other) noexcept { - _fullCount = other._fullCount; - other._fullCount = 0; - return *this; - }; + LimitStats(LimitStats&& other) noexcept; + + auto operator=(LimitStats const&) -> LimitStats& = default; + auto operator=(LimitStats&& other) noexcept -> LimitStats&; - void incrFullCount() noexcept { _fullCount++; } - void incrFullCountBy(size_t amount) noexcept { _fullCount += amount; } + void incrFullCount() noexcept; + void incrFullCountBy(size_t amount) noexcept; - std::size_t getFullCount() const noexcept { return _fullCount; } + [[nodiscard]] auto getFullCount() const noexcept -> std::size_t; private: - std::size_t _fullCount; + std::size_t _fullCount{0}; + // Don't forget to update operator== when adding new members! 
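The move operations above deliberately zero the source so that statistics handed from the executor to the block cannot be counted twice. A miniature with the same semantics, written out only to make that contract visible (the real class is the LimitStats defined in this patch):

#include <cassert>
#include <cstddef>
#include <utility>

struct MiniStats {
  std::size_t fullCount{0};

  MiniStats() = default;
  // Moving out resets the source to zero, as the real LimitStats relies upon.
  MiniStats(MiniStats&& other) noexcept : fullCount(other.fullCount) {
    other.fullCount = 0;
  }
  MiniStats& operator+=(MiniStats const& other) noexcept {
    fullCount += other.fullCount;
    return *this;
  }
};

int main() {
  MiniStats total;
  MiniStats batch;
  batch.fullCount = 4;
  total += batch;                      // accumulate per-batch statistics
  MiniStats handedOver{std::move(total)};
  assert(handedOver.fullCount == 4);
  assert(total.fullCount == 0);        // moved-from object is reset
}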
}; -inline ExecutionStats& operator+=(ExecutionStats& executionStats, - LimitStats const& limitStats) noexcept { - executionStats.fullCount += limitStats.getFullCount(); - return executionStats; -} +auto operator+=(ExecutionStats& executionStats, LimitStats const& limitStats) noexcept + -> ExecutionStats&; + +auto operator+=(LimitStats& limitStats, LimitStats const& other) noexcept -> LimitStats&; -inline LimitStats& operator+=(LimitStats& limitStats, LimitStats const& other) noexcept { - limitStats.incrFullCountBy(other.getFullCount()); - return limitStats; -} +auto operator==(LimitStats const&, LimitStats const&) noexcept -> bool; -} -} +} // namespace arangodb::aql -#endif // ARANGOD_AQL_LIMIT_STATS_H +#endif // ARANGOD_AQL_LIMIT_STATS_H diff --git a/arangod/Aql/Query.cpp b/arangod/Aql/Query.cpp index 7b6325020c00..690203f39e0b 100644 --- a/arangod/Aql/Query.cpp +++ b/arangod/Aql/Query.cpp @@ -1215,6 +1215,11 @@ void Query::setEngine(ExecutionEngine* engine) { _engine.reset(engine); } +void Query::setEngine(std::unique_ptr&& engine) { + TRI_ASSERT(engine != nullptr); + _engine = std::move(engine); +} + /// @brief prepare a V8 context for execution for this expression /// this needs to be called once before executing any V8 function in this /// expression diff --git a/arangod/Aql/Query.h b/arangod/Aql/Query.h index 05b7f9a5b01c..a01d91008d22 100644 --- a/arangod/Aql/Query.h +++ b/arangod/Aql/Query.h @@ -252,6 +252,7 @@ class Query { /// @brief inject the engine TEST_VIRTUAL void setEngine(ExecutionEngine* engine); + TEST_VIRTUAL void setEngine(std::unique_ptr&& engine); /// @brief return the transaction, if prepared TEST_VIRTUAL inline transaction::Methods* trx() const { return _trx.get(); } diff --git a/arangod/Aql/ReturnExecutor.cpp b/arangod/Aql/ReturnExecutor.cpp index 35723a24524d..52a03761d39d 100644 --- a/arangod/Aql/ReturnExecutor.cpp +++ b/arangod/Aql/ReturnExecutor.cpp @@ -70,7 +70,17 @@ auto ReturnExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& TRI_IF_FAILURE("ReturnExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - Stats stats{}; + + auto stats = Stats{}; + auto skippedUpstream = inputRange.skipAll(); + call.didSkip(skippedUpstream); + /* + if (_infos.doCount()) { + // TODO: do we need to include counted here? + stats.incrCounted(skippedUpstream); + } + */ + while (inputRange.hasDataRow() && call.needSkipMore()) { // I do not think that this is actually called. 
// It will be called first to get the upstream-Call @@ -91,6 +101,7 @@ auto ReturnExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& } */ } + return {inputRange.upstreamState(), stats, call.getSkipCount(), call}; } diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index 0043efd86771..62e054c5b875 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -85,16 +85,20 @@ SingleRowFetcher::execute(AqlCallStack& stack) { return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; } if (block == nullptr) { - return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE}}; + if (state == ExecutionState::HASMORE) { + return {state, skipped, AqlItemBlockInputRange{ExecutorState::HASMORE, skipped}}; + } + return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, skipped}}; } auto [start, end] = block->getRelevantRange(); if (state == ExecutionState::HASMORE) { TRI_ASSERT(block != nullptr); return {state, skipped, - AqlItemBlockInputRange{ExecutorState::HASMORE, block, start, end}}; + AqlItemBlockInputRange{ExecutorState::HASMORE, skipped, block, start}}; } - return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, block, start, end}}; + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::DONE, skipped, block, start}}; } template diff --git a/arangod/Aql/SortedCollectExecutor.cpp b/arangod/Aql/SortedCollectExecutor.cpp index c59058930845..1e3ec9446d95 100644 --- a/arangod/Aql/SortedCollectExecutor.cpp +++ b/arangod/Aql/SortedCollectExecutor.cpp @@ -451,9 +451,11 @@ auto SortedCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, Aq clientCall.didSkip(1); } } else { - LOG_DEVEL_SC << "skipping final group"; - clientCall.didSkip(1); - _currentGroup.reset(InputAqlItemRow{CreateInvalidInputRowHint{}}); + if (_currentGroup.isValid()) { + LOG_DEVEL_SC << "skipping final group"; + clientCall.didSkip(1); + _currentGroup.reset(InputAqlItemRow{CreateInvalidInputRowHint{}}); + } } break; } else if (!input.isInitialized()) { @@ -467,5 +469,5 @@ auto SortedCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, Aq LOG_DEVEL_SC << " skipped rows: " << clientCall.getSkipCount(); LOG_DEVEL_SC << "reporting state: " << inputRange.upstreamState(); - return {inputRange.upstreamState(), Stats{}, clientCall.getSkipCount(), AqlCall{}}; + return {inputRange.upstreamState(), NoStats{}, clientCall.getSkipCount(), AqlCall{}}; } diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 9e5b88e2cad5..515380f4cd6d 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -296,6 +296,7 @@ set(LIB_ARANGO_AQL_SOURCES Aql/KShortestPathsNode.cpp Aql/LateMaterializedOptimizerRulesCommon.cpp Aql/LimitExecutor.cpp + Aql/LimitStats.cpp Aql/MaterializeExecutor.cpp Aql/ModificationExecutor.cpp Aql/ModificationExecutorHelpers.cpp diff --git a/tests/Aql/AqlHelper.cpp b/tests/Aql/AqlHelper.cpp index 79a784984575..64ea816717c6 100644 --- a/tests/Aql/AqlHelper.cpp +++ b/tests/Aql/AqlHelper.cpp @@ -59,12 +59,12 @@ std::ostream& arangodb::aql::operator<<(std::ostream& stream, AqlItemBlock const bool arangodb::aql::operator==(arangodb::aql::ExecutionStats const& left, arangodb::aql::ExecutionStats const& right) { - TRI_ASSERT(left.nodes.empty()); - TRI_ASSERT(right.nodes.empty()); - TRI_ASSERT(left.executionTime == 0.0); - TRI_ASSERT(right.executionTime == 0.0); - TRI_ASSERT(left.peakMemoryUsage == 0); - TRI_ASSERT(right.peakMemoryUsage == 0); + // The below information is only set 
on profiling in AQL + // They are not included on purpose as they will never be equal. + // * nodes + // * executionTime + // * peakMemeoryUsage + // clang-format off return left.writesExecuted == right.writesExecuted && left.writesIgnored == right.writesIgnored diff --git a/tests/Aql/AqlItemBlockHelper.h b/tests/Aql/AqlItemBlockHelper.h index 430bc37e83b9..fac8f0d7d36b 100644 --- a/tests/Aql/AqlItemBlockHelper.h +++ b/tests/Aql/AqlItemBlockHelper.h @@ -25,12 +25,12 @@ #include #include - -#include +#include #include "Aql/AqlItemBlock.h" #include "Aql/ResourceUsage.h" #include "Aql/SharedAqlItemBlockPtr.h" +#include "Basics/overload.h" #include "AqlHelper.h" #include "VelocyPackHelper.h" @@ -77,7 +77,7 @@ namespace arangodb { namespace tests { namespace aql { -using EntryBuilder = boost::variant; +using EntryBuilder = std::variant; template <::arangodb::aql::RegisterId columns> using RowBuilder = std::array; @@ -100,16 +100,6 @@ namespace aql { using namespace ::arangodb::aql; -class EntryToAqlValueVisitor : public boost::static_visitor { - public: - AqlValue operator()(int i) const { return AqlValue{AqlValueHintInt{i}}; } - - AqlValue operator()(const char* json) const { - VPackBufferPtr tmpVpack = vpackFromJsonString(json); - return AqlValue{AqlValueHintCopy{tmpVpack->data()}}; - } -}; - template SharedAqlItemBlockPtr buildBlock(AqlItemBlockManager& manager, MatrixBuilder&& matrix, @@ -122,8 +112,16 @@ SharedAqlItemBlockPtr buildBlock(AqlItemBlockManager& manager, for (size_t row = 0; row < matrix.size(); row++) { for (RegisterId col = 0; col < columns; col++) { auto const& entry = matrix[row][col]; - auto visitor = EntryToAqlValueVisitor(); - block->setValue(row, col, boost::apply_visitor(visitor, entry)); + auto value = std::visit( + overload{ + [](int i) { return AqlValue{AqlValueHintInt{i}}; }, + [](const char* json) { + VPackBufferPtr tmpVpack = vpackFromJsonString(json); + return AqlValue{AqlValueHintCopy{tmpVpack->data()}}; + }, + }, + entry); + block->setValue(row, col, value); } } diff --git a/tests/Aql/AqlItemBlockInputRangeTest.cpp b/tests/Aql/AqlItemBlockInputRangeTest.cpp index 884058d6e7d7..ac80439ff7a8 100644 --- a/tests/Aql/AqlItemBlockInputRangeTest.cpp +++ b/tests/Aql/AqlItemBlockInputRangeTest.cpp @@ -52,8 +52,7 @@ class InputRangeTest : public ::testing::TestWithParam { } AqlItemBlockInputRange createFromBlock(arangodb::aql::SharedAqlItemBlockPtr& block) { - auto const [start, end] = block->getRelevantRange(); - return AqlItemBlockInputRange(GetParam(), block, start, end); + return AqlItemBlockInputRange(GetParam(), 0, block, 0); } void validateEndReached(AqlItemBlockInputRange& testee) { diff --git a/tests/Aql/EngineInfoContainerCoordinatorTest.cpp b/tests/Aql/EngineInfoContainerCoordinatorTest.cpp index 21a74a56c92a..a3af215272f2 100644 --- a/tests/Aql/EngineInfoContainerCoordinatorTest.cpp +++ b/tests/Aql/EngineInfoContainerCoordinatorTest.cpp @@ -119,14 +119,16 @@ TEST(EngineInfoContainerTest, it_should_create_an_executionengine_for_the_first_ // Section: Mock Functions // ------------------------------ - fakeit::When(Method(mockQuery, setEngine)).Do([&](ExecutionEngine* eng) -> void { - // We expect that the snippet injects a new engine into our - // query. 
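Separately from the mocking changes around it, the buildBlock() hunk above replaces boost::static_visitor with std::visit and the overload{} helper from Basics/overload.h. The same idiom shown standalone, with a local copy of the helper purely for illustration:

#include <cassert>
#include <string>
#include <variant>

// Fuse a set of lambdas into one callable for std::visit (C++17).
template <class... Ts>
struct overload : Ts... {
  using Ts::operator()...;
};
template <class... Ts>
overload(Ts...) -> overload<Ts...>;  // deduction guide

int main() {
  std::variant<int, char const*> entry = 42;
  auto const described = std::visit(
      overload{
          [](int i) { return std::string("int ") + std::to_string(i); },
          [](char const* json) { return std::string("json ") + json; },
      },
      entry);
  assert(described == "int 42");
}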
- // However we have to return a mocked engine later - ASSERT_NE(eng, nullptr); - // Throw it away - delete eng; - }); + fakeit::When(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine * ))).Do( + [](ExecutionEngine *eng) -> void { + // We expect that the snippet injects a new engine into our + // query. + // However we have to return a mocked engine later + ASSERT_NE(eng, nullptr); + // Throw it away + delete eng; + } + ); fakeit::When(Method(mockQuery, trx)).Return(&trx); fakeit::When(Method(mockQuery, engine)).Return(&myEngine).Return(&myEngine); @@ -154,7 +156,7 @@ TEST(EngineInfoContainerTest, it_should_create_an_executionengine_for_the_first_ ASSERT_TRUE(queryIds.empty()); // Validate that the query is wired up with the engine - fakeit::Verify(Method(mockQuery, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine *))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockEngine, createBlocks)).Exactly(1); } @@ -223,7 +225,7 @@ TEST(EngineInfoContainerTest, // Section: Mock Functions // ------------------------------ - fakeit::When(Method(mockQuery, setEngine)).Do([&](ExecutionEngine* eng) -> void { + fakeit::When(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Do([&](ExecutionEngine* eng) -> void { // We expect that the snippet injects a new engine into our // query. // However we have to return a mocked engine later @@ -250,7 +252,8 @@ TEST(EngineInfoContainerTest, return &queryClone; }); - fakeit::When(Method(mockQueryClone, setEngine)).Do([&](ExecutionEngine* eng) -> void { + fakeit::When(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine * ))).Do( + [&](ExecutionEngine *eng) -> void { // We expect that the snippet injects a new engine into our // query. // However we have to return a mocked engine later @@ -314,12 +317,12 @@ TEST(EngineInfoContainerTest, ASSERT_TRUE(queryIds.empty()); // Validate that the query is wired up with the engine - fakeit::Verify(Method(mockQuery, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockEngine, createBlocks)).Exactly(1); // Validate that the second query is wired up with the second engine - fakeit::Verify(Method(mockQueryClone, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! 
fakeit::Verify(Method(mockSecondEngine, createBlocks)).Exactly(1); fakeit::Verify(Method(mockRegistry, insert)).Exactly(1); @@ -440,7 +443,7 @@ TEST(EngineInfoContainerTest, snippets_are_a_stack_insert_node_always_into_top_s // Section: Mock Functions // ------------------------------ - fakeit::When(Method(mockQuery, setEngine)).Do(setEngineCallback); + fakeit::When(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Do(setEngineCallback); fakeit::When(Method(mockQuery, trx)).Return(&trx); fakeit::When(Method(mockQuery, engine)).Return(&myEngine).Return(&myEngine); fakeit::When(Method(mockEngine, createBlocks)) @@ -467,7 +470,7 @@ TEST(EngineInfoContainerTest, snippets_are_a_stack_insert_node_always_into_top_s }); // Mock first clone - fakeit::When(Method(mockQueryClone, setEngine)).Do(setEngineCallback); + fakeit::When(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Do(setEngineCallback); fakeit::When(Method(mockQueryClone, engine)).Return(&mySecondEngine); fakeit::When(Method(mockQueryClone, trx)).Return(&secondTrx); fakeit::When(Method(mockSecondEngine, createBlocks)) @@ -481,7 +484,7 @@ TEST(EngineInfoContainerTest, snippets_are_a_stack_insert_node_always_into_top_s .AlwaysReturn(&block); // Mock second clone - fakeit::When(Method(mockQuerySecondClone, setEngine)).Do(setEngineCallback); + fakeit::When(OverloadedMethod(mockQuerySecondClone, setEngine, void(ExecutionEngine*))).Do(setEngineCallback); fakeit::When(Method(mockQuerySecondClone, engine)).Return(&myThirdEngine); fakeit::When(Method(mockQuerySecondClone, trx)).Return(&thirdTrx); fakeit::When(Method(mockThirdEngine, createBlocks)) @@ -550,17 +553,17 @@ TEST(EngineInfoContainerTest, snippets_are_a_stack_insert_node_always_into_top_s ASSERT_TRUE(queryIds.empty()); // Validate that the query is wired up with the engine - fakeit::Verify(Method(mockQuery, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockEngine, createBlocks)).Exactly(1); // Validate that the second query is wired up with the second engine - fakeit::Verify(Method(mockQueryClone, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockSecondEngine, createBlocks)).Exactly(1); // Validate that the second query is wired up with the second engine - fakeit::Verify(Method(mockQuerySecondClone, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQuerySecondClone, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockThirdEngine, createBlocks)).Exactly(1); @@ -627,7 +630,7 @@ TEST(EngineInfoContainerTest, error_cases_cloning_of_a_query_fails_throws_an_err // Section: Mock Functions // ------------------------------ - fakeit::When(Method(mockQuery, setEngine)).Do([&](ExecutionEngine* eng) -> void { + fakeit::When(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Do([&](ExecutionEngine* eng) -> void { // We expect that the snippet injects a new engine into our // query. 
// However we have to return a mocked engine later @@ -640,7 +643,7 @@ TEST(EngineInfoContainerTest, error_cases_cloning_of_a_query_fails_throws_an_err fakeit::When(Method(mockEngine, createBlocks)).AlwaysReturn(Result{TRI_ERROR_NO_ERROR}); fakeit::When(ConstOverloadedMethod(mockEngine, root, ExecutionBlock * ())).AlwaysReturn(&block); - fakeit::When(Method(mockQueryClone, setEngine)).Do([&](ExecutionEngine* eng) -> void { + fakeit::When(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Do([&](ExecutionEngine* eng) -> void { // We expect that the snippet injects a new engine into our // query. // However we have to return a mocked engine later @@ -717,12 +720,12 @@ TEST(EngineInfoContainerTest, error_cases_cloning_of_a_query_fails_throws_an_err // Validate that the path up to intended error was taken // Validate that the query is wired up with the engine - fakeit::Verify(Method(mockQuery, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockEngine, createBlocks)).Exactly(1); // Validate that the second query is wired up with the second engine - fakeit::Verify(Method(mockQueryClone, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockSecondEngine, createBlocks)).Exactly(1); fakeit::Verify(Method(mockRegistry, insert)).Exactly(1); @@ -792,7 +795,7 @@ TEST(EngineInfoContainerTest, error_cases_cloning_of_a_query_fails_returns_a_nul // Section: Mock Functions // ------------------------------ - fakeit::When(Method(mockQuery, setEngine)).Do([&](ExecutionEngine* eng) -> void { + fakeit::When(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Do([&](ExecutionEngine* eng) -> void { // We expect that the snippet injects a new engine into our // query. // However we have to return a mocked engine later @@ -805,7 +808,7 @@ TEST(EngineInfoContainerTest, error_cases_cloning_of_a_query_fails_returns_a_nul fakeit::When(Method(mockEngine, createBlocks)).AlwaysReturn(Result{TRI_ERROR_NO_ERROR}); fakeit::When(ConstOverloadedMethod(mockEngine, root, ExecutionBlock * ())).AlwaysReturn(&block); - fakeit::When(Method(mockQueryClone, setEngine)).Do([&](ExecutionEngine* eng) -> void { + fakeit::When(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Do([&](ExecutionEngine* eng) -> void { // We expect that the snippet injects a new engine into our // query. // However we have to return a mocked engine later @@ -886,12 +889,12 @@ TEST(EngineInfoContainerTest, error_cases_cloning_of_a_query_fails_returns_a_nul // Validate that the path up to intended error was taken // Validate that the query is wired up with the engine - fakeit::Verify(Method(mockQuery, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQuery, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! fakeit::Verify(Method(mockEngine, createBlocks)).Exactly(1); // Validate that the second query is wired up with the second engine - fakeit::Verify(Method(mockQueryClone, setEngine)).Exactly(1); + fakeit::Verify(OverloadedMethod(mockQueryClone, setEngine, void(ExecutionEngine*))).Exactly(1); // Validate that createBlocks has been called! 
fakeit::Verify(Method(mockSecondEngine, createBlocks)).Exactly(1); fakeit::Verify(Method(mockRegistry, insert)).Exactly(1); diff --git a/tests/Aql/EnumerateListExecutorTest.cpp b/tests/Aql/EnumerateListExecutorTest.cpp index 32048fc2a8b7..5cc1e4ef7669 100644 --- a/tests/Aql/EnumerateListExecutorTest.cpp +++ b/tests/Aql/EnumerateListExecutorTest.cpp @@ -81,7 +81,7 @@ TEST_F(EnumerateListExecutorTest, test_check_state_first_row_border) { buildBlock<4>(itemBlockManager, {{{{1}, {2}, {3}, {R"([true, 1, 2])"}}}, {{{1}, {2}, {3}, {R"([true, 1, 2])"}}}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); @@ -115,7 +115,7 @@ TEST_F(EnumerateListExecutorTest, test_check_state_second_row_border) { buildBlock<4>(itemBlockManager, {{{{1}, {2}, {3}, {R"([true, 1, 2])"}}}, {{{1}, {2}, {3}, {R"([true, 1, 2])"}}}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); diff --git a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index c06c04776340..0fcde0a15adb 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -1537,7 +1537,11 @@ class ExecutionBlockImplExecuteIntegrationTest output.copyRow(input); output.advanceRow(); } - return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; + // Do forward a softLimit call only. + // Do not oeverfetch here. + AqlCall request{}; + request.softLimit = output.getClientCall().getLimit(); + return {inputRange.upstreamState(), NoStats{}, request}; }; auto skipData = [&skipAsserter](AqlItemBlockInputRange& inputRange, AqlCall& call) @@ -1553,7 +1557,7 @@ class ExecutionBlockImplExecuteIntegrationTest } // Do forward a softLimit call only. // Do not oeverfetch here. 
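The same rule is applied in the skip callback that this hunk goes on to adjust: the test executor only ever passes a soft limit upstream, so upstream blocks may stop early but are never forced to drop rows. A condensed sketch of both upstream calls, assuming nothing beyond the AqlCall members already used in this patch (softLimit, getOffset(), getLimit()):

    // Sketch only: how these callbacks translate the client's demand into an
    // upstream request without overfetching.
    auto upstreamCallForProduce = [](AqlCall const& clientCall) -> AqlCall {
      AqlCall request{};
      request.softLimit = clientCall.getLimit();  // mirror the client's limit
      return request;
    };
    auto upstreamCallForSkip = [](AqlCall const& clientCall) -> AqlCall {
      AqlCall request{};
      if (clientCall.getOffset() > 0) {
        request.softLimit = clientCall.getOffset();  // fetch only what is skipped
      }  // else: fullCount case, leave the request unlimited
      return request;
    };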
- AqlCall request; + AqlCall request{}; if (call.getOffset() > 0) { request.softLimit = call.getOffset(); } // else fullCount case, simple get UNLIMITED from above diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 8c421efc8d92..b280ab9c7c3b 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -26,20 +26,25 @@ #include "gtest/gtest.h" #include "AqlItemBlockHelper.h" +#include "MockTypedNode.h" #include "Mocks/Servers.h" #include "WaitingExecutionBlockMock.h" #include "Aql/AqlCall.h" #include "Aql/AqlCallStack.h" +#include "Aql/AqlItemMatrix.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutionEngine.h" +#include "Aql/ExecutionNode.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutionStats.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/SharedAqlItemBlockPtr.h" +#include "Logger/LogMacros.h" +#include #include namespace arangodb { @@ -214,6 +219,15 @@ struct ExecutorTestHelper { return *this; } + auto setInputFromRowNum(size_t rows) -> ExecutorTestHelper& { + static_assert(inputColumns == 1); + _input.clear(); + for (auto i = size_t{0}; i < rows; ++i) { + _input.emplace_back(RowBuilder<1>{i}); + } + return *this; + } + auto setInputSplit(std::vector const& list) -> ExecutorTestHelper& { _inputSplit = list; return *this; @@ -236,6 +250,17 @@ struct ExecutorTestHelper { return *this; } + auto setTesteeNodeType(ExecutionNode::NodeType nodeType) -> ExecutorTestHelper& { + _testeeNodeType = nodeType; + return *this; + } + + auto setWaitingBehaviour(WaitingExecutionBlockMock::WaitingBehaviour waitingBehaviour) + -> ExecutorTestHelper& { + _waitingBehaviour = waitingBehaviour; + return *this; + } + auto expectOutput(std::array const& regs, MatrixBuilder const& out) -> ExecutorTestHelper& { _outputRegisters = regs; @@ -267,19 +292,32 @@ struct ExecutorTestHelper { return *this; } + /** + * @brief Set the Execution Block object + * + * @tparam E The executor + * @param infos to build the executor + * @param nodeType The type of executor node, only used for debug printing, defaults to SINGLETON + * @return ExecutorTestHelper& + */ template - auto setExecBlock(typename E::Infos infos) -> ExecutorTestHelper& { + auto setExecBlock(typename E::Infos infos, + ExecutionNode::NodeType nodeType = ExecutionNode::SINGLETON) + -> ExecutorTestHelper& { auto& testeeNode = _execNodes.emplace_back(std::move( - std::make_unique(_query.plan(), _execNodes.size()))); - setPipeline(Pipeline{std::make_unique>(_query.engine(), - testeeNode.get(), std::move(infos))}); + std::make_unique(_query.plan(), _execNodes.size(), nodeType))); + setPipeline(Pipeline{ + std::make_unique>(_query.engine(), testeeNode.get(), + std::move(infos))}); return *this; } template - auto createExecBlock(typename E::Infos infos) -> ExecBlock { - auto& testeeNode = _execNodes.emplace_back(std::move( - std::make_unique(_query.plan(), _execNodes.size()))); + auto createExecBlock(typename E::Infos infos, + ExecutionNode::NodeType nodeType = ExecutionNode::SINGLETON) + -> ExecBlock { + auto& testeeNode = _execNodes.emplace_back( + std::move(std::make_unique(_query.plan(), _execNodes.size(), nodeType))); return std::make_unique>(_query.engine(), testeeNode.get(), std::move(infos)); } @@ -309,20 +347,48 @@ struct ExecutorTestHelper { return *this; } - auto run() -> void { + auto run(bool const loop = false) -> void { ResourceMonitor monitor; AqlItemBlockManager itemBlockManager(&monitor, SerializationFormat::SHADOWROWS); 
auto inputBlock = generateInputRanges(itemBlockManager); + auto skippedTotal = size_t{0}; + auto finalState = ExecutionState::HASMORE; + TRI_ASSERT(!_pipeline.empty()); _pipeline.get().back()->addDependency(inputBlock.get()); - AqlCallStack stack{_call}; - auto const [state, skipped, result] = _pipeline.get().front()->execute(stack); - EXPECT_EQ(skipped, _expectedSkip); + BlockCollector allResults{&itemBlockManager}; - EXPECT_EQ(state, _expectedState); + if (!loop) { + AqlCallStack stack{_call}; + auto const [state, skipped, result] = _pipeline.get().front()->execute(stack); + skippedTotal = skipped; + finalState = state; + if (result != nullptr) { + allResults.add(result); + } + } else { + auto call = _call; + do { + AqlCallStack stack{call}; + auto const [state, skipped, result] = _pipeline.get().front()->execute(stack); + finalState = state; + skippedTotal += skipped; + if (result != nullptr) { + allResults.add(result); + } + call = _call; + call.didSkip(skippedTotal); + call.didProduce(allResults.totalSize()); + } while (finalState != ExecutionState::DONE && + (!call.hasSoftLimit() || (call.getLimit() + call.getOffset()) > 0)); + } + + EXPECT_EQ(skippedTotal, _expectedSkip); + EXPECT_EQ(finalState, _expectedState); + SharedAqlItemBlockPtr result = allResults.steal(); if (result == nullptr) { // Empty output, possible if we skip all EXPECT_EQ(_output.size(), 0) @@ -365,6 +431,8 @@ struct ExecutorTestHelper { for (auto const& value : _input) { matrix.push_back(value); + TRI_ASSERT(!_inputSplit.valueless_by_exception()); + bool openNewBlock = std::visit(overload{[&](VectorSizeT& list) { if (*iter != *end && matrix.size() == **iter) { @@ -396,9 +464,10 @@ struct ExecutorTestHelper { blockDeque.emplace_back(nullptr); } - return std::make_unique( - _query.engine(), _dummyNode.get(), std::move(blockDeque), - WaitingExecutionBlockMock::WaitingBehaviour::NEVER); + return std::make_unique(_query.engine(), + _dummyNode.get(), + std::move(blockDeque), + _waitingBehaviour); } AqlCall _call; @@ -409,6 +478,9 @@ struct ExecutorTestHelper { ExecutionState _expectedState; ExecutionStats _expectedStats; bool _testStats; + ExecutionNode::NodeType _testeeNodeType{ExecutionNode::MAX_NODE_TYPE_VALUE}; + WaitingExecutionBlockMock::WaitingBehaviour _waitingBehaviour = + WaitingExecutionBlockMock::NEVER; bool _unorderedOutput; bool _appendEmptyBlock; std::size_t _unorderedSkippedRows; @@ -419,7 +491,7 @@ struct ExecutorTestHelper { arangodb::aql::Query& _query; std::unique_ptr _dummyNode; Pipeline _pipeline; - std::vector> _execNodes; + std::vector> _execNodes; }; enum class ExecutorCall { diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index 86907101764b..a51463030664 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -363,7 +363,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange) { buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()); @@ -388,7 +388,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange_need_more) { buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); size_t hardLimit = 1000; - AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; + 
AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0}; AqlCall limitedCall{}; limitedCall.hardLimit = hardLimit; OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, @@ -420,7 +420,7 @@ TEST_F(FilterExecutorTest, test_skip_datarange_need_more) { buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0}; AqlCall clientCall; clientCall.offset = 1000; @@ -453,7 +453,7 @@ TEST_F(FilterExecutorTest, test_produce_datarange_has_more) { buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; block.reset(new AqlItemBlock(itemBlockManager, 2, 1)); OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, infos.registersToClear()); @@ -492,7 +492,7 @@ TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { buildBlock<1>(itemBlockManager, {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; AqlCall clientCall; clientCall.offset = 2; auto const [state, stats, skipped, call] = testee.skipRowsRange(input, clientCall); diff --git a/tests/Aql/IdExecutorTest.cpp b/tests/Aql/IdExecutorTest.cpp index 37f9ce13ac61..6595e5612206 100644 --- a/tests/Aql/IdExecutorTest.cpp +++ b/tests/Aql/IdExecutorTest.cpp @@ -68,7 +68,8 @@ class IdExecutorTestCombiner : public AqlExecutorTestCaseWithParam { matrix.emplace_back(RowBuilder<1>{{it}}); } SharedAqlItemBlockPtr block = buildBlock<1>(manager(), std::move(matrix)); - return AqlItemBlockInputRange{upstreamState, block, 0, input.size()}; + TRI_ASSERT(clientCall.getSkipCount() == 0); + return AqlItemBlockInputRange{upstreamState, 0, block, 0}; } auto prepareOutputRow(SharedAqlItemBlockPtr input) -> OutputAqlItemRow { diff --git a/tests/Aql/LimitExecutorTest.cpp b/tests/Aql/LimitExecutorTest.cpp index f8a44a1f3c35..18eabe264b41 100644 --- a/tests/Aql/LimitExecutorTest.cpp +++ b/tests/Aql/LimitExecutorTest.cpp @@ -25,6 +25,7 @@ #include "AqlHelper.h" #include "AqlItemBlockHelper.h" #include "ExecutorTestHelper.h" +#include "Mocks/Servers.h" #include "RowFetcherHelper.h" #include "VelocyPackHelper.h" @@ -33,20 +34,63 @@ #include "Aql/InputAqlItemRow.h" #include "Aql/LimitExecutor.h" #include "Aql/ResourceUsage.h" -#include "Aql/SingleRowFetcher.h" #include #include +#include using namespace arangodb; using namespace arangodb::aql; -namespace arangodb { -namespace tests { -namespace aql { +namespace arangodb::aql { +void PrintTo(LimitStats const& stats, std::ostream* os) { + *os << "LimitStats{" << stats.getFullCount() << "}"; +} +} // namespace arangodb::aql + +namespace arangodb::tests::aql { + +/* + * How a test case for LimitExecutor is described: + * + * Obviously, we need the LimitExecutor parameters + * 1) offset, + * 2) limit, and + * 3) fullCount. + * We also need an input, specified as a + * 4) vector of input lengths, + * which maps to a vector of input blocks, each with the specified number of + * rows. + * Finally, we need a call in form of an + * 5) AqlCall + * which breaks down to: + * - offset + * - limit, + * - hard/soft ~, and + * - fullCount. 
+ * Plus something like + * 6) doneResultIsEmpty + * to cover both the case where the last upstream non-empty result returns with + * HASMORE, or immediately with DONE. + */ +class LimitExecutorTest + : public ::testing::TestWithParam, AqlCall, bool>> { + public: + // Creating a server instance costs a lot of time, so do it only once. + // Note that newer version of gtest call these SetUpTestSuite/TearDownTestSuite + static void SetUpTestCase() { + server = std::make_unique(); + // Logger::QUERIES.setLogLevel(LogLevel::DEBUG); + } + static void TearDownTestCase() { + // Logger::QUERIES.setLogLevel(LogLevel::INFO); + server.reset(); + } -class LimitExecutorTest : public ::testing::Test { protected: + static std::unique_ptr server; + std::unique_ptr fakedQuery{}; + ExecutionState state; ResourceMonitor monitor; AqlItemBlockManager itemBlockManager; @@ -54,11 +98,9 @@ class LimitExecutorTest : public ::testing::Test { std::shared_ptr> outputRegisters; std::shared_ptr> registersToKeep; - // Special parameters: - // 4th offset - // 5th limit - // 6th fullCount - // 7th queryDepth + // Should never be called, and can be removed as soon as the LimitExecutor's + // Fetcher argument&member are removed. + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> dummyFetcher; LimitExecutorTest() : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), @@ -66,1039 +108,317 @@ class LimitExecutorTest : public ::testing::Test { outputRegisters(std::make_shared>( std::initializer_list{})), registersToKeep(std::make_shared>( - std::initializer_list{0})) {} -}; - -TEST_F(LimitExecutorTest, row_upstream_the_producer_doesnt_wait) { - auto input = VPackParser::fromJson("[ [1] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 0, 1, true); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), false); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow result{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - ASSERT_EQ(1, stats.getFullCount()); -} - -TEST_F(LimitExecutorTest, row_upstream_the_producer_waits) { - auto input = VPackParser::fromJson("[ [1] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 0, 1, true); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), true); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow result{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(0, stats.getFullCount()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(result.produced()); - ASSERT_EQ(1, stats.getFullCount()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_doesnt_wait_limit_1_offset_0_fullcount_false) { - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 0, 1, false); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), false); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - 
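To make the parameter description above concrete, here is one worked case, following the expectation arithmetic spelled out further down in this test; the instantiation below is only an illustration, not an actual entry of the generated suite:

    // Hypothetical case: executor offset 1, limit 3, no fullCount, input split
    // into blocks of 2 and 1 rows (values 0..2), client call with offset 1 and
    // hard limit 10, doneResultIsEmpty = false.
    auto const exampleCase =
        std::make_tuple(size_t{1}, size_t{3}, false, std::vector<size_t>{2, 1},
                        AqlCall{1, false, 10, AqlCall::LimitType::HARD}, false);
    // The executor's window is rows 1..2; the client call skips one of them,
    // so the expected result is skipped == 1 and a single output row {2}.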
std::tie(state, stats) = testee.produceRows(row); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_FALSE(row.produced()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_doesnt_wait_limit_1_offset_0_fullcount_true) { - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 0, 1, true); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), false); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - ASSERT_EQ(4, stats.getFullCount()); - - auto block = row.stealBlock(); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(1, value.toInt64()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_doesnt_wait_limit_1_offset_1_fullcount_true) { - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 1, 1, true); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), false); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - ASSERT_EQ(4, stats.getFullCount()); - - auto block = row.stealBlock(); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(2, value.toInt64()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_waits_limit_1_offset_0_fullcount_false) { - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 0, 1, false); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), true); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_FALSE(row.produced()); - - auto block = row.stealBlock(); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(1, value.toInt64()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_waits_limit_1_offset_0_fullcount_true) { - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, 0, 1, true); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), true); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - size_t fullCount = 0; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - 
ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); - fullCount += stats.getFullCount(); - - // In the following, the EXPECTs against stats.getFullCount() after each - // produceRows() call are not strictly required, but implementation dependent. - // The implementation of LimitExecutor would be allowed to return rows it has - // already seen at these points. - // It is sufficient that the sum of the stats equals 4, which is asserted at - // the end. So the intermediate EXPECTs are against the actual implementation - // and thus just there in order to find the location of an error faster. - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); // not strictly required, see comment above - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); // not strictly required, see comment above - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); // not strictly required, see comment above - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - EXPECT_EQ(4, stats.getFullCount()); // not strictly required, see comment above - fullCount += stats.getFullCount(); - - ASSERT_EQ(4, fullCount); - - auto block = row.stealBlock(); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(1, value.toInt64()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_doesnt_wait_limit_6_offset_1_fullcount_false) { - size_t constexpr offset = 1; - size_t constexpr limit = 6; - bool constexpr fullcount = false; - bool constexpr waiting = false; - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullcount); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), waiting); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - - auto block = row.stealBlock(); - EXPECT_EQ(3, block->size()); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(2, value.toInt64()); - value = block->getValue(1, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(3, value.toInt64()); - value = block->getValue(2, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(4, value.toInt64()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_doesnt_wait_limit_6_offset_1_fullcount_true) { - size_t constexpr offset = 1; - size_t constexpr limit = 6; - bool constexpr fullcount = true; - bool constexpr waiting = false; - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - 
LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullcount); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), waiting); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - size_t fullCount = 0; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - EXPECT_EQ(2, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - EXPECT_EQ(1, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - EXPECT_EQ(1, stats.getFullCount()); - fullCount += stats.getFullCount(); - - ASSERT_EQ(4, fullCount); - - auto block = row.stealBlock(); - EXPECT_EQ(3, block->size()); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(2, value.toInt64()); - value = block->getValue(1, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(3, value.toInt64()); - value = block->getValue(2, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(4, value.toInt64()); -} -TEST_F(LimitExecutorTest, rows_upstream_the_producer_waits_limit_6_offset_1_fullcount_false) { - size_t constexpr offset = 1; - size_t constexpr limit = 6; - bool constexpr fullcount = false; - bool constexpr waiting = true; - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullcount); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), waiting); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - - auto block = row.stealBlock(); - EXPECT_EQ(3, block->size()); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(2, value.toInt64()); - value = block->getValue(1, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(3, value.toInt64()); - value = block->getValue(2, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(4, value.toInt64()); -} - -TEST_F(LimitExecutorTest, rows_upstream_the_producer_waits_limit_6_offset_1_fullcount_true) { - size_t constexpr offset = 1; - size_t constexpr 
limit = 6; - bool constexpr fullcount = true; - bool constexpr waiting = true; - auto input = VPackParser::fromJson("[ [1], [2], [3], [4] ]"); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullcount); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), waiting); - LimitExecutor testee(fetcher, infos); - LimitStats stats{}; - size_t fullCount = 0; - - OutputAqlItemRow row{std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(1, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - EXPECT_EQ(1, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::HASMORE, state); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - EXPECT_EQ(1, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::WAITING, state); - ASSERT_FALSE(row.produced()); - EXPECT_EQ(0, stats.getFullCount()); - fullCount += stats.getFullCount(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(ExecutionState::DONE, state); - ASSERT_TRUE(row.produced()); - EXPECT_EQ(1, stats.getFullCount()); - fullCount += stats.getFullCount(); - - ASSERT_EQ(4, fullCount); - - auto block = row.stealBlock(); - EXPECT_EQ(3, block->size()); - AqlValue value = block->getValue(0, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(2, value.toInt64()); - value = block->getValue(1, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(3, value.toInt64()); - value = block->getValue(2, 0); - ASSERT_TRUE(value.isNumber()); - EXPECT_EQ(4, value.toInt64()); -} - -class LimitExecutorTestBase { - protected: - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - std::shared_ptr> outputRegisters; - std::shared_ptr> registersToKeep; - - LimitExecutorTestBase() - : monitor(), - itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - outputRegisters(std::make_shared>( - std::initializer_list{})), - registersToKeep(std::make_shared>( - std::initializer_list{0})) {} -}; - -// skip and fullCount cannot go together: Only the last limit block may get -// fullCount, so there is no block after that could skip. -// For these cases, use this class. 
-class LimitExecutorWaitingTest : public LimitExecutorTestBase, - public ::testing::TestWithParam { - protected: - bool waiting{}; - - LimitExecutorWaitingTest() : LimitExecutorTestBase() {} - - virtual void SetUp() { waiting = GetParam(); } -}; - -// Fields: -// [0] bool waiting -// [1] bool fullCount -using ExtendedLimitTestParameters = std::tuple; - -class LimitExecutorWaitingFullCountTest - : public LimitExecutorTestBase, - public ::testing::TestWithParam { - protected: - bool waiting{}; - bool fullCount{}; - - LimitExecutorWaitingFullCountTest() : LimitExecutorTestBase() {} - - virtual void SetUp() { - ExtendedLimitTestParameters const& params = GetParam(); - std::tie(waiting, fullCount) = params; + std::initializer_list{0})), + dummyFetcher(itemBlockManager, 1, false, nullptr) { + fakedQuery = server->createFakeQuery(); } -}; -void removeWaiting(std::vector& results) { - std::vector tmp; - for (auto const result : results) { - if (std::get(result) != ExecutionState::WAITING) { - tmp.emplace_back(result); + auto buildBlockRange(size_t const begin, size_t const end) -> SharedAqlItemBlockPtr { + auto builder = MatrixBuilder<1>{}; + builder.reserve(end - begin); + for (size_t i = begin; i < end; ++i) { + builder.emplace_back(RowBuilder<1>{i}); } + return buildBlock<1>(itemBlockManager, std::move(builder)); } - results.swap(tmp); -} - -TEST_P(LimitExecutorWaitingFullCountTest, rows_9_blocksize_3_limit_10) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 0; - size_t constexpr limit = 10; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullCount); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 3}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 3}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::DONE, 3}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::DONE, 1}, - }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - if (fullCount) { - expectedStats.fullCount = 9; - } else { - expectedStats.fullCount = 0; - } - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! 
- SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = - runExecutor(itemBlockManager, testee, outputRow, 0, expectedOutputSize, false); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} - -TEST_P(LimitExecutorWaitingFullCountTest, rows_9_blocksize_3_limit_4) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 0; - size_t constexpr limit = 4; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullCount); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 3}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 3}, - }; - if (fullCount) { - expectedStates.emplace_back(ExecutorCall::PRODUCE_ROWS, ExecutionState::WAITING, 0); - } - expectedStates.emplace_back(ExecutorCall::PRODUCE_ROWS, ExecutionState::DONE, 1); - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - if (fullCount) { - expectedStats.fullCount = 9; - } else { - expectedStats.fullCount = 0; - } - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! 
- SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = - runExecutor(itemBlockManager, testee, outputRow, 0, expectedOutputSize, false); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} - -TEST_P(LimitExecutorWaitingFullCountTest, rows_9_blocksize_3_limit_0) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 0; - size_t constexpr limit = 0; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullCount); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = buildBlock<1>(itemBlockManager, {}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{}; - if (fullCount) { - expectedStates.emplace_back(ExecutorCall::FETCH_FOR_PASSTHROUGH, - ExecutionState::WAITING, 0); - expectedStates.emplace_back(ExecutorCall::FETCH_FOR_PASSTHROUGH, - ExecutionState::WAITING, 0); - expectedStates.emplace_back(ExecutorCall::FETCH_FOR_PASSTHROUGH, - ExecutionState::WAITING, 0); - } - expectedStates.emplace_back(ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::DONE, 0); - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - if (fullCount) { - expectedStats.fullCount = 9; - } else { - expectedStats.fullCount = 0; - } - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! - SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = - runExecutor(itemBlockManager, testee, outputRow, 0, expectedOutputSize, false); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} - -TEST_P(LimitExecutorWaitingFullCountTest, rows_9_blocksize_3_offset_4_limit_4) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 4; - size_t constexpr limit = 4; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullCount); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = - buildBlock<1>(itemBlockManager, {{4}, {5}, {6}, {7}}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 
0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 2}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 3}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::DONE, 1}, - }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - if (fullCount) { - expectedStats.fullCount = 9; - } else { - expectedStats.fullCount = 0; - } - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! - SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = - runExecutor(itemBlockManager, testee, outputRow, 0, expectedOutputSize, false); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} - -TEST_P(LimitExecutorWaitingFullCountTest, rows_9_blocksize_3_offset_10_limit_1) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 10; - size_t constexpr limit = 1; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, fullCount); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = buildBlock<1>(itemBlockManager, {}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::DONE, 0}, - }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - if (fullCount) { - expectedStats.fullCount = 9; - } else { - expectedStats.fullCount = 0; - } - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! 
- SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = - runExecutor(itemBlockManager, testee, outputRow, 0, expectedOutputSize, false); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} - -INSTANTIATE_TEST_CASE_P(LimitExecutorVariations, LimitExecutorWaitingFullCountTest, - testing::Combine(testing::Bool(), testing::Bool())); - -TEST_P(LimitExecutorWaitingTest, rows_9_blocksize_3_skip_4_offset_1_limit_7) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 1; - size_t constexpr limit = 7; - size_t constexpr skip = 4; - size_t constexpr readRows = 2; - bool constexpr skipAfter = true; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, false); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = - buildBlock<1>(itemBlockManager, {{5}, {6}}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::HASMORE, 4}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 1}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::WAITING, 0}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 3}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::SKIP_ROWS, ExecutionState::DONE, 1}, - }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - expectedStats.fullCount = 0; - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! 
- SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = runExecutor(itemBlockManager, testee, outputRow, skip, readRows, skipAfter); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} - -TEST_P(LimitExecutorWaitingTest, rows_9_blocksize_3_skip_4_offset_1_limit_3) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 1; - size_t constexpr limit = 3; - size_t constexpr skip = 4; - size_t constexpr readRows = 1; - bool constexpr skipAfter = true; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, false); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = buildBlock<1>(itemBlockManager, {}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::DONE, 3}, - }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - expectedStats.fullCount = 0; - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! 
- SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = runExecutor(itemBlockManager, testee, outputRow, skip, readRows, skipAfter); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); +}; - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); +std::unique_ptr LimitExecutorTest::server{nullptr}; + +auto const testingFullCount = ::testing::Bool(); +using InputLengths = std::vector; +#define USE_FULL_SUITE false +#if USE_FULL_SUITE +auto const testingOffsets = ::testing::Values(0, 1, 2, 3, 10, 100'000'000); +auto const testingLimits = ::testing::Values(0, 1, 2, 3, 10, 100'000'000); +auto const testingInputLengths = ::testing::Values( + // 0 + InputLengths{}, + // 1 + InputLengths{1}, + // 2 + InputLengths{2}, InputLengths{1, 1}, + // 3 + InputLengths{3}, InputLengths{1, 2}, InputLengths{2, 1}, InputLengths{1, 1, 1}, + // 4 + InputLengths{4}, InputLengths{3, 1}, InputLengths{2, 2}, + // 9 + InputLengths{9}, + // 10 + InputLengths{10}, InputLengths{9, 1}, + // 11 + InputLengths{11}, InputLengths{10, 1}, InputLengths{9, 2}, InputLengths{9, 1, 1}, + // 19 + InputLengths{19}, + // 20 + InputLengths{20}, InputLengths{1, 19}, InputLengths{19, 1}, InputLengths{10, 10}, + // 21 + InputLengths{21}, InputLengths{20, 1}, InputLengths{19, 2}, + InputLengths{19, 1, 1}, InputLengths{10, 10, 1}, InputLengths{1, 9, 9, 1, 1}); +#else +auto const testingOffsets = ::testing::Values(0, 3, 100'000'000); +auto const testingLimits = ::testing::Values(0, 3, 100'000'000); +auto const testingInputLengths = ::testing::Values( + // 0 + InputLengths{}, + // 1 + InputLengths{1}, + // 3 + InputLengths{3}, InputLengths{1, 2}, InputLengths{2, 1}, InputLengths{1, 1, 1}, + // 11 + InputLengths{9, 2}, InputLengths{9, 1, 1}, + // 19 + InputLengths{19}, + // 21 + InputLengths{10, 10, 1}, InputLengths{1, 9, 9, 1, 1}); + +#endif + +// Note that fullCount does only make sense with a hard limit, and +// soft limit = 0 and offset = 0 must not occur together. 
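These constraints are asserted again at the top of the test body below. Expressed as a predicate, using only AqlCall accessors that already occur in this file (a sketch for illustration, not part of the test suite):

    // A client call generated for these tests is considered valid when
    // fullCount only comes with a hard limit, soft and hard limit are not
    // set together, and it does not say "skip nothing, produce nothing".
    auto const isValidClientCall = [](AqlCall const& call) -> bool {
      return !(call.hasSoftLimit() && call.fullCount) &&
             !(call.hasSoftLimit() && call.hasHardLimit()) &&
             !(call.getOffset() == 0 && call.softLimit == AqlCall::Limit{0});
    };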
+auto const testingAqlCalls = ::testing::ValuesIn( + std::array{AqlCall{0, false, AqlCall::Infinity{}}, + AqlCall{0, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{0, false, 2, AqlCall::LimitType::SOFT}, + AqlCall{0, false, 3, AqlCall::LimitType::SOFT}, + AqlCall{0, false, 10, AqlCall::LimitType::SOFT}, + AqlCall{0, false, 0, AqlCall::LimitType::HARD}, + AqlCall{0, false, 1, AqlCall::LimitType::HARD}, + AqlCall{0, false, 2, AqlCall::LimitType::HARD}, + AqlCall{0, false, 3, AqlCall::LimitType::HARD}, + AqlCall{0, false, 10, AqlCall::LimitType::HARD}, + AqlCall{1, false, AqlCall::Infinity{}}, + AqlCall{1, false, 0, AqlCall::LimitType::SOFT}, + AqlCall{1, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{1, false, 2, AqlCall::LimitType::SOFT}, + AqlCall{1, false, 3, AqlCall::LimitType::SOFT}, + AqlCall{1, false, 10, AqlCall::LimitType::SOFT}, + AqlCall{1, false, 0, AqlCall::LimitType::HARD}, + AqlCall{1, false, 1, AqlCall::LimitType::HARD}, + AqlCall{1, false, 2, AqlCall::LimitType::HARD}, + AqlCall{1, false, 3, AqlCall::LimitType::HARD}, + AqlCall{1, false, 10, AqlCall::LimitType::HARD}, + AqlCall{2, false, AqlCall::Infinity{}}, + AqlCall{2, false, 0, AqlCall::LimitType::SOFT}, + AqlCall{2, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{2, false, 2, AqlCall::LimitType::SOFT}, + AqlCall{2, false, 3, AqlCall::LimitType::SOFT}, + AqlCall{2, false, 10, AqlCall::LimitType::SOFT}, + AqlCall{2, false, 0, AqlCall::LimitType::HARD}, + AqlCall{2, false, 1, AqlCall::LimitType::HARD}, + AqlCall{2, false, 2, AqlCall::LimitType::HARD}, + AqlCall{2, false, 3, AqlCall::LimitType::HARD}, + AqlCall{2, false, 10, AqlCall::LimitType::HARD}, + AqlCall{3, false, AqlCall::Infinity{}}, + AqlCall{3, false, 0, AqlCall::LimitType::SOFT}, + AqlCall{3, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{3, false, 2, AqlCall::LimitType::SOFT}, + AqlCall{3, false, 3, AqlCall::LimitType::SOFT}, + AqlCall{3, false, 10, AqlCall::LimitType::SOFT}, + AqlCall{3, false, 0, AqlCall::LimitType::HARD}, + AqlCall{3, false, 1, AqlCall::LimitType::HARD}, + AqlCall{3, false, 2, AqlCall::LimitType::HARD}, + AqlCall{3, false, 3, AqlCall::LimitType::HARD}, + AqlCall{3, false, 10, AqlCall::LimitType::HARD}, + AqlCall{10, false, AqlCall::Infinity{}}, + AqlCall{10, false, 0, AqlCall::LimitType::SOFT}, + AqlCall{10, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{10, false, 2, AqlCall::LimitType::SOFT}, + AqlCall{10, false, 3, AqlCall::LimitType::SOFT}, + AqlCall{10, false, 10, AqlCall::LimitType::SOFT}, + AqlCall{10, false, 0, AqlCall::LimitType::HARD}, + AqlCall{10, false, 1, AqlCall::LimitType::HARD}, + AqlCall{10, false, 2, AqlCall::LimitType::HARD}, + AqlCall{10, false, 3, AqlCall::LimitType::HARD}, + AqlCall{10, false, 10, AqlCall::LimitType::HARD}, + AqlCall{0, true, 0, AqlCall::LimitType::HARD}, + AqlCall{0, true, 1, AqlCall::LimitType::HARD}, + AqlCall{0, true, 2, AqlCall::LimitType::HARD}, + AqlCall{0, true, 3, AqlCall::LimitType::HARD}, + AqlCall{0, true, 10, AqlCall::LimitType::HARD}, + AqlCall{1, true, 0, AqlCall::LimitType::HARD}, + AqlCall{1, true, 1, AqlCall::LimitType::HARD}, + AqlCall{1, true, 2, AqlCall::LimitType::HARD}, + AqlCall{1, true, 3, AqlCall::LimitType::HARD}, + AqlCall{1, true, 10, AqlCall::LimitType::HARD}, + AqlCall{2, true, 0, AqlCall::LimitType::HARD}, + AqlCall{2, true, 1, AqlCall::LimitType::HARD}, + AqlCall{2, true, 2, AqlCall::LimitType::HARD}, + AqlCall{2, true, 3, AqlCall::LimitType::HARD}, + AqlCall{2, true, 10, AqlCall::LimitType::HARD}, + AqlCall{3, true, 0, AqlCall::LimitType::HARD}, + AqlCall{3, true, 1, 
AqlCall::LimitType::HARD}, + AqlCall{3, true, 2, AqlCall::LimitType::HARD}, + AqlCall{3, true, 3, AqlCall::LimitType::HARD}, + AqlCall{3, true, 10, AqlCall::LimitType::HARD}, + AqlCall{10, true, 0, AqlCall::LimitType::HARD}, + AqlCall{10, true, 1, AqlCall::LimitType::HARD}, + AqlCall{10, true, 2, AqlCall::LimitType::HARD}, + AqlCall{10, true, 3, AqlCall::LimitType::HARD}, + AqlCall{10, true, 10, AqlCall::LimitType::HARD}}); +auto const testingDoneResultIsEmpty = ::testing::Bool(); + +auto const limitTestCases = + ::testing::Combine(testingOffsets, testingLimits, testingFullCount, + testingInputLengths, testingAqlCalls, testingDoneResultIsEmpty); + +TEST_P(LimitExecutorTest, testSuite) { + // Input. + auto const& [offset, limit, fullCount, inputLengths, clientCall, doneResultIsEmpty] = + GetParam(); + + TRI_ASSERT(!(clientCall.getOffset() == 0 && clientCall.softLimit == AqlCall::Limit{0})); + TRI_ASSERT(!(clientCall.hasSoftLimit() && clientCall.fullCount)); + TRI_ASSERT(!(clientCall.hasSoftLimit() && clientCall.hasHardLimit())); + + auto const numInputRows = + std::accumulate(inputLengths.begin(), inputLengths.end(), size_t{0}); + { // Validation of the test case: + TRI_ASSERT(std::all_of(inputLengths.begin(), inputLengths.end(), + [](auto l) { return l > 0; })); } -} -TEST_P(LimitExecutorWaitingTest, rows_9_blocksize_3_skip_2_read_1_offset_2_limit_4) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 2; - size_t constexpr limit = 4; - size_t constexpr skip = 2; - size_t constexpr readRows = 1; - bool constexpr skipAfter = true; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, false); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = buildBlock<1>(itemBlockManager, {{4}}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::HASMORE, 2}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 2}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::SKIP_ROWS, ExecutionState::DONE, 1}, + auto const nonNegativeSubtraction = [](auto minuend, auto subtrahend) { + // same as std::max(0, minuend - subtrahend), but safe from underflows + return minuend - std::min(minuend, subtrahend); }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - expectedStats.fullCount = 0; - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! 
- SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - auto result = runExecutor(itemBlockManager, testee, outputRow, skip, readRows, skipAfter); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); - - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } + // Expected output, though the expectedPassedBlocks are also the input. + // Note that structured bindings are *not* captured by lambdas, at least in + // C++17. So we must explicity capture them. + auto const [expectedSkipped, expectedOutput, expectedLimitStats, expectedState] = + std::invoke([&, offset = offset, limit = limit, fullCount = fullCount, + &inputLengths = inputLengths, clientCall = clientCall, + doneResultIsEmpty = doneResultIsEmpty]() { + auto const numInputRows = + std::accumulate(inputLengths.begin(), inputLengths.end(), size_t{0}); + auto const effectiveOffset = clientCall.getOffset() + offset; + // The combined limit of a call and a LimitExecutor: + auto const effectiveLimit = + std::min(clientCall.getLimit(), + nonNegativeSubtraction(limit, clientCall.getOffset())); + + auto const numRowsReturnable = + nonNegativeSubtraction(std::min(numInputRows, offset + limit), offset); + + // Only the client's offset counts against the "skipped" count returned + // by the limit block, the rest is upstream! + auto skipped = std::min(numRowsReturnable, clientCall.getOffset()); + if (clientCall.needsFullCount()) { + // offset and limit are already handled. + // New we need to include the amount of rows left to count them by + // skipped. However only those rows that the LIMIT will return. + skipped += nonNegativeSubtraction(numRowsReturnable, + clientCall.getOffset() + clientCall.getLimit()); + } + + auto const output = std::invoke([&]() { + auto output = MatrixBuilder<1>{}; + + auto const begin = effectiveOffset; + auto const end = std::min(effectiveOffset + effectiveLimit, numInputRows); + for (auto k = begin; k < end; ++k) { + output.emplace_back(RowBuilder<1>{k}); + } + + return output; + }); + + auto stats = LimitStats{}; + if (fullCount) { + if (!clientCall.hasHardLimit()) { + auto rowsToTriggerFullCountInExecutor = offset + limit; + auto rowsByClient = clientCall.getOffset() + clientCall.getLimit(); + + // If we do not have a hard limit, we only report fullCount + // up to the point where the Executor has actually consumed input. + if (rowsByClient >= limit && rowsToTriggerFullCountInExecutor < numInputRows) { + // however if the limit of the executor is smaller than the input + // it will itself start counting. + stats.incrFullCountBy(numInputRows); + } else { + stats.incrFullCountBy(std::min(effectiveOffset + effectiveLimit, numInputRows)); + } + } else { + stats.incrFullCountBy(numInputRows); + } + } + + // Whether the execution should return HASMORE: + auto const hasMore = std::invoke([&] { + auto const clientLimitIsSmaller = + clientCall.getOffset() + clientCall.getLimit() < limit; + auto const effectiveLimitIsHardLimit = + clientLimitIsSmaller ? 
clientCall.hasHardLimit() : true; + if (effectiveLimitIsHardLimit) { + return false; + } + // We have a softLimit: + if (doneResultIsEmpty) { + return effectiveOffset + effectiveLimit <= numInputRows; + } else { + return effectiveOffset + effectiveLimit < numInputRows; + } + }); + auto const state = hasMore ? ExecutionState::HASMORE : ExecutionState::DONE; + + return std::make_tuple(skipped, output, stats, state); + }); + + auto infos = LimitExecutorInfos{1, 1, {}, {0}, offset, limit, fullCount}; + + auto expectedStats = ExecutionStats{}; + expectedStats += expectedLimitStats; + + // fakedQuery->queryOptions().profile = PROFILE_LEVEL_TRACE_2; + ExecutorTestHelper<>{*fakedQuery} + .setExecBlock(std::move(infos), ExecutionNode::LIMIT) + .setInputFromRowNum(numInputRows) + .setInputSplitType(inputLengths) + .setCall(clientCall) + .appendEmptyBlock(doneResultIsEmpty) + .expectedStats(expectedStats) + .expectOutput({0}, expectedOutput) + .expectSkipped(expectedSkipped) + .expectedState(expectedState) + .run(true); } -TEST_P(LimitExecutorWaitingTest, rows_9_blocksize_3_skip_10_limit_12) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 0; - size_t constexpr limit = 12; - size_t constexpr skip = 10; - size_t constexpr readRows = 1; - bool constexpr skipAfter = true; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, false); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = buildBlock<1>(itemBlockManager, {}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::DONE, 9}, - }; - if (!waiting) { - removeWaiting(expectedStates); - } - ExecutionStats expectedStats{}; - expectedStats.fullCount = 0; - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! - SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; +auto printTestCase = + [](testing::TestParamInfo, AqlCall, bool>> const& paramInfo) + -> std::string { + auto const& [offset, limit, fullCount, inputLengths, clientCall, doneResultIsEmpty] = + paramInfo.param; - auto result = runExecutor(itemBlockManager, testee, outputRow, skip, readRows, skipAfter); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); + std::stringstream out; - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); + out << "offset" << offset; + out << "limit" << limit; + out << "fullCount" << (fullCount ? 
"True" : "False"); + out << "inputLengths"; + for (auto const& it : inputLengths) { + out << it << "_"; } -} - -TEST_P(LimitExecutorWaitingTest, rows_9_blocksize_3_skip_1_read_1_limit_12) { - // Input spec: - size_t constexpr blocksize = 3; - size_t constexpr offset = 0; - size_t constexpr limit = 12; - size_t constexpr skip = 1; - size_t constexpr readRows = 1; - bool constexpr skipAfter = true; - SharedAqlItemBlockPtr const input = - buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, blocksize, waiting, input); - LimitExecutorInfos infos(1, 1, {}, {0}, offset, limit, false); - - // Output spec: - SharedAqlItemBlockPtr const expectedOutput = buildBlock<1>(itemBlockManager, {{1}}); - size_t const expectedOutputSize = - expectedOutput == nullptr ? 0 : expectedOutput->size(); - std::vector expectedStates{ - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::FETCH_FOR_PASSTHROUGH, ExecutionState::HASMORE, 2}, - {ExecutorCall::PRODUCE_ROWS, ExecutionState::HASMORE, 1}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::WAITING, 0}, - {ExecutorCall::SKIP_ROWS, ExecutionState::DONE, 7}, - }; - if (!waiting) { - removeWaiting(expectedStates); + out << "clientCall"; + { + if (clientCall.getOffset() > 0) { + out << "_offset" << clientCall.getOffset(); + } + if (clientCall.hasHardLimit() || clientCall.hasSoftLimit()) { + auto const clientLimit = + std::get(std::min(clientCall.softLimit, clientCall.hardLimit)); + out << "_" << (clientCall.hasHardLimit() ? "hard" : "soft") << "Limit" << clientLimit; + } + if (clientCall.needsFullCount()) { + out << "_fullCount"; + } } - ExecutionStats expectedStats{}; - expectedStats.fullCount = 0; - - // Run: - LimitExecutor testee(fetcher, infos); - // Allocate at least one output row more than expected! - SharedAqlItemBlockPtr block = itemBlockManager.requestBlock(expectedOutputSize + 1, 1); - OutputAqlItemRow outputRow{block, outputRegisters, registersToKeep, - infos.registersToClear()}; - - auto result = runExecutor(itemBlockManager, testee, outputRow, skip, readRows, skipAfter); - auto& actualOutput = std::get(result); - auto& actualStats = std::get(result); - auto& actualStates = std::get>(result); + out << "doneResultIsEmpty" << (doneResultIsEmpty ? 
"True" : "False"); - EXPECT_EQ(expectedStats, actualStats); - EXPECT_EQ(expectedStates, actualStates); - if (expectedOutput == nullptr) { - ASSERT_EQ(actualOutput, nullptr); - } else { - ASSERT_FALSE(actualOutput == nullptr); - EXPECT_EQ(*expectedOutput, *actualOutput); - } -} + return out.str(); +}; -INSTANTIATE_TEST_CASE_P(LimitExecutorVariations, LimitExecutorWaitingTest, testing::Bool()); +INSTANTIATE_TEST_CASE_P(LimitExecutorVariations, LimitExecutorTest, + limitTestCases, printTestCase); -} // namespace aql -} // namespace tests -} // namespace arangodb +} // namespace arangodb::tests::aql diff --git a/tests/Aql/MockTypedNode.cpp b/tests/Aql/MockTypedNode.cpp new file mode 100644 index 000000000000..fafc033c9222 --- /dev/null +++ b/tests/Aql/MockTypedNode.cpp @@ -0,0 +1,56 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "MockTypedNode.h" + +#include "Basics/Exceptions.h" + +using namespace arangodb; +using namespace arangodb::aql; +using namespace arangodb::tests; +using namespace arangodb::tests::aql; + +MockTypedNode::MockTypedNode(::arangodb::aql::ExecutionPlan* plan, size_t id, NodeType type) + : ExecutionNode(plan, id), _mockedType(type) {} + +ExecutionNode* MockTypedNode::clone(ExecutionPlan* plan, bool withDependencies, + bool withProperties) const { + return cloneHelper(std::make_unique(plan, _id, _mockedType), + withDependencies, withProperties); +} + +::arangodb::aql::CostEstimate MockTypedNode::estimateCost() const { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +std::unique_ptr<::arangodb::aql::ExecutionBlock> MockTypedNode::createBlock( + ::arangodb::aql::ExecutionEngine& engine, + std::unordered_map const&) const { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +void MockTypedNode::toVelocyPackHelper(arangodb::velocypack::Builder&, unsigned flags, + std::unordered_set& seen) const { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +ExecutionNode::NodeType MockTypedNode::getType() const { return _mockedType; } diff --git a/tests/Aql/MockTypedNode.h b/tests/Aql/MockTypedNode.h new file mode 100644 index 000000000000..f2a309929408 --- /dev/null +++ b/tests/Aql/MockTypedNode.h @@ -0,0 +1,60 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGODB_TESTS_MOCK_TYPED_NODE_H +#define ARANGODB_TESTS_MOCK_TYPED_NODE_H 1 + +#include "Aql/ExecutionNode.h" + +namespace arangodb::tests::aql { + +// An execution node pretending to be of an arbitrary type. +class MockTypedNode : public ::arangodb::aql::ExecutionNode { + friend class ExecutionBlock; + + public: + MockTypedNode(::arangodb::aql::ExecutionPlan* plan, size_t id, NodeType); + + // return mocked type + NodeType getType() const final; + + // Necessary overrides, all not implemented: + + void toVelocyPackHelper(arangodb::velocypack::Builder&, unsigned flags, + std::unordered_set& seen) const override; + + std::unique_ptr<::arangodb::aql::ExecutionBlock> createBlock( + ::arangodb::aql::ExecutionEngine& engine, + std::unordered_map const&) const override; + + ExecutionNode* clone(::arangodb::aql::ExecutionPlan* plan, + bool withDependencies, bool withProperties) const override; + + ::arangodb::aql::CostEstimate estimateCost() const override; + + private: + NodeType _mockedType{}; +}; + +} // namespace arangodb::tests::aql + +#endif // ARANGODB_TESTS_MOCK_TYPED_NODE_H diff --git a/tests/Aql/ReturnExecutorTest.cpp b/tests/Aql/ReturnExecutorTest.cpp index 7fb106efd822..4e6764095b88 100644 --- a/tests/Aql/ReturnExecutorTest.cpp +++ b/tests/Aql/ReturnExecutorTest.cpp @@ -51,23 +51,8 @@ using ReturnExecutorTestHelper = ExecutorTestHelper<1, 1>; using ReturnExecutorSplitType = ReturnExecutorTestHelper::SplitType; using ReturnExecutorParamType = std::tuple; -class ReturnExecutorTest : public ::testing::TestWithParam { +class ReturnExecutorTest : public AqlExecutorTestCaseWithParam { protected: - // ExecutionState state; - ResourceMonitor monitor{}; - mocks::MockAqlServer server{}; - AqlItemBlockManager itemBlockManager; - - std::unique_ptr fakedQuery; - - ReturnExecutorTest() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - fakedQuery(server.createFakeQuery()) { - auto engine = - std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); - fakedQuery->setEngine(engine.release()); - } - auto getSplit() -> ReturnExecutorSplitType { auto [split, unused] = GetParam(); return split; @@ -114,7 +99,7 @@ TEST_P(ReturnExecutorTest, returns_all_from_upstream) { ReturnExecutorInfos infos(0 /*input register*/, 1 /*nr in*/, 1 /*nr out*/, doCount()); AqlCall call{}; // unlimited produce ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + .setExecBlock(std::move(infos), ExecutionNode::RETURN) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -130,7 +115,7 @@ TEST_P(ReturnExecutorTest, handle_soft_limit) { AqlCall call{}; call.softLimit = 3; ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + .setExecBlock(std::move(infos), ExecutionNode::RETURN) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -146,7 +131,7 @@ TEST_P(ReturnExecutorTest, handle_hard_limit) 
{ AqlCall call{}; call.hardLimit = 5; ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + .setExecBlock(std::move(infos), ExecutionNode::RETURN) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -162,7 +147,7 @@ TEST_P(ReturnExecutorTest, handle_offset) { AqlCall call{}; call.offset = 4; ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + .setExecBlock(std::move(infos), ExecutionNode::RETURN) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -179,7 +164,7 @@ TEST_P(ReturnExecutorTest, handle_fullcount) { call.hardLimit = 2; call.fullCount = true; ExecutorTestHelper(*fakedQuery) - .setExecBlock(std::move(infos)) + .setExecBlock(std::move(infos), ExecutionNode::RETURN) .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) .setInputSplitType(getSplit()) .setCall(call) @@ -195,7 +180,7 @@ TEST_P(ReturnExecutorTest, handle_other_inputRegister) { AqlCall call{}; call.hardLimit = 5; ExecutorTestHelper<2, 1>(*fakedQuery) - .setExecBlock(std::move(infos)) + .setExecBlock(std::move(infos), ExecutionNode::RETURN) .setInputValue({{R"("invalid")", 1}, {R"("invalid")", 2}, {R"("invalid")", 5}, diff --git a/tests/Aql/ShortestPathExecutorTest.cpp b/tests/Aql/ShortestPathExecutorTest.cpp index 93eca4fcfc98..11ad12eacb56 100644 --- a/tests/Aql/ShortestPathExecutorTest.cpp +++ b/tests/Aql/ShortestPathExecutorTest.cpp @@ -287,8 +287,7 @@ class ShortestPathExecutorTest std::move(parameters._source), std::move(parameters._target)), finder(static_cast(infos.finder())), inputBlock(buildBlock<2>(itemBlockManager, std::move(parameters._inputMatrix))), - input(AqlItemBlockInputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size())), + input(AqlItemBlockInputRange(ExecutorState::DONE, 0, inputBlock, 0)), fakeUnusedBlock(VPackParser::fromJson("[]")), fetcher(itemBlockManager, fakeUnusedBlock->steal(), false), testee(fetcher, infos) { @@ -430,7 +429,7 @@ class ShortestPathExecutorTest // If an offset is requested, skip if (ourCall.getOffset() > 0) { - std::tie(state, std::ignore, skippedInitial, std::ignore) = + std::tie(state, std::ignore /* stats */, skippedInitial, std::ignore) = testee.skipRowsRange(input, ourCall); } @@ -454,7 +453,7 @@ class ShortestPathExecutorTest // Emulate being called with a full count ourCall.hardLimit = 0; ourCall.softLimit = 0; - std::tie(state, std::ignore, skippedFullCount, std::ignore) = + std::tie(state, std::ignore /* stats */, skippedFullCount, std::ignore) = testee.skipRowsRange(input, ourCall); } diff --git a/tests/Aql/SingleRowFetcherTest.cpp b/tests/Aql/SingleRowFetcherTest.cpp index 06f7b5fde85e..f0ed040e2783 100644 --- a/tests/Aql/SingleRowFetcherTest.cpp +++ b/tests/Aql/SingleRowFetcherTest.cpp @@ -1223,7 +1223,7 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_oneAndDone { SingleRowFetcher testee(dependencyProxyMock); AqlCall call; - AqlCallStack stack = {call}; + auto stack = AqlCallStack{call}; // First no data row auto [state, skipped, input] = testee.execute(stack); @@ -1258,7 +1258,7 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_twoAndHasM { SingleRowFetcher testee(dependencyProxyMock); AqlCall call; - AqlCallStack stack = {call}; + auto stack = AqlCallStack{call}; auto [state, skipped, input] = testee.execute(stack); // We only have one block, no more calls to execute necessary diff --git a/tests/Aql/SortedCollectExecutorTest.cpp b/tests/Aql/SortedCollectExecutorTest.cpp index fa8ce07e644c..90ef4ca3beb2 
100644 --- a/tests/Aql/SortedCollectExecutorTest.cpp +++ b/tests/Aql/SortedCollectExecutorTest.cpp @@ -206,8 +206,7 @@ TEST_F(SortedCollectExecutorTestRowsUpstream, producer_1) { AqlCall clientCall; AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SharedAqlItemBlockPtr outputBlock = itemBlockManager.requestBlock(2, infos.numberOfOutputRegisters()); @@ -253,8 +252,7 @@ TEST_F(SortedCollectExecutorTestRowsUpstream, producer_2) { AqlCall clientCall; AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SharedAqlItemBlockPtr outputBlock = itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); @@ -305,8 +303,7 @@ TEST_F(SortedCollectExecutorTestRowsUpstream, producer_3) { AqlCall clientCall; AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SharedAqlItemBlockPtr outputBlock = itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); @@ -356,8 +353,7 @@ TEST_F(SortedCollectExecutorTestRowsUpstream, producer_4) { AqlCall clientCall; AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SharedAqlItemBlockPtr outputBlock = itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); @@ -431,8 +427,7 @@ TEST(SortedCollectExecutorTestRowsUpstreamCount, test) { AqlCall clientCall; AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SharedAqlItemBlockPtr outputBlock = itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); @@ -533,8 +528,7 @@ TEST(SortedCollectExecutorTestRowsUpstreamCountStrings, test) { AqlCall clientCall; AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SharedAqlItemBlockPtr outputBlock = itemBlockManager.requestBlock(inputBlock->size(), infos.numberOfOutputRegisters()); @@ -653,8 +647,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_1) { SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}}); AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SortedCollectExecutor testee(fetcher, infos); @@ -690,8 +683,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_2) { SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {2}}); AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + 
AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SortedCollectExecutor testee(fetcher, infos); @@ -750,8 +742,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_3) { SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {1}}); AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::HASMORE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::HASMORE, 0, inputBlock, 0); AqlItemBlockInputRange emptyInputRangeDone(ExecutorState::DONE); SortedCollectExecutor testee(fetcher, infos); @@ -795,12 +786,10 @@ TEST_F(SortedCollectExecutorTestSkip, skip_4) { SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {1}}); AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::HASMORE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::HASMORE, 0, inputBlock, 0); SharedAqlItemBlockPtr inputBlock2 = buildBlock<1>(itemBlockManager, {{2}}); - AqlItemBlockInputRange inputRange2(ExecutorState::HASMORE, inputBlock2, 0, - inputBlock2->size()); + AqlItemBlockInputRange inputRange2(ExecutorState::HASMORE, 0, inputBlock2, 0); AqlItemBlockInputRange emptyInputRangeDone(ExecutorState::DONE); SortedCollectExecutor testee(fetcher, infos); @@ -876,8 +865,7 @@ TEST_F(SortedCollectExecutorTestSkip, skip_5) { SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}, {1}, {2}}); AqlItemBlockInputRange emptyInputRange(ExecutorState::HASMORE); - AqlItemBlockInputRange inputRange(ExecutorState::DONE, inputBlock, 0, - inputBlock->size()); + AqlItemBlockInputRange inputRange(ExecutorState::DONE, 0, inputBlock, 0); SortedCollectExecutor testee(fetcher, infos); diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp index 74000072c520..b20a09c843ca 100644 --- a/tests/Aql/WaitingExecutionBlockMock.cpp +++ b/tests/Aql/WaitingExecutionBlockMock.cpp @@ -125,24 +125,29 @@ std::tuple WaitingExecutionBlockM stack.pop(); } auto myCall = stack.popCall(); + + TRI_ASSERT(!(myCall.getOffset() == 0 && myCall.softLimit == AqlCall::Limit{0})); + TRI_ASSERT(!(myCall.hasSoftLimit() && myCall.fullCount)); + TRI_ASSERT(!(myCall.hasSoftLimit() && myCall.hasHardLimit())); + if (_variant != WaitingBehaviour::NEVER && !_hasWaited) { - // If we orderd waiting check on _hasWaited and wait if not + // If we ordered waiting check on _hasWaited and wait if not _hasWaited = true; return {ExecutionState::WAITING, 0, nullptr}; } if (_variant == WaitingBehaviour::ALWAYS) { - // If we allways wait, reset. + // If we always wait, reset. _hasWaited = false; } size_t skipped = 0; SharedAqlItemBlockPtr result = nullptr; - if (_data.front() == nullptr) { + if (!_data.empty() && _data.front() == nullptr) { dropBlock(); } while (!_data.empty()) { if (_data.front() == nullptr) { - if (myCall.getOffset() > 0 || myCall.getLimit() > 0) { - TRI_ASSERT(skipped > 0 || result != nullptr); + if ((skipped > 0 || result != nullptr) && + !(myCall.hasHardLimit() && myCall.getLimit() == 0)) { // This is a specific break point return now. // Sorry we can only return one block. // This means we have prepared the first block. 
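The changed break condition above is easier to read as a named predicate. The following is an editorial sketch only; the variable name is an assumption, and the reading of the hard-limit-0 case is inferred from the diff rather than stated in the patch:

// Return the block prepared so far only if something was actually produced or
// skipped, and the client call is not a hard limit of 0 (the shape used for
// fullCount), in which case the mock keeps consuming its queued blocks
// instead of returning early.
bool const returnPreparedBlock =
    (skipped > 0 || result != nullptr) &&
    !(myCall.hasHardLimit() && myCall.getLimit() == 0);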
@@ -205,10 +210,12 @@ std::tuple WaitingExecutionBlockM dropBlock(); } } - if (_data.empty()) { - return {ExecutionState::DONE, skipped, result}; - } else { + if (!_data.empty()) { return {ExecutionState::HASMORE, skipped, result}; + } else if (result != nullptr && result->size() < myCall.hardLimit) { + return {ExecutionState::HASMORE, skipped, result}; + } else { + return {ExecutionState::DONE, skipped, result}; } } } @@ -219,4 +226,4 @@ void WaitingExecutionBlockMock::dropBlock() { TRI_ASSERT(!_data.empty()); _data.pop_front(); _inflight = 0; -} \ No newline at end of file +} diff --git a/tests/Aql/WaitingExecutionBlockMock.h b/tests/Aql/WaitingExecutionBlockMock.h index a66c3a913b4e..2c147ba08e91 100644 --- a/tests/Aql/WaitingExecutionBlockMock.h +++ b/tests/Aql/WaitingExecutionBlockMock.h @@ -51,7 +51,7 @@ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { enum WaitingBehaviour { NEVER, // Never return WAITING ONCE, // Return WAITING on the first execute call, afterwards return all blocks - ALWAYS // Return Waiting once for every execute Call. + ALWAYS // Return WAITING once for every execute Call. }; /** @@ -60,7 +60,7 @@ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { * @param engine Required by API. * @param node Required by API. * @param data Must be a shared_ptr to an VPackArray. - * @param variant The waiting behaviour of this block (default ONCE), see WaitingBehaviour + * @param variant The waiting behaviour of this block (default ALWAYS), see WaitingBehaviour */ WaitingExecutionBlockMock(arangodb::aql::ExecutionEngine* engine, arangodb::aql::ExecutionNode const* node, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7b7c9fda3323..6ba73256b04f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -51,6 +51,7 @@ set(ARANGODB_TESTS_SOURCES Aql/IndexNodeTest.cpp Aql/InsertExecutorTest.cpp Aql/LimitExecutorTest.cpp + Aql/MockTypedNode.cpp Aql/MultiDepFetcherHelper.cpp Aql/MultiDependencySingleRowFetcherTest.cpp Aql/NoResultsExecutorTest.cpp diff --git a/tests/Mocks/Servers.cpp b/tests/Mocks/Servers.cpp index e9e0e8e66f63..126dde89f4cc 100644 --- a/tests/Mocks/Servers.cpp +++ b/tests/Mocks/Servers.cpp @@ -26,6 +26,8 @@ #include "ApplicationFeatures/CommunicationFeaturePhase.h" #include "ApplicationFeatures/GreetingsFeaturePhase.h" #include "Aql/AqlFunctionFeature.h" +#include "Aql/AqlItemBlockSerializationFormat.h" +#include "Aql/ExecutionEngine.h" #include "Aql/OptimizerRulesFeature.h" #include "Aql/Query.h" #include "Basics/files.h" @@ -463,6 +465,10 @@ std::unique_ptr MockAqlServer::createFakeQuery(bool activa fakeQueryString, bindParams, queryOptions, arangodb::aql::QueryPart::PART_DEPENDENT); query->injectTransaction(createFakeTransaction()); + + auto engine = std::make_unique(*query, aql::SerializationFormat::SHADOWROWS); + query->setEngine(std::move(engine)); + return query; } diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js index 27c7cb7a5727..421d7254a7f7 100644 --- a/tests/js/server/aql/aql-profiler.js +++ b/tests/js/server/aql/aql-profiler.js @@ -551,7 +551,7 @@ function ahuacatlProfilerTestSuite () { const genNodeList = (rows) => [ {type: SingletonBlock, calls: 1, items: 1}, {type: CalculationBlock, calls: 1, items: 1}, - {type: EnumerateListBlock, calls: limitBatches(rows) + skipOffsetBatches(rows), items: limit(rows) + offset(rows)}, + {type: EnumerateListBlock, calls: limitBatches(rows), items: limit(rows) + offset(rows)}, {type: LimitBlock, calls: 
limitBatches(rows), items: limit(rows)}, {type: ReturnBlock, calls: limitBatches(rows), items: limit(rows)}, ]; @@ -705,8 +705,8 @@ function ahuacatlProfilerTestSuite () { { type : CalculationBlock, calls : 1, items : 1 }, { type : EnumerateListBlock, calls : batches, items : rows }, { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows) + remainderBatches(rows), items : rows }, - { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }, - { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) } + { type : LimitBlock, calls : limitMinusSkipBatches(rows) + /* this is only during ::execute work, should remove later again */ remainderBatches(rows), items : limitMinusSkip(rows) }, + { type : ReturnBlock, calls : limitMinusSkipBatches(rows) + /* this is only during ::execute work, should remove later again */ remainderBatches(rows), items : limitMinusSkip(rows) } ]; const bind = rows => ({ rows, From e7f36c35cf918002a495968a97d7a2606b87a47f Mon Sep 17 00:00:00 2001 From: Heiko Date: Fri, 21 Feb 2020 16:54:01 +0100 Subject: [PATCH 072/122] Feature/aql subquery operations stack calculation exec enumerate exec (#11098) * removed unused import * re-ordered asserts to see which register was wrongly used * added implementation of produceRows with inputRange * added first test to test new produceRows including inputRange * rm skip from calc executor * rm not correct assert, added second test * added produceRows inputrange hasmore test * added new produceRows + test * added skipRowsRange enumerate collection executor * added skipRowsRange enumerate collection executor test * added assert * adde fake query helper, one without auto trx but with prepare plan, the other one without prepare but with trx - also removed fakeit mocks from enumeratecollectionexecutor tests * fixed skipRowsRange in enumerateCollectionExecutor, added test as well * removed todo * added skipAll * added EnumerateCollection as new style executor * added produce and skip rows, not ready yet * added new tests, still not ready, need unsorted compare operators in helper, waiting * rm logs * intermidiate commit - may not work * removed unused import * added implementation of produceRows with inputRange * added first test to test new produceRows including inputRange * rm skip from calc executor * rm not correct assert, added second test * added produceRows inputrange hasmore test * added new produceRows + test * added skipRowsRange enumerate collection executor * added skipRowsRange enumerate collection executor test * added assert * adde fake query helper, one without auto trx but with prepare plan, the other one without prepare but with trx - also removed fakeit mocks from enumeratecollectionexecutor tests * fixed skipRowsRange in enumerateCollectionExecutor, added test as well * removed todo * added skipAll * added EnumerateCollection as new style executor * added produce and skip rows, not ready yet * added new tests, still not ready, need unsorted compare operators in helper, waiting * rm logs * intermidiate commit - may not work * make branch compile again * mv assert * protected server * make enum collection tests work again * fixed ExecBlockImpl * todo optimization * fix gtest calc executor * header file enumcollexec, helper method * fix shrink method in aqlitemblock * impl * nodiscard * calc executor * rm empty line * fix calc exec * tmp re-enable old method * tmp make tests green again, needs cleanup later * use uint64_t 
instead of u_int64_t * removed unused code * Update arangod/Aql/CalculationExecutor.cpp Co-Authored-By: Michael Hackstein * Update arangod/Aql/CalculationExecutor.cpp Co-Authored-By: Michael Hackstein * remove implementation code of old api * Update arangod/Aql/EnumerateCollectionExecutor.cpp Co-Authored-By: Michael Hackstein * buildable tests again * rm unused code * added assert * make tests compile again ... * disable few more tests, need to be optimized after subquery end start merges Co-authored-by: Michael Hackstein --- arangod/Aql/AqlItemBlock.cpp | 14 +- arangod/Aql/CalculationExecutor.cpp | 45 ++- arangod/Aql/CalculationExecutor.h | 13 +- arangod/Aql/EnumerateCollectionExecutor.cpp | 220 ++++++---- arangod/Aql/EnumerateCollectionExecutor.h | 38 +- arangod/Aql/ExecutionBlockImpl.cpp | 8 +- arangod/Aql/ExecutionNode.cpp | 2 +- arangod/Aql/FilterExecutor.cpp | 1 - arangod/Utils/OperationCursor.cpp | 22 + arangod/Utils/OperationCursor.h | 6 + tests/Aql/CalculationExecutorTest.cpp | 143 ++++++- tests/Aql/EnumerateCollectionExecutorTest.cpp | 378 +++++++++++++++--- tests/Aql/ExecutorTestHelper.h | 3 +- tests/Mocks/Servers.cpp | 6 +- tests/Mocks/Servers.h | 2 +- 15 files changed, 729 insertions(+), 172 deletions(-) diff --git a/arangod/Aql/AqlItemBlock.cpp b/arangod/Aql/AqlItemBlock.cpp index 82edf0f715e3..79d7428910ae 100644 --- a/arangod/Aql/AqlItemBlock.cpp +++ b/arangod/Aql/AqlItemBlock.cpp @@ -347,6 +347,13 @@ void AqlItemBlock::shrink(size_t nrItems) { decreaseMemoryUsage(sizeof(AqlValue) * (_nrItems - nrItems) * _nrRegs); + // remove the shadow row indices pointing to now invalid rows. + _shadowRowIndexes.erase(_shadowRowIndexes.lower_bound(nrItems), + _shadowRowIndexes.end()); + + // adjust the size of the block + _nrItems = nrItems; + for (size_t i = numEntries(); i < _data.size(); ++i) { AqlValue& a = _data[i]; if (a.requiresDestruction()) { @@ -368,13 +375,6 @@ void AqlItemBlock::shrink(size_t nrItems) { } a.erase(); } - - // remove the shadow row indices pointing to now invalid rows. 
- _shadowRowIndexes.erase(_shadowRowIndexes.lower_bound(nrItems), - _shadowRowIndexes.end()); - - // adjust the size of the block - _nrItems = nrItems; } void AqlItemBlock::rescale(size_t nrItems, RegisterId nrRegs) { diff --git a/arangod/Aql/CalculationExecutor.cpp b/arangod/Aql/CalculationExecutor.cpp index c6b3513afc15..191324660f17 100644 --- a/arangod/Aql/CalculationExecutor.cpp +++ b/arangod/Aql/CalculationExecutor.cpp @@ -21,13 +21,16 @@ //////////////////////////////////////////////////////////////////////////////// #include "CalculationExecutor.h" +#include +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutorExpressionContext.h" #include "Aql/Expression.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/SingleRowFetcher.h" -#include "Basics/Common.h" #include "Basics/Exceptions.h" #include "Basics/ScopeGuard.h" #include "Cluster/ServerState.h" @@ -86,6 +89,8 @@ std::vector const& CalculationExecutorInfos::getExpInRegs() const no template std::pair::Stats> CalculationExecutor::produceRows(OutputAqlItemRow& output) { + // TRI_ASSERT(false); + // THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); ExecutionState state; InputAqlItemRow row = InputAqlItemRow{CreateInvalidInputRowHint{}}; std::tie(state, row) = _fetcher.fetchRow(); @@ -125,6 +130,44 @@ CalculationExecutor::produceRows(OutputAqlItemRow& output) { return {state, NoStats{}}; } +template +std::tuple::Stats, AqlCall> +CalculationExecutor::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) { + TRI_IF_FAILURE("CalculationExecutor::produceRows") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + ExecutorState state = ExecutorState::HASMORE; + InputAqlItemRow input = InputAqlItemRow{CreateInvalidInputRowHint{}}; + + while (inputRange.hasDataRow()) { + // This executor is passthrough. it has enough place to write. + TRI_ASSERT(!output.isFull()); + std::tie(state, input) = inputRange.nextDataRow(); + TRI_ASSERT(input.isInitialized()); + + doEvaluation(input, output); + output.advanceRow(); + + // _hasEnteredContext implies the query has entered the context, but not + // the other way round because it may be owned by exterior. + TRI_ASSERT(!_hasEnteredContext || _infos.getQuery().hasEnteredContext()); + + // The following only affects V8Conditions. If we should exit the V8 context + // between blocks, because we might have to wait for client or upstream, then + // hasEnteredContext => state == HASMORE, + // as we only leave the context open when there are rows left in the current + // block. + // Note that _infos.getQuery().hasEnteredContext() may be true, even if + // _hasEnteredContext is false, if (and only if) the query context is owned + // by exterior. 
+ TRI_ASSERT(!shouldExitContextBetweenBlocks() || !_hasEnteredContext || + state == ExecutorState::HASMORE); + } + + return {inputRange.upstreamState(), NoStats{}, output.getClientCall()}; +} + template std::tuple::Stats, SharedAqlItemBlockPtr> CalculationExecutor::fetchBlockForPassthrough(size_t atMost) { diff --git a/arangod/Aql/CalculationExecutor.h b/arangod/Aql/CalculationExecutor.h index 77c429d72118..8ab0b812721b 100644 --- a/arangod/Aql/CalculationExecutor.h +++ b/arangod/Aql/CalculationExecutor.h @@ -40,10 +40,12 @@ class Methods; namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; class Expression; class OutputAqlItemRow; class Query; -template +template class SingleRowFetcher; struct Variable; @@ -104,6 +106,14 @@ class CalculationExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple produceRows( + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output); + std::tuple fetchBlockForPassthrough(size_t atMost); private: @@ -134,7 +144,6 @@ class CalculationExecutor { bool _hasEnteredContext; }; - } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/EnumerateCollectionExecutor.cpp b/arangod/Aql/EnumerateCollectionExecutor.cpp index 15a5db47ef83..ca2b97ba660a 100644 --- a/arangod/Aql/EnumerateCollectionExecutor.cpp +++ b/arangod/Aql/EnumerateCollectionExecutor.cpp @@ -25,6 +25,8 @@ #include "EnumerateCollectionExecutor.h" +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" #include "Aql/AqlValue.h" #include "Aql/Collection.h" #include "Aql/DocumentProducingHelper.h" @@ -35,9 +37,11 @@ #include "Aql/Query.h" #include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" +#include "AqlCall.h" #include "Transaction/Methods.h" #include "Utils/OperationCursor.h" +#include #include using namespace arangodb; @@ -54,8 +58,8 @@ EnumerateCollectionExecutorInfos::EnumerateCollectionExecutorInfos( // cppcheck-suppress passedByValue std::unordered_set registersToKeep, ExecutionEngine* engine, Collection const* collection, Variable const* outVariable, bool produceResult, - Expression* filter, - std::vector const& projections, + Expression* filter, std::vector const& projections, + std::vector const& coveringIndexAttributePositions, bool useRawDocumentPointers, bool random) : ExecutorInfos(make_shared_unordered_set(), make_shared_unordered_set({outputRegister}), @@ -66,6 +70,7 @@ EnumerateCollectionExecutorInfos::EnumerateCollectionExecutorInfos( _outVariable(outVariable), _filter(filter), _projections(projections), + _coveringIndexAttributePositions(coveringIndexAttributePositions), _outputRegisterId(outputRegister), _useRawDocumentPointers(useRawDocumentPointers), _produceResult(produceResult), @@ -99,6 +104,11 @@ std::vector const& EnumerateCollectionExecutorInfos::getProjections return _projections; } +std::vector const& EnumerateCollectionExecutorInfos::getCoveringIndexAttributePositions() const + noexcept { + return _coveringIndexAttributePositions; +} + bool EnumerateCollectionExecutorInfos::getProduceResult() const { return _produceResult; } @@ -116,15 +126,15 @@ EnumerateCollectionExecutor::EnumerateCollectionExecutor(Fetcher& fetcher, Infos : _infos(infos), _fetcher(fetcher), _documentProducer(nullptr), - _documentProducingFunctionContext(_input, nullptr, _infos.getOutputRegisterId(), - _infos.getProduceResult(), - _infos.getQuery(), _infos.getFilter(), - 
_infos.getProjections(), - ::emptyAttributePositions, + _documentProducingFunctionContext(_currentRow, nullptr, _infos.getOutputRegisterId(), + _infos.getProduceResult(), _infos.getQuery(), + _infos.getFilter(), _infos.getProjections(), + _infos.getCoveringIndexAttributePositions(), true, _infos.getUseRawDocumentPointers(), false), _state(ExecutionState::HASMORE), + _executorState(ExecutorState::HASMORE), _cursorHasMore(false), - _input(InputAqlItemRow{CreateInvalidInputRowHint{}}) { + _currentRow(InputAqlItemRow{CreateInvalidInputRowHint{}}) { _cursor = std::make_unique( _infos.getTrxPtr()->indexScan(_infos.getCollection()->name(), (_infos.getRandom() @@ -139,7 +149,8 @@ EnumerateCollectionExecutor::EnumerateCollectionExecutor(Fetcher& fetcher, Infos std::to_string(maxWait) + ")"); } if (_infos.getProduceResult()) { - _documentProducer = buildDocumentCallback(_documentProducingFunctionContext); + _documentProducer = + buildDocumentCallback(_documentProducingFunctionContext); } _documentSkipper = buildDocumentCallback(_documentProducingFunctionContext); } @@ -148,119 +159,158 @@ EnumerateCollectionExecutor::~EnumerateCollectionExecutor() = default; std::pair EnumerateCollectionExecutor::produceRows( OutputAqlItemRow& output) { - TRI_IF_FAILURE("EnumerateCollectionExecutor::produceRows") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +std::tuple EnumerateCollectionExecutor::skipRows( + size_t const toSkip) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +uint64_t EnumerateCollectionExecutor::skipEntries(size_t toSkip, + EnumerateCollectionStats& stats) { + uint64_t actuallySkipped = 0; + + if (_infos.getFilter() == nullptr) { + _cursor->skip(toSkip, actuallySkipped); + stats.incrScanned(actuallySkipped); + _documentProducingFunctionContext.getAndResetNumScanned(); + } else { + _cursor->nextDocument(_documentSkipper, toSkip); + size_t filtered = _documentProducingFunctionContext.getAndResetNumFiltered(); + size_t scanned = _documentProducingFunctionContext.getAndResetNumScanned(); + TRI_ASSERT(scanned >= filtered); + stats.incrFiltered(filtered); + stats.incrScanned(scanned); + actuallySkipped = scanned - filtered; } + _cursorHasMore = _cursor->hasMore(); + + return actuallySkipped; +} + +std::tuple EnumerateCollectionExecutor::skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call) { + AqlCall upstreamCall{}; EnumerateCollectionStats stats{}; + bool offsetPhase = (call.getOffset() > 0); + TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumFiltered() == 0); - _documentProducingFunctionContext.setOutputRow(&output); - while (true) { - if (!_cursorHasMore) { - std::tie(_state, _input) = _fetcher.fetchRow(); + while (inputRange.hasDataRow() && call.shouldSkip()) { + uint64_t skipped = 0; - if (_state == ExecutionState::WAITING) { - return {_state, stats}; - } + if (!_cursorHasMore) { + initializeNewRow(inputRange); + } - if (!_input) { - TRI_ASSERT(_state == ExecutionState::DONE); - return {_state, stats}; + if (_cursorHasMore) { + TRI_ASSERT(_currentRow.isInitialized()); + // if offset is > 0, we're in offset skip phase + if (offsetPhase) { + if (skipped < call.getOffset()) { + skipped += skipEntries(call.getOffset(), stats); + } else { + // we skipped enough in our offset phase + break; + } + } else { + // fullCount phase + _cursor->skipAll(skipped); + stats.incrScanned(skipped); + 
_documentProducingFunctionContext.getAndResetNumScanned(); + TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); } - _cursor->reset(); _cursorHasMore = _cursor->hasMore(); - continue; + call.didSkip(skipped); } + } - TRI_ASSERT(_input.isInitialized()); + if (_cursorHasMore) { + return {ExecutorState::HASMORE, stats, call.getSkipCount(), upstreamCall}; + } - TRI_IF_FAILURE("EnumerateCollectionBlock::moreDocuments") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } + upstreamCall.softLimit = call.getOffset(); + return {inputRange.upstreamState(), stats, call.getSkipCount(), upstreamCall}; +} - if (_infos.getProduceResult()) { - // properly build up results by fetching the actual documents - // using nextDocument() - _cursorHasMore = - _cursor->nextDocument(_documentProducer, output.numRowsLeft() /*atMost*/); - } else { - // performance optimization: we do not need the documents at all, - // so just call next() - TRI_ASSERT(!_documentProducingFunctionContext.hasFilter()); - _cursorHasMore = - _cursor->next(getNullCallback(_documentProducingFunctionContext), - output.numRowsLeft() /*atMost*/); - } +void EnumerateCollectionExecutor::initializeNewRow(AqlItemBlockInputRange& inputRange) { + if (_currentRow) { + std::ignore = inputRange.nextDataRow(); + } + std::tie(_currentRowState, _currentRow) = inputRange.peekDataRow(); + if (!_currentRow) { + return; + } - stats.incrScanned(_documentProducingFunctionContext.getAndResetNumScanned()); - stats.incrFiltered(_documentProducingFunctionContext.getAndResetNumFiltered()); + TRI_ASSERT(_currentRow.isInitialized()); - if (_state == ExecutionState::DONE && !_cursorHasMore) { - return {_state, stats}; - } - return {ExecutionState::HASMORE, stats}; - } + _cursor->reset(); + _cursorHasMore = _cursor->hasMore(); } -std::tuple EnumerateCollectionExecutor::skipRows( - size_t const toSkip) { - EnumerateCollectionStats stats{}; - TRI_IF_FAILURE("EnumerateCollectionExecutor::skipRows") { +std::tuple EnumerateCollectionExecutor::produceRows( + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + TRI_IF_FAILURE("EnumerateCollectionExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - if (!_cursorHasMore) { - std::tie(_state, _input) = _fetcher.fetchRow(); + EnumerateCollectionStats stats{}; + AqlCall upstreamCall{}; + upstreamCall.fullCount = output.getClientCall().fullCount; - if (_state == ExecutionState::WAITING) { - return std::make_tuple(_state, stats, 0); // tuple, cannot use initializer list due to build failure - } + TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); + TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumFiltered() == 0); + _documentProducingFunctionContext.setOutputRow(&output); + + while (inputRange.hasDataRow() && !output.isFull()) { - if (!_input) { - TRI_ASSERT(_state == ExecutionState::DONE); - return std::make_tuple(_state, stats, 0); // tuple, cannot use initializer list due to build failure + if (!_cursorHasMore) { + initializeNewRow(inputRange); } - _cursor->reset(); - _cursorHasMore = _cursor->hasMore(); - } + if (_cursorHasMore) { + TRI_ASSERT(_currentRow.isInitialized()); + if (_infos.getProduceResult()) { + // properly build up results by fetching the actual documents + // using nextDocument() + _cursorHasMore = + _cursor->nextDocument(_documentProducer, output.numRowsLeft() /*atMost*/); + } else { + // performance optimization: we do not need the documents at all, + // so just call next() + 
TRI_ASSERT(!_documentProducingFunctionContext.hasFilter()); + _cursorHasMore = + _cursor->next(getNullCallback(_documentProducingFunctionContext), + output.numRowsLeft() /*atMost*/); + } - TRI_ASSERT(_input.isInitialized()); - TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); - TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumFiltered() == 0); + stats.incrScanned(_documentProducingFunctionContext.getAndResetNumScanned()); + stats.incrFiltered(_documentProducingFunctionContext.getAndResetNumFiltered()); + } - uint64_t actuallySkipped = 0; - if (_infos.getFilter() == nullptr) { - _cursor->skip(toSkip, actuallySkipped); - stats.incrScanned(actuallySkipped); - _documentProducingFunctionContext.getAndResetNumScanned(); - } else { - _cursor->nextDocument(_documentSkipper, toSkip); - size_t filtered = _documentProducingFunctionContext.getAndResetNumFiltered(); - size_t scanned = _documentProducingFunctionContext.getAndResetNumScanned(); - TRI_ASSERT(scanned >= filtered); - stats.incrFiltered(filtered); - stats.incrScanned(scanned); - actuallySkipped = scanned - filtered; + TRI_IF_FAILURE("EnumerateCollectionBlock::moreDocuments") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } } - _cursorHasMore = _cursor->hasMore(); - if (_state == ExecutionState::DONE && !_cursorHasMore) { - return std::make_tuple(ExecutionState::DONE, stats, - actuallySkipped); // tuple, cannot use initializer list due to build failure + if (!_cursorHasMore) { + initializeNewRow(inputRange); } - return std::make_tuple(ExecutionState::HASMORE, stats, actuallySkipped); // tuple, cannot use initializer list due to build failure + return {inputRange.upstreamState(), stats, upstreamCall}; } void EnumerateCollectionExecutor::initializeCursor() { _state = ExecutionState::HASMORE; - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; + _executorState = ExecutorState::HASMORE; + _currentRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; _cursorHasMore = false; _cursor->reset(); } - #ifndef USE_ENTERPRISE bool EnumerateCollectionExecutor::waitForSatellites(ExecutionEngine* engine, Collection const* collection) const { diff --git a/arangod/Aql/EnumerateCollectionExecutor.h b/arangod/Aql/EnumerateCollectionExecutor.h index ff9b51127f52..31aa9446c41a 100644 --- a/arangod/Aql/EnumerateCollectionExecutor.h +++ b/arangod/Aql/EnumerateCollectionExecutor.h @@ -43,6 +43,8 @@ class Methods; } namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; struct Collection; class EnumerateCollectionStats; class ExecutionEngine; @@ -62,8 +64,8 @@ class EnumerateCollectionExecutorInfos : public ExecutorInfos { RegisterId nrOutputRegisters, std::unordered_set registersToClear, std::unordered_set registersToKeep, ExecutionEngine* engine, Collection const* collection, Variable const* outVariable, bool produceResult, - Expression* filter, - std::vector const& projections, + Expression* filter, std::vector const& projections, + std::vector const& coveringIndexAttributePositions, bool useRawDocumentPointers, bool random); EnumerateCollectionExecutorInfos() = delete; @@ -78,6 +80,7 @@ class EnumerateCollectionExecutorInfos : public ExecutorInfos { transaction::Methods* getTrxPtr() const; Expression* getFilter() const; std::vector const& getProjections() const noexcept; + std::vector const& getCoveringIndexAttributePositions() const noexcept; bool getProduceResult() const; bool getUseRawDocumentPointers() const; bool getRandom() const; @@ -89,6 +92,7 @@ class EnumerateCollectionExecutorInfos : public ExecutorInfos { 
Variable const* _outVariable; Expression* _filter; std::vector const& _projections; + std::vector const& _coveringIndexAttributePositions; RegisterId _outputRegisterId; bool _useRawDocumentPointers; bool _produceResult; @@ -117,6 +121,13 @@ class EnumerateCollectionExecutor { EnumerateCollectionExecutor(Fetcher& fetcher, Infos&); ~EnumerateCollectionExecutor(); + /** + * @brief Will fetch a new InputRow if necessary and store their local state + * + * @return bool done in case we do not have any input and upstreamState is done + */ + void initializeNewRow(AqlItemBlockInputRange& inputRange); + /** * @brief produce the next Row of Aql Values. * @@ -126,11 +137,30 @@ class EnumerateCollectionExecutor { std::pair produceRows(OutputAqlItemRow& output); std::tuple skipRows(size_t atMost); + /** + * @brief produce the next Rows of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple produceRows( + AqlItemBlockInputRange& input, OutputAqlItemRow& output); + + uint64_t skipEntries(size_t toSkip, EnumerateCollectionStats& stats); + /** + * @brief skip the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call); + void initializeCursor(); private: bool waitForSatellites(ExecutionEngine* engine, Collection const* collection) const; + void setAllowCoveringIndexOptimization(bool allowCoveringIndexOptimization); + private: Infos& _infos; Fetcher& _fetcher; @@ -138,8 +168,10 @@ class EnumerateCollectionExecutor { IndexIterator::DocumentCallback _documentSkipper; DocumentProducingFunctionContext _documentProducingFunctionContext; ExecutionState _state; + ExecutorState _executorState; bool _cursorHasMore; - InputAqlItemRow _input; + InputAqlItemRow _currentRow; + ExecutorState _currentRowState; std::unique_ptr _cursor; }; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 7a0333327d6f..dbb8bdbac935 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -136,7 +136,10 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); template constexpr bool isNewStyleExecutor = is_one_of_v, - IdExecutor>, ReturnExecutor, HashedCollectExecutor, IndexExecutor, + IdExecutor>, ReturnExecutor, IndexExecutor, EnumerateCollectionExecutor, + // TODO: re-enable after new subquery end & start are implemented + // CalculationExecutor, CalculationExecutor, CalculationExecutor, + HashedCollectExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode @@ -1075,7 +1078,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { "Fetcher is chosen for skipping, but has not skipRows method!"); static_assert(useExecutor == - (is_one_of_v EnumerateCollectionNode::createBlock( getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), calcRegsToKeep(), &engine, this->_collection, _outVariable, (this->isVarUsedLater(_outVariable) || this->_filter != nullptr), - this->_filter.get(), this->projections(), + this->_filter.get(), this->projections(), this->coveringIndexAttributePositions(), EngineSelectorFeature::ENGINE->useRawDocumentPointers(), this->_random); return std::make_unique>(&engine, this, std::move(infos)); diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index cb0d06bb60af..515c72fdfce4 100644 --- 
a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -28,7 +28,6 @@ #include "Aql/AqlCall.h" #include "Aql/AqlCallStack.h" #include "Aql/AqlItemBlockInputRange.h" -#include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" #include "Aql/OutputAqlItemRow.h" diff --git a/arangod/Utils/OperationCursor.cpp b/arangod/Utils/OperationCursor.cpp index df98e0c6d7d7..e6030a9dc83d 100644 --- a/arangod/Utils/OperationCursor.cpp +++ b/arangod/Utils/OperationCursor.cpp @@ -129,3 +129,25 @@ void OperationCursor::skip(uint64_t toSkip, uint64_t& skipped) { _hasMore = false; } } + +/// @brief Skip all elements. +/// skipped will be increased by the amount of skipped elements +/// afterwards Check hasMore()==true before using this NOTE: This will +/// throw on OUT_OF_MEMORY +void OperationCursor::skipAll(uint64_t& skipped) { + // TODO: move that logic to index level to become more efficient + size_t toSkip = 1000; + + if (!hasMore()) { + TRI_ASSERT(false); + // You requested more even if you should have checked it before. + return; + } + + while (_hasMore) { + _indexIterator->skip(toSkip, skipped); + if (skipped != toSkip) { + _hasMore = false; + } + } +} diff --git a/arangod/Utils/OperationCursor.h b/arangod/Utils/OperationCursor.h index 5afb3cf5232c..05a0f955c991 100644 --- a/arangod/Utils/OperationCursor.h +++ b/arangod/Utils/OperationCursor.h @@ -114,6 +114,12 @@ struct OperationCursor { /// afterwards Check hasMore()==true before using this NOTE: This will /// throw on OUT_OF_MEMORY void skip(uint64_t toSkip, uint64_t& skipped); + + /// @brief skip all elements. + /// skipped will be increased by the amount of skipped elements + /// afterwards Check hasMore()==true before using this NOTE: This will + /// throw on OUT_OF_MEMORY + void skipAll(uint64_t& skipped); }; } // namespace arangodb diff --git a/tests/Aql/CalculationExecutorTest.cpp b/tests/Aql/CalculationExecutorTest.cpp index 80f79f67eed5..878b5323668b 100644 --- a/tests/Aql/CalculationExecutorTest.cpp +++ b/tests/Aql/CalculationExecutorTest.cpp @@ -25,12 +25,14 @@ #include "gtest/gtest.h" +#include "Aql/AqlCall.h" +#include "AqlItemBlockHelper.h" +#include "ExecutorTestHelper.h" #include "RowFetcherHelper.h" #include "Aql/AqlItemBlock.h" #include "Aql/Ast.h" #include "Aql/CalculationExecutor.h" -#include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutionPlan.h" #include "Aql/Expression.h" #include "Aql/OutputAqlItemRow.h" @@ -67,13 +69,10 @@ namespace aql { // CalculationExecutor and // CalculationExecutor! 
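The OperationCursor::skipAll helper added earlier in this patch drains the underlying index iterator in fixed batches of 1000. A minimal standalone sketch of that batching pattern is below; SimpleCursor is a hypothetical stand-in for the iterator, not the ArangoDB interface, and the sketch uses the per-batch count (rather than the accumulated total) to decide when the cursor is exhausted.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for an index iterator; not the ArangoDB API.
struct SimpleCursor {
  uint64_t remaining;  // entries left in the cursor

  // Skip up to toSkip entries and add the number actually skipped to skipped.
  void skip(uint64_t toSkip, uint64_t& skipped) {
    uint64_t n = std::min(remaining, toSkip);
    remaining -= n;
    skipped += n;
  }
};

// Skip everything that is left, batch by batch, and return the total.
uint64_t skipAll(SimpleCursor& cursor) {
  constexpr uint64_t batchSize = 1000;  // same fixed batch size as in the patch
  uint64_t total = 0;
  while (true) {
    uint64_t skippedNow = 0;
    cursor.skip(batchSize, skippedNow);
    total += skippedNow;
    if (skippedNow < batchSize) {  // the cursor ran dry inside this batch
      break;
    }
  }
  return total;
}

int main() {
  SimpleCursor cursor{2500};
  std::cout << skipAll(cursor) << "\n";  // prints 2500
}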
-class CalculationExecutorTest : public ::testing::Test { +class CalculationExecutorTest : public AqlExecutorTestCase { protected: ExecutionState state; - ResourceMonitor monitor; AqlItemBlockManager itemBlockManager; - mocks::MockAqlServer server; - std::unique_ptr fakedQuery; Ast ast; AstNode* one; Variable var; @@ -87,8 +86,6 @@ class CalculationExecutorTest : public ::testing::Test { CalculationExecutorTest() : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - server(), - fakedQuery(server.createFakeQuery()), ast(fakedQuery.get()), one(ast.createNodeValueInt(1)), var("a", 0), @@ -109,7 +106,8 @@ class CalculationExecutorTest : public ::testing::Test { TEST_F(CalculationExecutorTest, there_are_no_rows_upstream_the_producer_does_not_wait) { SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input.steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, input.steal(), false); CalculationExecutor testee(fetcher, infos); // Use this instead of std::ignore, so the tests will be noticed and // updated when someone changes the stats type in the return value of @@ -126,7 +124,8 @@ TEST_F(CalculationExecutorTest, there_are_no_rows_upstream_the_producer_does_not TEST_F(CalculationExecutorTest, there_are_no_rows_upstream_the_producer_waits) { SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input.steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, input.steal(), true); CalculationExecutor testee(fetcher, infos); // Use this instead of std::ignore, so the tests will be noticed and // updated when someone changes the stats type in the return value of @@ -147,7 +146,8 @@ TEST_F(CalculationExecutorTest, there_are_no_rows_upstream_the_producer_waits) { TEST_F(CalculationExecutorTest, there_are_rows_in_the_upstream_the_producer_does_not_wait) { SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; auto input = VPackParser::fromJson("[ [0], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, input->steal(), false); CalculationExecutor testee(fetcher, infos); NoStats stats{}; @@ -191,7 +191,8 @@ TEST_F(CalculationExecutorTest, there_are_rows_in_the_upstream_the_producer_does TEST_F(CalculationExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; auto input = VPackParser::fromJson("[ [0], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, input->steal(), true); CalculationExecutor testee(fetcher, infos); NoStats stats{}; @@ -236,6 +237,126 @@ TEST_F(CalculationExecutorTest, there_are_rows_in_the_upstream_the_producer_wait ASSERT_FALSE(row.produced()); } +TEST_F(CalculationExecutorTest, test_produce_datarange) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
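The data-range tests that follow exercise the reworked executor interface: produceRows no longer pulls rows through the fetcher but consumes an AqlItemBlockInputRange and writes into an OutputAqlItemRow until one of the two is exhausted. The sketch below models only that control flow with simplified stand-in types (InputRange, Output and State are illustrative, not the ArangoDB classes):

#include <cstddef>
#include <iostream>
#include <vector>

enum class State { HASMORE, DONE };

// Simplified stand-ins for AqlItemBlockInputRange / OutputAqlItemRow.
struct InputRange {
  std::vector<int> rows;
  std::size_t pos = 0;
  bool hasDataRow() const { return pos < rows.size(); }
  int nextDataRow() { return rows[pos++]; }
  State upstreamState() const { return hasDataRow() ? State::HASMORE : State::DONE; }
};

struct Output {
  std::size_t limit;
  std::vector<int> rows;
  bool isFull() const { return rows.size() >= limit; }
  void write(int value) { rows.push_back(value); }
};

// The control flow of a "new style" produceRows: consume input while there is
// room in the output, then report whether the caller has to come back.
State produceRows(InputRange& input, Output& output) {
  while (input.hasDataRow() && !output.isFull()) {
    output.write(input.nextDataRow() + 1);  // the "calculation": add one
  }
  return input.upstreamState();
}

int main() {
  InputRange input{{0, 1, 2}, 0};
  Output output{3, {}};
  State state = produceRows(input, output);
  std::cout << output.rows.size() << " rows, done: "
            << (state == State::DONE) << "\n";  // 3 rows, done: 1
}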
+ auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; + CalculationExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + EXPECT_EQ(output.numRowsWritten(), 0); + auto const [state, stats, call] = testee.produceRows(input, output); + EXPECT_EQ(output.numRowsWritten(), 3); + + EXPECT_EQ(state, ExecutorState::DONE); + // verify calculation + { + AqlValue value; + auto block = output.stealBlock(); + for (std::size_t index = 0; index < 3; index++) { + value = block->getValue(index, outRegID); + ASSERT_TRUE(value.isNumber()); + ASSERT_EQ(value.toInt64(), static_cast(index + 1)); + } + } +} + +TEST_F(CalculationExecutorTest, test_produce_datarange_need_more) { + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! + auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; + CalculationExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}}); + + AqlItemBlockInputRange input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), + infos.registersToClear(), + AqlCall{0, 3, AqlCall::Infinity{}, false}); + + auto myCall = output.getClientCall(); + EXPECT_EQ(myCall.getLimit(), 3); + EXPECT_EQ(output.numRowsWritten(), 0); + + auto const [state, stats, outputCall] = testee.produceRows(input, output); + EXPECT_EQ(output.numRowsWritten(), 3); + + EXPECT_EQ(state, ExecutorState::HASMORE); + // verify calculation + { + AqlValue value; + auto block = output.stealBlock(); + for (std::size_t index = 0; index < 3; index++) { + value = block->getValue(index, outRegID); + ASSERT_TRUE(value.isNumber()); + ASSERT_EQ(value.toInt64(), static_cast(index + 1)); + } + } + // Test the Call we send to upstream + EXPECT_EQ(outputCall.offset, 0); + EXPECT_FALSE(outputCall.hasHardLimit()); + // Avoid overfetching. I do not have a strong requirement on this + // test, however this is what we do right now. + EXPECT_EQ(outputCall.getLimit(), 0); + EXPECT_FALSE(outputCall.fullCount); +} + +TEST_F(CalculationExecutorTest, DISABLED_test_produce_datarange_has_more) { // TODO: fix and re-enable after this executor newStyle is active + // This fetcher will not be called! + // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
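The call object constructed in the test above, AqlCall{0, 3, AqlCall::Infinity{}, false}, bundles an offset, a soft and a hard limit (either of which may be unbounded) and the fullCount flag, and the assertions check the derived getLimit()/hasHardLimit() values. A reduced sketch of such a call type, using std::variant for the "number or infinity" limit, might look as follows; the member names and helpers are illustrative, not the exact ArangoDB definitions:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <variant>

struct Call {
  struct Infinity {};                     // "no limit"
  using Limit = std::variant<std::size_t, Infinity>;

  std::size_t offset{0};                  // rows to skip before producing
  Limit softLimit{Infinity{}};            // produce at most this many rows now
  Limit hardLimit{Infinity{}};            // after this many rows the query is done
  bool fullCount{false};                  // keep counting rows past the limit?

  static std::size_t asNumber(Limit const& limit) {
    if (std::holds_alternative<Infinity>(limit)) {
      return std::numeric_limits<std::size_t>::max();
    }
    return std::get<std::size_t>(limit);
  }

  bool hasHardLimit() const { return !std::holds_alternative<Infinity>(hardLimit); }

  // Effective limit: the smaller of soft and hard limit.
  std::size_t getLimit() const { return std::min(asNumber(softLimit), asNumber(hardLimit)); }

  bool shouldSkip() const { return offset > 0; }
  void didSkip(std::size_t n) { offset -= n; }
};

int main() {
  Call call{0, std::size_t{3}, Call::Infinity{}, false};
  std::cout << call.getLimit() << " " << call.hasHardLimit() << "\n";  // 3 0
}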
+ auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( + itemBlockManager, fakeUnusedBlock->steal(), false); + + // This is the relevant part of the test + SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; + CalculationExecutor testee(fetcher, infos); + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}, {R"(3)"}, {R"(4)"}}); + + AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + EXPECT_EQ(output.numRowsWritten(), 0); + AqlCall myCall{0, 3, AqlCall::Infinity{}, false}; + output.setCall(std::move(myCall)); + + auto const [state, stats, call] = testee.produceRows(input, output); + EXPECT_EQ(output.numRowsWritten(), 3); + + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_TRUE(input.hasDataRow()); + // We still have two values in block: 3 and 4 + { + // pop 3 + auto const [state, row] = input.nextDataRow(); + EXPECT_EQ(state, ExecutorState::HASMORE); + EXPECT_EQ(row.getValue(0).toInt64(), 3); + } + { + // pop 4 + auto const [state, row] = input.nextDataRow(); + EXPECT_EQ(state, ExecutorState::DONE); + EXPECT_EQ(row.getValue(0).toInt64(), 4); + } + EXPECT_FALSE(input.hasDataRow()); +} + } // namespace aql } // namespace tests } // namespace arangodb diff --git a/tests/Aql/EnumerateCollectionExecutorTest.cpp b/tests/Aql/EnumerateCollectionExecutorTest.cpp index fea5b93c5bfe..5f3419c43621 100644 --- a/tests/Aql/EnumerateCollectionExecutorTest.cpp +++ b/tests/Aql/EnumerateCollectionExecutorTest.cpp @@ -25,12 +25,16 @@ #include "gtest/gtest.h" +#include "Aql/AqlCall.h" +#include "AqlItemBlockHelper.h" +#include "ExecutorTestHelper.h" #include "IResearch/common.h" #include "Mocks/Servers.h" +#include "QueryHelper.h" #include "RowFetcherHelper.h" -#include "fakeit.hpp" #include "Aql/AqlItemBlock.h" +#include "Aql/Ast.h" #include "Aql/Collection.h" #include "Aql/EnumerateCollectionExecutor.h" #include "Aql/ExecutionBlockImpl.h" @@ -39,17 +43,14 @@ #include "Aql/ResourceUsage.h" #include "Aql/Stats.h" #include "Aql/Variable.h" -#include "Mocks/StorageEngineMock.h" #include "RestServer/QueryRegistryFeature.h" #include "Sharding/ShardingFeature.h" #include "StorageEngine/EngineSelectorFeature.h" -#include "StorageEngine/StorageEngine.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" #include "VocBase/AccessMode.h" #include "VocBase/LogicalCollection.h" -#include #include #include @@ -60,29 +61,31 @@ namespace arangodb { namespace tests { namespace aql { +// old tests + +static const std::string GetAllDocs = + R"aql(FOR doc IN UnitTestCollection SORT doc.sortValue RETURN doc.value)aql"; + using CursorType = arangodb::transaction::Methods::CursorType; -class EnumerateCollectionExecutorTestNoRowsUpstream : public ::testing::Test { +class EnumerateCollectionExecutorTest : public AqlExecutorTestCase { protected: ExecutionState state; - ResourceMonitor monitor; AqlItemBlockManager itemBlockManager; - arangodb::tests::mocks::MockAqlServer server; - TRI_vocbase_t vocbase; // required to create collection + TRI_vocbase_t& vocbase; std::shared_ptr json; - arangodb::LogicalCollection collection; - fakeit::Mock mockEngine; - fakeit::Mock mockTrx; // fake transaction::Methods - fakeit::Mock mockQuery; + std::shared_ptr collection; + + Ast ast; Variable outVariable; bool varUsedLater; 
std::unordered_set const regToClear; std::unordered_set const regToKeep; - ExecutionEngine& engine; - Collection abc; + ExecutionEngine* engine; + Collection aqlCollection; std::vector const projections; - transaction::Methods& trx; + std::vector const coveringIndexAttributePositions; bool useRawPointers; bool random; @@ -91,71 +94,338 @@ class EnumerateCollectionExecutorTestNoRowsUpstream : public ::testing::Test { SharedAqlItemBlockPtr block; VPackBuilder input; - EnumerateCollectionExecutorTestNoRowsUpstream() + EnumerateCollectionExecutorTest() : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - server(), - vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, systemDBInfo(server.server())), - json(arangodb::velocypack::Parser::fromJson( - "{ \"cid\" : \"1337\", \"name\": \"UnitTestCollection\" }")), - collection(vocbase, json->slice(), true), + vocbase(_server->getSystemDatabase()), + json(VPackParser::fromJson(R"({"name":"UnitTestCollection"})")), + // collection(), + // fakedQuery(server.createFakeQuery(false, "return 1")), + ast(fakedQuery.get()), outVariable("name", 1), varUsedLater(false), - engine(mockEngine.get()), - abc("blabli", &vocbase, arangodb::AccessMode::Type::READ), - trx(mockTrx.get()), + engine(fakedQuery->engine()), + aqlCollection("UnitTestCollection", &vocbase, arangodb::AccessMode::Type::READ), useRawPointers(false), random(false), - infos(0 /*outReg*/, 1 /*nrIn*/, 1 /*nrOut*/, regToClear, regToKeep, - &engine, &abc, &outVariable, varUsedLater, nullptr, projections, - useRawPointers, random), + infos(0 /*outReg*/, 1 /*nrIn*/, 1 /*nrOut*/, regToClear, regToKeep, engine, + &aqlCollection, &outVariable, varUsedLater, nullptr, projections, + coveringIndexAttributePositions, useRawPointers, random), block(new AqlItemBlock(itemBlockManager, 1000, 2)) { - // fake indexScan - fakeit::When(Method(mockTrx, indexScan)) - .AlwaysDo(std::function(std::string const&, CursorType&)>( - [this](std::string const&, CursorType&) -> std::unique_ptr { - return std::make_unique(&collection, &(mockTrx.get())); - })); - - Query& query = mockQuery.get(); - fakeit::When(Method(mockQuery, trx)).AlwaysReturn(&(mockTrx.get())); - fakeit::When(Method(mockEngine, getQuery)).AlwaysReturn(&query); + try { + collection = vocbase.createCollection(json->slice()); + } catch (std::exception const& e) { + // ignore, already created the collection + } } }; -TEST_F(EnumerateCollectionExecutorTestNoRowsUpstream, the_producer_does_not_wait) { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); +TEST_F(EnumerateCollectionExecutorTest, the_produce_datarange_empty) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); + EnumerateCollectionExecutor testee(fetcher, infos); + // Use this instead of std::ignore, so the tests will be noticed and + // updated when someone changes the stats type in the return value of + // EnumerateCollectionExecutor::produceRows(). 
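Instead of mocking the storage layer with fakeit, the reworked fixture creates a real "UnitTestCollection" in the system database and simply swallows the error when an earlier test in the suite already created it. That get-or-create pattern is roughly the following; Registry is a hypothetical in-memory stand-in used only to illustrate why createCollection is wrapped in a try/catch:

#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical in-memory stand-in for a database that owns named collections.
struct Registry {
  std::map<std::string, std::shared_ptr<int>> collections;

  std::shared_ptr<int> createCollection(std::string const& name) {
    if (collections.count(name) > 0) {
      throw std::runtime_error("duplicate collection name");
    }
    return collections[name] = std::make_shared<int>(0);
  }

  std::shared_ptr<int> lookupCollection(std::string const& name) {
    auto it = collections.find(name);
    return it == collections.end() ? nullptr : it->second;
  }
};

// Get-or-create: create if possible, otherwise fall back to the existing one.
std::shared_ptr<int> getOrCreate(Registry& db, std::string const& name) {
  try {
    return db.createCollection(name);
  } catch (std::exception const&) {
    return db.lookupCollection(name);  // already there, reuse it
  }
}

int main() {
  Registry db;
  auto first = getOrCreate(db, "UnitTestCollection");
  auto second = getOrCreate(db, "UnitTestCollection");
  std::cout << (first == second) << "\n";  // 1: both fixtures share the collection
}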
+ + SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{}}); + + AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + + auto const [state, stats, call] = testee.produceRows(inputRange, output); + ASSERT_EQ(state, ExecutorState::DONE); + ASSERT_FALSE(output.produced()); +} + +TEST_F(EnumerateCollectionExecutorTest, the_skip_datarange_empty) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); + EnumerateCollectionExecutor testee(fetcher, infos); + // Use this instead of std::ignore, so the tests will be noticed and + // updated when someone changes the stats type in the return value of + // EnumerateCollectionExecutor::produceRows(). + + SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{}}); + + AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + AqlCall skipCall{1000, AqlCall::Infinity{}, AqlCall::Infinity{}, false}; + auto const [state, stats, skipped, call] = testee.skipRowsRange(inputRange, skipCall); + ASSERT_EQ(state, ExecutorState::DONE); + ASSERT_EQ(skipped, 0); + ASSERT_FALSE(output.produced()); +} + +TEST_F(EnumerateCollectionExecutorTest, the_produce_datarange) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); EnumerateCollectionExecutor testee(fetcher, infos); // Use this instead of std::ignore, so the tests will be noticed and // updated when someone changes the stats type in the return value of // EnumerateCollectionExecutor::produceRows(). 
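The skip test above drives the executor through skipRowsRange with an offset-only call (offset 1000, both limits unbounded) and expects zero skipped rows on an empty range together with the upstream DONE state. The core contract (consume the range while the call still wants to skip, then report how much was skipped and which state to pass on) can be sketched with simplified stand-in types:

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

enum class State { HASMORE, DONE };

struct Range {                      // stand-in for AqlItemBlockInputRange
  std::vector<int> rows;
  std::size_t pos = 0;
  bool hasDataRow() const { return pos < rows.size(); }
  void nextDataRow() { ++pos; }
  State upstreamState() const { return hasDataRow() ? State::HASMORE : State::DONE; }
};

struct SkipCall {                   // stand-in for the offset part of an AqlCall
  std::size_t offset;
  bool shouldSkip() const { return offset > 0; }
  void didSkip(std::size_t n) { offset -= n; }
};

// Returns how many rows were skipped and the state to report upstream.
std::pair<std::size_t, State> skipRowsRange(Range& range, SkipCall& call) {
  std::size_t skipped = 0;
  while (call.shouldSkip() && range.hasDataRow()) {
    range.nextDataRow();
    call.didSkip(1);
    ++skipped;
  }
  return {skipped, range.upstreamState()};
}

int main() {
  Range empty{};                    // the "empty datarange" case from the test
  SkipCall call{1000};
  auto [skipped, state] = skipRowsRange(empty, call);
  std::cout << skipped << " " << (state == State::DONE) << "\n";  // 0 1
}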
- EnumerateCollectionStats stats{}; - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"({ "cid" : "1337", "name": "UnitTestCollection" })"}}); + + // insert 3x documents + std::string insertQuery = + R"aql(INSERT {_key: "testee", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection)aql"; + SCOPED_TRACE(insertQuery); + AssertQueryHasResult(vocbase, insertQuery, VPackSlice::emptyArraySlice()); + auto expected = VPackParser::fromJson(R"([1])"); + AssertQueryHasResult(vocbase, GetAllDocs, expected->slice()); + + std::string insertQueryB = + R"aql(INSERT {_key: "testeeB", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection)aql"; + SCOPED_TRACE(insertQueryB); + AssertQueryHasResult(vocbase, insertQueryB, VPackSlice::emptyArraySlice()); + + std::string insertQueryC = + R"aql(INSERT {_key: "testeeC", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection)aql"; + SCOPED_TRACE(insertQueryC); + AssertQueryHasResult(vocbase, insertQueryC, VPackSlice::emptyArraySlice()); + + AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); + + auto const [state, stats, call] = testee.produceRows(inputRange, output); + ASSERT_EQ(state, ExecutorState::DONE); + ASSERT_EQ(stats.getFiltered(), 0); + ASSERT_EQ(stats.getScanned(), 3); + ASSERT_FALSE(output.produced()); } -TEST_F(EnumerateCollectionExecutorTestNoRowsUpstream, the_producer_waits) { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); +TEST_F(EnumerateCollectionExecutorTest, the_skip_datarange) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, input.steal(), false); EnumerateCollectionExecutor testee(fetcher, infos); // Use this instead of std::ignore, so the tests will be noticed and // updated when someone changes the stats type in the return value of // EnumerateCollectionExecutor::produceRows(). 
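The three hand-written INSERT statements above are what the parametrized fixture further down generalizes into an insertDocuments helper that builds one query per document with a unique _key. A trimmed-down sketch of that query-string generation (string building only, no query execution or transaction handling) is:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Build one AQL INSERT statement per document, with unique keys testee1..testeeN.
std::vector<std::string> buildSeedQueries(std::size_t amount) {
  std::vector<std::string> queries;
  queries.reserve(amount);
  for (std::size_t i = 1; i <= amount; ++i) {
    queries.push_back(
        R"aql(INSERT {_key: "testee)aql" + std::to_string(i) +
        R"aql(", value: 1, sortValue: 1} INTO UnitTestCollection RETURN NEW)aql");
  }
  return queries;
}

int main() {
  for (auto const& q : buildSeedQueries(3)) {
    std::cout << q << "\n";
  }
}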
- EnumerateCollectionStats stats{}; - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), + SharedAqlItemBlockPtr inBlock = + buildBlock<1>(itemBlockManager, + {{R"({ "cid" : "1337", "name": "UnitTestCollection" })"}}); + + /* WE ALREADY inserted 3x documents in the test before TODO: clean this up -> proper setup/teardown + // insert 3x documents + std::string insertQuery = + R"aql(INSERT {_key: "testee", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection)aql"; + SCOPED_TRACE(insertQuery); + AssertQueryHasResult(vocbase, insertQuery, VPackSlice::emptyArraySlice()); + auto expected = VPackParser::fromJson(R"([1])"); + AssertQueryHasResult(vocbase, GetAllDocs, expected->slice()); + + std::string insertQueryB = + R"aql(INSERT {_key: "testeeB", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection)aql"; + SCOPED_TRACE(insertQueryB); + AssertQueryHasResult(vocbase, insertQueryB, VPackSlice::emptyArraySlice()); + + std::string insertQueryC = + R"aql(INSERT {_key: "testeeC", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection)aql"; + SCOPED_TRACE(insertQueryC); + AssertQueryHasResult(vocbase, insertQueryC, VPackSlice::emptyArraySlice()); + */ + AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); + AqlCall skipCall{1000, AqlCall::Infinity{}, AqlCall::Infinity{}, false}; + auto const [state, stats, skipped, call] = testee.skipRowsRange(inputRange, skipCall); + ASSERT_EQ(state, ExecutorState::DONE); + ASSERT_EQ(skipped, 3); + ASSERT_FALSE(output.produced()); +} + +// new framework tests + +// This is only to get a split-type. 
The Type is independent of actual template parameters +using EnumerateCollectionTestHelper = + ExecutorTestHelper<1, 1>; +using EnumerateCollectionSplitType = EnumerateCollectionTestHelper::SplitType; +using EnumerateCollectionInputParam = std::tuple; + +class EnumerateCollectionExecutorTestProduce + : public AqlExecutorTestCaseWithParam { + protected: + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager; + + TRI_vocbase_t& vocbase; + std::shared_ptr json; + std::shared_ptr collection; + + SharedAqlItemBlockPtr block; + NoStats stats; + Ast ast; + + // needed for infos + Variable outVariable; + bool varUsedLater; + ExecutionEngine* engine; + std::vector const projections; + std::vector const coveringIndexAttributePositions; + Collection aqlCollection; + bool useRawPointers; + bool random; + + EnumerateCollectionExecutorInfos infos; + + EnumerateCollectionExecutorTestProduce() + : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), + vocbase(_server->getSystemDatabase()), + json(VPackParser::fromJson(R"({"name":"UnitTestCollection"})")), + collection(vocbase.createCollection(json->slice())), + ast(fakedQuery.get()), + outVariable("name", 1), + varUsedLater(true), + engine(fakedQuery.get()->engine()), + aqlCollection("UnitTestCollection", &vocbase, arangodb::AccessMode::Type::READ), + useRawPointers(false), + random(false), + infos(1, 1, 2, {}, {}, engine, &aqlCollection, &outVariable, varUsedLater, nullptr, + projections, coveringIndexAttributePositions, useRawPointers, random) { + } + + auto makeInfos(RegisterId outputRegister = 0, RegisterId nrInputRegister = 1, + RegisterId nrOutputRegister = 1, + std::unordered_set regToClear = {}, + std::unordered_set regToKeep = {}) + -> EnumerateCollectionExecutorInfos { + EnumerateCollectionExecutorInfos infos{ + outputRegister, nrInputRegister, nrOutputRegister, + regToClear, regToKeep, engine, + &aqlCollection, &outVariable, varUsedLater, + nullptr, projections, coveringIndexAttributePositions, + useRawPointers, random}; + block = SharedAqlItemBlockPtr{new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)}; + return infos; + } + + // insert amount of documents into the vocbase + VPackOptions* insertDocuments(size_t amount, std::vector& queryResults) { + // TODO: Can be optimized to not use AQL INSERT (trx object directly instead) + std::string insertQuery = + R"aql(INSERT {_key: "testee1", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection RETURN NEW)aql"; + SCOPED_TRACE(insertQuery); + auto queryRes = arangodb::tests::executeQuery(vocbase, insertQuery, {}); + queryResults.push_back(queryRes.data.get()->slice().at(0).toJson(queryRes.context->getVPackOptions())); + + for (size_t i = 2; i <= amount; i++) { + std::string insertQueryPart1 = R"aql(INSERT {_key: "testee)aql"; + std::string insertQueryPart2 = std::to_string(i); + std::string insertQueryPart3 = + R"(", value: 1, sortValue: 1, nestedObject: {value: 1} } INTO UnitTestCollection RETURN NEW)"; + std::string finalQuery = insertQueryPart1 + insertQueryPart2 + insertQueryPart3; + SCOPED_TRACE(finalQuery); + auto queryResInner = arangodb::tests::executeQuery(vocbase, finalQuery, {}); + queryResults.push_back(queryResInner.data.get()->slice().at(0).toJson(queryResInner.context->getVPackOptions())); + } + + return queryRes.context->getVPackOptions(); + } + + MatrixBuilder<1> buildQueryDocumentsMatrix(std::vector& queryResults) { + size_t documentAmount = queryResults.size(); + MatrixBuilder<1> matrix; + matrix.reserve(documentAmount); + for (auto 
const& documentStr : queryResults) { + const char* cstr = documentStr.c_str(); + matrix.emplace_back(RowBuilder<1>{cstr}); + } + + return matrix; + } +}; + +// DISABLED because we need to be able to compare real documents (currently not possible) +TEST_P(EnumerateCollectionExecutorTestProduce, DISABLED_produce_all_documents) { + auto [split] = GetParam(); + + uint64_t numberOfDocumentsToInsert = 10; + std::vector queryResults; + auto vpackOptions = insertDocuments(numberOfDocumentsToInsert, queryResults); + EXPECT_EQ(vocbase.lookupCollection("UnitTestCollection") + ->numberDocuments(fakedQuery->trx(), transaction::CountType::Normal), + numberOfDocumentsToInsert); // validate that our document inserts worked + + ExecutorTestHelper<1, 1>(*fakedQuery) + .setInputValue({{RowBuilder<1>{R"("unused")"}}}) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, false}) + //.setQueryVpackOptions(true, vpackOptions) + //.allowAnyOutputOrder(true) + .expectSkipped(0) + //.expectOutput({0}, buildQueryDocumentsMatrix(queryResults)) + /* .expectOutput({0}, { + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}, + {R"(null)"}})*/ + .expectedState(ExecutionState::DONE) + .setExecBlock(std::move(makeInfos())) + .run(); +} + +// DISABLED because we need to be able to compare real documents (currently not possible) +TEST_P(EnumerateCollectionExecutorTestProduce, DISABLED_produce_5_documents) { + auto [split] = GetParam(); + + uint64_t numberOfDocumentsToInsert = 10; + std::vector queryResults; + auto vpackOptions = insertDocuments(numberOfDocumentsToInsert, queryResults); + + ExecutorTestHelper<1, 1>(*fakedQuery) + .setInputValue({{RowBuilder<1>{R"({ "cid" : "1337", "name": "UnitTestCollection" })"}}}) + // .setInputValue({{RowBuilder<1>{R"("unused")"}}}) + .setInputSplitType(split) + .setCall(AqlCall{0, 5, AqlCall::Infinity{}, false}) + .expectSkipped(0) + .expectOutput({0}, {{R"(null)"}, {R"(null)"}, {R"(null)"}, {R"(null)"}, {R"(null)"}}) + .expectedState(ExecutionState::HASMORE) + .setExecBlock(std::move(makeInfos())) + .run(); +} + + +// DISABLED because we need to be able to compare real documents (currently not possible) +TEST_P(EnumerateCollectionExecutorTestProduce, DISABLED_skip_5_documents_default) { + auto [split] = GetParam(); + + uint64_t numberOfDocumentsToInsert = 10; + std::vector queryResults; + auto vpackOptions = insertDocuments(numberOfDocumentsToInsert, queryResults); + + ExecutorTestHelper<1, 1>(*fakedQuery) + .setInputValue({{RowBuilder<1>{R"({ "cid" : "1337", "name": +"UnitTestCollection" })"}}}) .setInputSplitType(split) .setCall(AqlCall{5, +AqlCall::Infinity{}, AqlCall::Infinity{}, false}) .expectSkipped(5) .expectOutput({0}, +{{R"(null)"}, {R"(null)"}, {R"(null)"}, {R"(null)"}, {R"(null)"}}) + .expectedState(ExecutionState::DONE) + .setExecBlock(std::move(makeInfos())) + .run(); } +template +const EnumerateCollectionSplitType splitIntoBlocks = + EnumerateCollectionSplitType{std::vector{vs...}}; +template +const EnumerateCollectionSplitType splitStep = EnumerateCollectionSplitType{step}; + +INSTANTIATE_TEST_CASE_P(EnumerateCollectionExecutor, EnumerateCollectionExecutorTestProduce, + ::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, splitStep<2>)); + } // namespace aql } // namespace tests } // namespace arangodb diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index b280ab9c7c3b..cfc344f5f4f8 100644 --- 
a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -38,6 +38,7 @@ #include "Aql/ExecutionEngine.h" #include "Aql/ExecutionNode.h" #include "Aql/ExecutionState.h" +#include "Aql/ExecutionEngine.h" #include "Aql/ExecutionStats.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" @@ -125,11 +126,11 @@ class AqlExecutorTestCase : public ::testing::Test { auto manager() const -> AqlItemBlockManager&; private: - static inline std::unique_ptr _server; std::vector> _execNodes; protected: // available variables + static inline std::unique_ptr _server; ResourceMonitor monitor{}; AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; std::unique_ptr fakedQuery; diff --git a/tests/Mocks/Servers.cpp b/tests/Mocks/Servers.cpp index 126dde89f4cc..abbb331f4074 100644 --- a/tests/Mocks/Servers.cpp +++ b/tests/Mocks/Servers.cpp @@ -88,7 +88,7 @@ #include #include -using namespace arangodb; + using namespace arangodb; using namespace arangodb::tests; using namespace arangodb::tests::mocks; @@ -449,7 +449,7 @@ std::shared_ptr MockAqlServer::createFakeTransac noCollections, opts); } -std::unique_ptr MockAqlServer::createFakeQuery(bool activateTracing) const { +std::unique_ptr MockAqlServer::createFakeQuery(bool activateTracing, std::string queryString) const { auto bindParams = std::make_shared(); bindParams->openObject(); bindParams->close(); @@ -459,7 +459,7 @@ std::unique_ptr MockAqlServer::createFakeQuery(bool activa queryOptions->add("profile", VPackValue(aql::PROFILE_LEVEL_TRACE_2)); } queryOptions->close(); - aql::QueryString fakeQueryString(""); + aql::QueryString fakeQueryString(queryString); auto query = std::make_unique(false, getSystemDatabase(), fakeQueryString, bindParams, queryOptions, diff --git a/tests/Mocks/Servers.h b/tests/Mocks/Servers.h index de8d7d3ba552..75693948fb30 100644 --- a/tests/Mocks/Servers.h +++ b/tests/Mocks/Servers.h @@ -128,7 +128,7 @@ class MockAqlServer : public MockServer, ~MockAqlServer(); std::shared_ptr createFakeTransaction() const; - std::unique_ptr createFakeQuery(bool activateTracing = false) const; + std::unique_ptr createFakeQuery(bool activateTracing = false, std::string queryString = "") const; }; class MockRestServer : public MockServer, From 557d5c585a4efcce3b3c37343a3b9e39fefe47d5 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Mon, 24 Feb 2020 11:13:24 +0000 Subject: [PATCH 073/122] Quickfix for compilation error due to merge --- tests/Aql/CalculationExecutorTest.cpp | 6 +++--- tests/Aql/EnumerateCollectionExecutorTest.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/Aql/CalculationExecutorTest.cpp b/tests/Aql/CalculationExecutorTest.cpp index 878b5323668b..ab64ffbf4d42 100644 --- a/tests/Aql/CalculationExecutorTest.cpp +++ b/tests/Aql/CalculationExecutorTest.cpp @@ -250,7 +250,7 @@ TEST_F(CalculationExecutorTest, test_produce_datarange) { SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); EXPECT_EQ(output.numRowsWritten(), 0); @@ -283,7 +283,7 @@ TEST_F(CalculationExecutorTest, test_produce_datarange_need_more) { SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}}); - AqlItemBlockInputRange 
input{ExecutorState::HASMORE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear(), @@ -329,7 +329,7 @@ TEST_F(CalculationExecutorTest, DISABLED_test_produce_datarange_has_more) { // T SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}, {R"(3)"}, {R"(4)"}}); - AqlItemBlockInputRange input{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); EXPECT_EQ(output.numRowsWritten(), 0); diff --git a/tests/Aql/EnumerateCollectionExecutorTest.cpp b/tests/Aql/EnumerateCollectionExecutorTest.cpp index 5f3419c43621..e3dffe955f29 100644 --- a/tests/Aql/EnumerateCollectionExecutorTest.cpp +++ b/tests/Aql/EnumerateCollectionExecutorTest.cpp @@ -129,7 +129,7 @@ TEST_F(EnumerateCollectionExecutorTest, the_produce_datarange_empty) { SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{}}); - AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange inputRange{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); @@ -148,7 +148,7 @@ TEST_F(EnumerateCollectionExecutorTest, the_skip_datarange_empty) { SharedAqlItemBlockPtr inBlock = buildBlock<1>(itemBlockManager, {{}}); - AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange inputRange{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); AqlCall skipCall{1000, AqlCall::Infinity{}, AqlCall::Infinity{}, false}; @@ -188,7 +188,7 @@ TEST_F(EnumerateCollectionExecutorTest, the_produce_datarange) { SCOPED_TRACE(insertQueryC); AssertQueryHasResult(vocbase, insertQueryC, VPackSlice::emptyArraySlice()); - AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange inputRange{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); @@ -230,7 +230,7 @@ TEST_F(EnumerateCollectionExecutorTest, the_skip_datarange) { SCOPED_TRACE(insertQueryC); AssertQueryHasResult(vocbase, insertQueryC, VPackSlice::emptyArraySlice()); */ - AqlItemBlockInputRange inputRange{ExecutorState::DONE, inBlock, 0, inBlock->size()}; + AqlItemBlockInputRange inputRange{ExecutorState::DONE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); From e6a31b75d619ef453a3fba59b08c35cf95b4e3c9 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 25 Feb 2020 10:38:15 +0000 Subject: [PATCH 074/122] Feature/aql subquery execution block impl execute implementation k shortest path executor (#10940) * Add new produceRow API code for KShortestPathsExecutor * Add KShortestPathsExecutor test to build * Add KShortestPathsExecutor test * Test and bugfix for KShortestPathExecutor * Update k-shortest-paths executor for new interface * Update tests for k-shortest-paths executor * Make ShortestPathExecutor and KShortestPathExecutor more similar * Fix merge messup * 
Refactor KShortestPathsExecutorInfos * Move KShortestPathsExecutor to new executor interface * Fix tests * Resolve naming conflict * Address some review comments * Fix skip * Fix for new signatures * Fix KShortestPathsExecutor * Fix KShortestPaths regression Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlockImpl.cpp | 6 +- arangod/Aql/KShortestPathsExecutor.cpp | 263 ++++++++------ arangod/Aql/KShortestPathsExecutor.h | 59 ++-- arangod/Graph/KShortestPathsFinder.cpp | 39 ++- arangod/Graph/KShortestPathsFinder.h | 13 +- tests/Aql/KShortestPathsExecutorTest.cpp | 427 +++++++++++++++++++++++ tests/Aql/ShortestPathExecutorTest.cpp | 4 +- tests/CMakeLists.txt | 1 + 8 files changed, 645 insertions(+), 167 deletions(-) create mode 100644 tests/Aql/KShortestPathsExecutorTest.cpp diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index dbb8bdbac935..5c63d1bccdd1 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -144,7 +144,7 @@ constexpr bool isNewStyleExecutor = TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; + KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -1078,8 +1078,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { "Fetcher is chosen for skipping, but has not skipRows method!"); static_assert(useExecutor == - (is_one_of_v bool { + return _source.type == InputVertex::Type::REGISTER; } -RegisterId KShortestPathsExecutorInfos::getInputRegister(bool isTarget) const { - TRI_ASSERT(useRegisterForInput(isTarget)); - if (isTarget) { - return _target.reg; - } +auto KShortestPathsExecutorInfos::useRegisterForTargetInput() const -> bool { + return _target.type == InputVertex::Type::REGISTER; +} + +auto KShortestPathsExecutorInfos::getSourceInputRegister() const -> RegisterId { + TRI_ASSERT(useRegisterForSourceInput()); return _source.reg; } -std::string const& KShortestPathsExecutorInfos::getInputValue(bool isTarget) const { - TRI_ASSERT(!useRegisterForInput(isTarget)); - if (isTarget) { - return _target.value; - } +auto KShortestPathsExecutorInfos::getTargetInputRegister() const -> RegisterId { + TRI_ASSERT(useRegisterForTargetInput()); + return _target.reg; +} + +auto KShortestPathsExecutorInfos::getSourceInputValue() const -> std::string const& { + TRI_ASSERT(!useRegisterForSourceInput()); return _source.value; } -RegisterId KShortestPathsExecutorInfos::getOutputRegister() const { +auto KShortestPathsExecutorInfos::getTargetInputValue() const -> std::string const& { + TRI_ASSERT(!useRegisterForTargetInput()); + return _target.value; +} + +auto KShortestPathsExecutorInfos::getOutputRegister() const -> RegisterId { TRI_ASSERT(_outputRegister != RegisterPlan::MaxRegisterId); return _outputRegister; } -graph::TraverserCache* KShortestPathsExecutorInfos::cache() const { +auto KShortestPathsExecutorInfos::getSourceVertex() const noexcept + -> KShortestPathsExecutorInfos::InputVertex { + return _source; +} + +auto KShortestPathsExecutorInfos::getTargetVertex() const noexcept + -> KShortestPathsExecutorInfos::InputVertex { + return _target; +} + +auto KShortestPathsExecutorInfos::cache() const -> graph::TraverserCache* { return _finder->options().cache(); } KShortestPathsExecutor::KShortestPathsExecutor(Fetcher& fetcher, Infos& infos) : _infos(infos), - 
_fetcher(fetcher), - _input{CreateInvalidInputRowHint{}}, + _inputRow{CreateInvalidInputRowHint{}}, _rowState(ExecutionState::HASMORE), _finder{infos.finder()}, _sourceBuilder{}, _targetBuilder{} { - if (!_infos.useRegisterForInput(false)) { - _sourceBuilder.add(VPackValue(_infos.getInputValue(false))); + if (!_infos.useRegisterForSourceInput()) { + _sourceBuilder.add(VPackValue(_infos.getSourceInputValue())); } - if (!_infos.useRegisterForInput(true)) { - _targetBuilder.add(VPackValue(_infos.getInputValue(true))); + if (!_infos.useRegisterForTargetInput()) { + _targetBuilder.add(VPackValue(_infos.getTargetInputValue())); } } -KShortestPathsExecutor::~KShortestPathsExecutor() = default; - // Shutdown query -std::pair KShortestPathsExecutor::shutdown(int errorCode) { +auto KShortestPathsExecutor::shutdown(int errorCode) -> std::pair { _finder.destroyEngines(); return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; } -std::pair KShortestPathsExecutor::produceRows(OutputAqlItemRow& output) { - NoStats s; +auto KShortestPathsExecutor::produceRows(OutputAqlItemRow& output) + -> std::pair { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - while (true) { - // We will have paths available, or return - if (!_finder.isPathAvailable()) { - if (!fetchPaths()) { - return {_rowState, s}; +auto KShortestPathsExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + while (!output.isFull()) { + if (_finder.isDone()) { + if (!fetchPaths(input)) { + TRI_ASSERT(!input.hasDataRow()); + return {input.upstreamState(), NoStats{}, AqlCall{}}; } + } else { + doOutputPath(output); } + } - // Now we have a path available, so we go and output it - transaction::BuilderLeaser tmp(_finder.options().trx()); - tmp->clear(); - if (_finder.getNextPathAql(*tmp.builder())) { - AqlValue path = AqlValue(*tmp.builder()); - AqlValueGuard guard{path, true}; - output.moveValueInto(_infos.getOutputRegister(), _input, guard); - return {computeState(), s}; - } + if (_finder.isDone()) { + return {input.upstreamState(), NoStats{}, AqlCall{}}; + } else { + return {ExecutorState::HASMORE, NoStats{}, AqlCall{}}; } } -bool KShortestPathsExecutor::fetchPaths() { - VPackSlice start; - VPackSlice end; - while (true) { - // Fetch a row from upstream - std::tie(_rowState, _input) = _fetcher.fetchRow(); - if (!_input.isInitialized()) { - // Either WAITING or DONE, in either case we cannot produce any paths. 
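The new produceRows above alternates between two activities: when the finder has no active traversal it pulls the next (source, target) pair from the input range and starts one, otherwise it copies the finder's next path into the output, stopping as soon as the output block is full. The same two-state loop, stripped of all AQL machinery, can be sketched like this (Finder, Pair and the produce function are illustrative stand-ins, not the real classes):

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Stand-in for the k-shortest-paths finder: yields a fixed number of "paths"
// per (source, target) pair and reports isDone() when they are used up.
struct Finder {
  std::size_t remaining = 0;
  std::string source, target;
  void start(std::string from, std::string to) {
    source = std::move(from);
    target = std::move(to);
    remaining = 2;  // pretend there are two paths per pair
  }
  bool isDone() const { return remaining == 0; }
  std::string nextPath() {
    --remaining;
    return source + " -> " + target;
  }
};

using Pair = std::pair<std::string, std::string>;

// Drain the finder into `out` (capacity `limit`), restarting it from `input`
// whenever it runs dry; returns true when both input and finder are exhausted.
bool produce(std::vector<Pair>& input, Finder& finder,
             std::vector<std::string>& out, std::size_t limit) {
  while (out.size() < limit) {
    if (finder.isDone()) {
      if (input.empty()) {
        return true;  // nothing left upstream and no current traversal
      }
      finder.start(input.back().first, input.back().second);
      input.pop_back();
    } else {
      out.push_back(finder.nextPath());
    }
  }
  return input.empty() && finder.isDone();
}

int main() {
  std::vector<Pair> input{{"A", "B"}};
  Finder finder;
  std::vector<std::string> out;
  bool done = produce(input, finder, out, 10);
  std::cout << out.size() << " paths, done: " << done << "\n";  // 2 paths, done: 1
}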
- TRI_ASSERT(_rowState == ExecutionState::WAITING || _rowState == ExecutionState::DONE); - return false; +auto KShortestPathsExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + auto stats = NoStats{}; + auto skipped = size_t{0}; + + while (call.shouldSkip()) { + // _finder.isDone() == true means that there is currently no path available + // from the _finder, we can try calling fetchPaths to make one available, + // but if that fails too, we must be DONE + if (_finder.isDone()) { + if (!fetchPaths(input)) { + TRI_ASSERT(!input.hasDataRow()); + return {input.upstreamState(), stats, skipped, AqlCall{}}; + } + } else { + if (_finder.skipPath()) { + skipped++; + call.didSkip(1); + } } + } + + if (_finder.isDone()) { + return {input.upstreamState(), stats, skipped, AqlCall{}}; + } else { + return {ExecutorState::HASMORE, stats, skipped, AqlCall{}}; + } +} + +auto KShortestPathsExecutor::fetchPaths(AqlItemBlockInputRange& input) -> bool { + TRI_ASSERT(_finder.isDone()); + while (input.hasDataRow()) { + auto source = VPackSlice{}; + auto target = VPackSlice{}; + std::tie(std::ignore, _inputRow) = input.nextDataRow(); + TRI_ASSERT(_inputRow.isInitialized()); + // Check start and end for validity - if (!getVertexId(false, start) || !getVertexId(true, end)) { - // Fetch another row - continue; - } - TRI_ASSERT(start.isString()); - TRI_ASSERT(end.isString()); - if (_finder.startKShortestPathsTraversal(start, end)) { - break; + if (getVertexId(_infos.getSourceVertex(), _inputRow, _sourceBuilder, source) && + getVertexId(_infos.getTargetVertex(), _inputRow, _targetBuilder, target) && + _finder.startKShortestPathsTraversal(source, target)) { + return true; } - } - return true; + } + return false; } -ExecutionState KShortestPathsExecutor::computeState() const { - if (_rowState == ExecutionState::HASMORE || _finder.isPathAvailable()) { - return ExecutionState::HASMORE; +auto KShortestPathsExecutor::doOutputPath(OutputAqlItemRow& output) -> void { + auto tmp = transaction::BuilderLeaser{_finder.options().trx()}; + tmp->clear(); + + if (_finder.getNextPathAql(*tmp.builder())) { + output.cloneValueInto(_infos.getOutputRegister(), _inputRow, AqlValue(*tmp.builder())); + output.advanceRow(); } - return ExecutionState::DONE; } -bool KShortestPathsExecutor::getVertexId(bool isTarget, VPackSlice& id) { - if (_infos.useRegisterForInput(isTarget)) { - // The input row stays valid until the next fetchRow is executed. - // So the slice can easily point to it. - RegisterId reg = _infos.getInputRegister(isTarget); - AqlValue const& in = _input.getValue(reg); - if (in.isObject()) { - try { - auto idString = _finder.options().trx()->extractIdString(in.slice()); - if (isTarget) { - _targetBuilder.clear(); - _targetBuilder.add(VPackValue(idString)); - id = _targetBuilder.slice(); - } else { - _sourceBuilder.clear(); - _sourceBuilder.add(VPackValue(idString)); - id = _sourceBuilder.slice(); +auto KShortestPathsExecutor::getVertexId(KShortestPathsExecutorInfos::InputVertex const& vertex, + InputAqlItemRow& row, VPackBuilder& builder, + VPackSlice& id) -> bool { + switch (vertex.type) { + case KShortestPathsExecutorInfos::InputVertex::Type::REGISTER: { + AqlValue const& in = row.getValue(vertex.reg); + if (in.isObject()) { + try { + auto idString = _finder.options().trx()->extractIdString(in.slice()); + builder.clear(); + builder.add(VPackValue(idString)); + id = builder.slice(); + // Guranteed by extractIdValue + TRI_ASSERT(::isValidId(id)); + } catch (...) 
{ + // _id or _key not present... ignore this error and fall through + // returning no path + return false; + } + return true; + } else if (in.isString()) { + id = in.slice(); + // Validation + if (!::isValidId(id)) { + _finder.options().query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for Shortest Path: " + "Only id strings or objects with " + "_id are allowed"); + return false; } - // Guranteed by extractIdValue - TRI_ASSERT(::isValidId(id)); - } catch (...) { - // _id or _key not present... ignore this error and fall through - // returning no path + return true; + } else { + _finder.options().query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for Shortest Path: " + "Only id strings or objects with " + "_id are allowed"); return false; } - return true; - } else if (in.isString()) { - id = in.slice(); - // Validation + } + case KShortestPathsExecutorInfos::InputVertex::Type::CONSTANT: { + id = builder.slice(); if (!::isValidId(id)) { _finder.options().query()->registerWarning( TRI_ERROR_BAD_PARAMETER, - "Invalid input for k Shortest Paths: " + "Invalid input for Shortest Path: " "Only id strings or objects with " "_id are allowed"); return false; } return true; - } else { - _finder.options().query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for k Shortest Paths: " - "Only id strings or objects with " - "_id are allowed"); - return false; - } - } else { - if (isTarget) { - id = _targetBuilder.slice(); - } else { - id = _sourceBuilder.slice(); - } - if (!::isValidId(id)) { - _finder.options().query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for k Shortest Paths: " - "Only id strings or objects with " - "_id are allowed"); - return false; } - return true; } + return false; } diff --git a/arangod/Aql/KShortestPathsExecutor.h b/arangod/Aql/KShortestPathsExecutor.h index 9477d3bc3b28..e06ee743d3ce 100644 --- a/arangod/Aql/KShortestPathsExecutor.h +++ b/arangod/Aql/KShortestPathsExecutor.h @@ -23,12 +23,16 @@ #ifndef ARANGOD_AQL_KSHORTEST_PATHS_EXECUTOR_H #define ARANGOD_AQL_KSHORTEST_PATHS_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" #include +using namespace arangodb::velocypack; + namespace arangodb { class Result; @@ -54,16 +58,17 @@ class NoStats; class KShortestPathsExecutorInfos : public ExecutorInfos { public: struct InputVertex { - enum { CONSTANT, REGISTER } type; + enum class Type { CONSTANT, REGISTER }; + Type type; // TODO make the following two a union instead RegisterId reg; std::string value; // cppcheck-suppress passedByValue explicit InputVertex(std::string value) - : type(CONSTANT), reg(0), value(std::move(value)) {} + : type(Type::CONSTANT), reg(0), value(std::move(value)) {} explicit InputVertex(RegisterId reg) - : type(REGISTER), reg(reg), value("") {} + : type(Type::REGISTER), reg(reg), value("") {} }; KShortestPathsExecutorInfos(std::shared_ptr> inputRegisters, @@ -80,35 +85,41 @@ class KShortestPathsExecutorInfos : public ExecutorInfos { KShortestPathsExecutorInfos(KShortestPathsExecutorInfos const&) = delete; ~KShortestPathsExecutorInfos(); - arangodb::graph::KShortestPathsFinder& finder() const; + [[nodiscard]] auto finder() const -> arangodb::graph::KShortestPathsFinder&; /** * @brief test if we use a register or a constant input * * @param isTarget defines if we look for target(true) or source(false) */ - bool useRegisterForInput(bool isTarget) const; + 
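Source and target of the path search can each come either from a register of the input row or from a constant embedded in the query, which is why the executor infos carry an InputVertex with a small type tag and getVertexId branches on it. A reduced illustration of resolving such a tagged input (hypothetical Row type and plain strings instead of AqlValue/VPackSlice) is:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for an input row: just a list of register values.
using Row = std::vector<std::string>;

struct InputVertex {
  enum class Type { CONSTANT, REGISTER };
  Type type;
  std::size_t reg;      // used when type == REGISTER
  std::string value;    // used when type == CONSTANT
};

// Resolve the vertex id for the current row, depending on how it was defined.
std::string resolve(InputVertex const& vertex, Row const& row) {
  switch (vertex.type) {
    case InputVertex::Type::REGISTER:
      return row.at(vertex.reg);
    case InputVertex::Type::CONSTANT:
      return vertex.value;
  }
  return {};  // unreachable, keeps compilers happy
}

int main() {
  Row row{"vertices/source", "vertices/target"};
  InputVertex fromRegister{InputVertex::Type::REGISTER, 1, ""};
  InputVertex constant{InputVertex::Type::CONSTANT, 0, "vertices/start"};
  std::cout << resolve(fromRegister, row) << "\n";  // vertices/target
  std::cout << resolve(constant, row) << "\n";      // vertices/start
}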
[[nodiscard]] auto useRegisterForSourceInput() const -> bool; + [[nodiscard]] auto useRegisterForTargetInput() const -> bool; /** * @brief get the register used for the input * * @param isTarget defines if we look for target(true) or source(false) */ - RegisterId getInputRegister(bool isTarget) const; + [[nodiscard]] auto getSourceInputRegister() const -> RegisterId; + [[nodiscard]] auto getTargetInputRegister() const -> RegisterId; /** * @brief get the const value for the input * * @param isTarget defines if we look for target(true) or source(false) */ - std::string const& getInputValue(bool isTarget) const; + [[nodiscard]] auto getSourceInputValue() const -> std::string const&; + [[nodiscard]] auto getTargetInputValue() const -> std::string const&; /** * @brief get the output register for the given type */ - RegisterId getOutputRegister() const; + [[nodiscard]] auto getOutputRegister() const -> RegisterId; + + [[nodiscard]] auto cache() const -> graph::TraverserCache*; - graph::TraverserCache* cache() const; + [[nodiscard]] auto getSourceVertex() const noexcept -> InputVertex; + [[nodiscard]] auto getTargetVertex() const noexcept -> InputVertex; private: /// @brief the shortest path finder. @@ -141,21 +152,26 @@ class KShortestPathsExecutor { KShortestPathsExecutor(KShortestPathsExecutor&&) = default; KShortestPathsExecutor(Fetcher& fetcher, Infos&); - ~KShortestPathsExecutor(); + ~KShortestPathsExecutor() = default; /** * @brief Shutdown will be called once for every query * * @return ExecutionState and no error. */ - std::pair shutdown(int errorCode); + [[nodiscard]] auto shutdown(int errorCode) -> std::pair; /** * @brief produce the next Row of Aql Values. * * @return ExecutionState, and if successful exactly one new Row of AqlItems. */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(OutputAqlItemRow& output) + -> std::pair; + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; private: /** @@ -163,25 +179,20 @@ class KShortestPathsExecutor { * * @return false if we are done and no path could be found. */ - bool fetchPaths(); - - /** - * @brief compute the correct return state - * - * @return DONE if no more is expected - */ - - ExecutionState computeState() const; + [[nodiscard]] auto fetchPaths(AqlItemBlockInputRange& input) -> bool; + auto doOutputPath(OutputAqlItemRow& output) -> void; /** * @brief get the id of a input vertex */ - bool getVertexId(bool isTarget, arangodb::velocypack::Slice& id); + [[nodiscard]] auto getVertexId(bool isTarget, arangodb::velocypack::Slice& id) -> bool; + + [[nodiscard]] auto getVertexId(KShortestPathsExecutorInfos::InputVertex const& vertex, + InputAqlItemRow& row, Builder& builder, Slice& id) -> bool; private: Infos& _infos; - Fetcher& _fetcher; - InputAqlItemRow _input; + InputAqlItemRow _inputRow; ExecutionState _rowState; /// @brief the shortest path finder. 
arangodb::graph::KShortestPathsFinder& _finder; diff --git a/arangod/Graph/KShortestPathsFinder.cpp b/arangod/Graph/KShortestPathsFinder.cpp index 3c572ac4a969..56371dc1e4f1 100644 --- a/arangod/Graph/KShortestPathsFinder.cpp +++ b/arangod/Graph/KShortestPathsFinder.cpp @@ -42,9 +42,9 @@ using namespace arangodb; using namespace arangodb::graph; -// KShortestPathsFinder::KShortestPathsFinder(ShortestPathOptions& options) - : ShortestPathFinder(options), _pathAvailable(false) {} + : ShortestPathFinder(options), _traversalDone(true) {} + KShortestPathsFinder::~KShortestPathsFinder() = default; // Sets up k-shortest-paths traversal from start to end @@ -55,13 +55,12 @@ bool KShortestPathsFinder::startKShortestPathsTraversal( _start = arangodb::velocypack::StringRef(start); _end = arangodb::velocypack::StringRef(end); - _pathAvailable = true; + _traversalDone = false; + _shortestPaths.clear(); _candidatePaths.clear(); - TRI_IF_FAILURE("TraversalOOMInitialize") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } + TRI_IF_FAILURE("Travefalse") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } return true; } @@ -340,38 +339,37 @@ bool KShortestPathsFinder::computeNextShortestPath(Path& result) { } bool KShortestPathsFinder::getNextPath(Path& result) { - bool available = false; result.clear(); - // TODO: this looks a bit ugly + // This is for the first time that getNextPath is called if (_shortestPaths.empty()) { if (_start == _end) { TRI_ASSERT(!_start.empty()); result._vertices.emplace_back(_start); result._weight = 0; - available = true; } else { - available = computeShortestPath(_start, _end, {}, {}, result); + // Compute the first shortest path (i.e. the shortest path + // between _start and _end!) + computeShortestPath(_start, _end, {}, {}, result); result._branchpoint = 0; } } else { - if (_start == _end) { - available = false; - } else { - available = computeNextShortestPath(result); - } + // We must not have _start == _end here, because we handle _start == _end + computeNextShortestPath(result); } - if (available) { + if (result.length() > 0) { _shortestPaths.emplace_back(result); _options.fetchVerticesCoordinator(result._vertices); TRI_IF_FAILURE("TraversalOOMPath") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } + } else { + // If we did not find a path, traversal is done. 
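skipPath, added to the finder below, is implemented as "compute the next path and throw it away", which keeps the skipping code identical to the producing code at the cost of doing the full search work for skipped paths. In generic form the idea is just:

#include <iostream>
#include <optional>

// Any generator with a tryNext() can support skipping by discarding results.
struct NumberGenerator {
  int current = 0;
  int last = 3;
  std::optional<int> tryNext() {
    if (current > last) {
      return std::nullopt;      // traversal is done
    }
    return current++;
  }
  // skip() succeeds exactly when another element could have been produced.
  bool skip() { return tryNext().has_value(); }
};

int main() {
  NumberGenerator gen;
  std::cout << gen.skip() << gen.skip() << "\n";      // 11 (skipped 0 and 1)
  while (auto value = gen.tryNext()) {
    std::cout << *value << " ";                       // 2 3
  }
  std::cout << "\n";
}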
+ _traversalDone = true; } - _pathAvailable = available; - return available; + return !_traversalDone; } bool KShortestPathsFinder::getNextPathShortestPathResult(ShortestPathResult& result) { @@ -420,3 +418,8 @@ bool KShortestPathsFinder::getNextPathAql(arangodb::velocypack::Builder& result) return false; } } + +bool KShortestPathsFinder::skipPath() { + Path path; + return getNextPath(path); +} diff --git a/arangod/Graph/KShortestPathsFinder.h b/arangod/Graph/KShortestPathsFinder.h index 1f7a482c3daa..1d62cc863894 100644 --- a/arangod/Graph/KShortestPathsFinder.h +++ b/arangod/Graph/KShortestPathsFinder.h @@ -162,7 +162,7 @@ class KShortestPathsFinder : public ShortestPathFinder { Ball() {} Ball(VertexRef const& center, Direction direction) : _center(center), _direction(direction), _closest(0) { - _frontier.insert(center , std::make_unique(center)); + _frontier.insert(center, std::make_unique(center)); } ~Ball() = default; const VertexRef center() const { return _center; }; @@ -220,18 +220,19 @@ class KShortestPathsFinder : public ShortestPathFinder { } // initialise k Shortest Paths - bool startKShortestPathsTraversal(arangodb::velocypack::Slice const& start, - arangodb::velocypack::Slice const& end); + TEST_VIRTUAL bool startKShortestPathsTraversal(arangodb::velocypack::Slice const& start, + arangodb::velocypack::Slice const& end); // get the next available path as AQL value. - bool getNextPathAql(arangodb::velocypack::Builder& builder); + TEST_VIRTUAL bool getNextPathAql(arangodb::velocypack::Builder& builder); // get the next available path as a ShortestPathResult // TODO: this is only here to not break catch-tests and needs a cleaner solution. // probably by making ShortestPathResult versatile enough and using that bool getNextPathShortestPathResult(ShortestPathResult& path); // get the next available path as a Path bool getNextPath(Path& path); - bool isPathAvailable() const { return _pathAvailable; } + TEST_VIRTUAL bool skipPath(); + TEST_VIRTUAL bool isDone() const { return _traversalDone; } private: // Compute the first shortest path @@ -257,7 +258,7 @@ class KShortestPathsFinder : public ShortestPathFinder { VertexRef& join, std::optional& currentBest); private: - bool _pathAvailable; + bool _traversalDone; VertexRef _start; VertexRef _end; diff --git a/tests/Aql/KShortestPathsExecutorTest.cpp b/tests/Aql/KShortestPathsExecutorTest.cpp new file mode 100644 index 000000000000..ceaf4bf40d1f --- /dev/null +++ b/tests/Aql/KShortestPathsExecutorTest.cpp @@ -0,0 +1,427 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "gtest/gtest.h" + +#include +#include +#include +#include + +#include "Aql/RowFetcherHelper.h" +#include "Mocks/LogLevels.h" +#include "Mocks/Servers.h" + +#include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockHelper.h" +#include "Aql/AqlItemBlockManager.h" +#include "Aql/AqlValue.h" +#include "Aql/ExecutorInfos.h" +#include "Aql/InputAqlItemRow.h" +#include "Aql/KShortestPathsExecutor.h" +#include "Aql/OutputAqlItemRow.h" +#include "Aql/Query.h" +#include "Aql/ResourceUsage.h" +#include "Aql/Stats.h" +#include "Graph/EdgeDocumentToken.h" +#include "Graph/GraphTestTools.h" +#include "Graph/KShortestPathsFinder.h" +#include "Graph/ShortestPathOptions.h" +#include "Graph/ShortestPathResult.h" +#include "Graph/TraverserCache.h" +#include "Graph/TraverserOptions.h" + +#include "../Mocks/Servers.h" + +using namespace arangodb; +using namespace arangodb::aql; +using namespace arangodb::graph; +using namespace arangodb::tests::mocks; + +namespace arangodb { +namespace tests { +namespace aql { + +using Vertex = KShortestPathsExecutorInfos::InputVertex; +using RegisterSet = std::unordered_set; +using Path = std::vector; +using PathSequence = std::vector; + +// The FakeShortestPathsFinder does not do any real k shortest paths search; it +// is merely initialized with a set of "paths" and then outputs them, keeping a +// record of which paths it produced. This record is used in the validation +// whether the executor output the correct sequence of rows. +class FakeKShortestPathsFinder : public KShortestPathsFinder { + public: + FakeKShortestPathsFinder(ShortestPathOptions& options, PathSequence const& kpaths) + : KShortestPathsFinder(options), _kpaths(kpaths), _traversalDone(true) {} + ~FakeKShortestPathsFinder() = default; + + auto gotoNextPath() -> bool { + EXPECT_NE(_source, ""); + EXPECT_NE(_target, ""); + EXPECT_NE(_source, _target); + + while (_finder != std::end(_kpaths)) { + if (_finder->front() == _source && _finder->back() == _target) { + return true; + } + _finder++; + } + return false; + } + + bool startKShortestPathsTraversal(Slice const& start, Slice const& end) override { + _source = std::string{start.copyString()}; + _target = std::string{end.copyString()}; + + _calledWith.emplace_back(std::make_pair(_source, _target)); + + EXPECT_NE(_source, ""); + EXPECT_NE(_target, ""); + EXPECT_NE(_source, _target); + + _finder = _kpaths.begin(); + return true; + } + + bool getNextPathAql(Builder& builder) override { + _traversalDone = !gotoNextPath(); + + if (_traversalDone) { + return false; + } else { + _pathsProduced.emplace_back(*_finder); + // fill builder with something sensible? 
+ builder.openArray(); + for (auto&& v : *_finder) { + builder.add(VPackValue(v)); + } + builder.close(); + + // HACK + _finder++; + return true; + } + } + + bool skipPath() override { + Builder builder{}; + return getNextPathAql(builder); + } + + bool isDone() const override { return _traversalDone; } + + PathSequence& getPathsProduced() noexcept { return _pathsProduced; } + std::vector> getCalledWith() noexcept { + return _calledWith; + } + + private: + // We emulate a number of paths between PathPair + PathSequence const& _kpaths; + std::string _source; + std::string _target; + bool _traversalDone; + PathSequence::const_iterator _finder; + PathSequence _pathsProduced; + std::vector> _calledWith; +}; + +// TODO: this needs a << operator +struct KShortestPathsTestParameters { + KShortestPathsTestParameters(std::tuple, PathSequence, AqlCall, size_t> params) + : _source(std::get<0>(params)), + _target(std::get<1>(params)), + // TODO: Make output registers configurable? + _outputRegisters(std::initializer_list{2}), + _inputMatrix(std::get<2>(params)), + _paths(std::get<3>(params)), + _call(std::get<4>(params)), + _blockSize(std::get<5>(params)){}; + + Vertex _source; + Vertex _target; + RegisterSet _inputRegisters; + RegisterSet _outputRegisters; + MatrixBuilder<2> _inputMatrix; + PathSequence _paths; + AqlCall _call; + size_t _blockSize{1000}; +}; + +class KShortestPathsExecutorTest + : public ::testing::Test, + public ::testing::WithParamInterface, PathSequence, AqlCall, size_t>> { + protected: + // parameters are copied because they are const otherwise + // and that doesn't mix with std::move + KShortestPathsTestParameters parameters; + + MockAqlServer server; + ExecutionState state; + ResourceMonitor monitor; + AqlItemBlockManager itemBlockManager; + SharedAqlItemBlockPtr block; + + std::unique_ptr fakedQuery; + ShortestPathOptions options; + + KShortestPathsExecutorInfos infos; + + FakeKShortestPathsFinder& finder; + + SharedAqlItemBlockPtr inputBlock; + AqlItemBlockInputRange input; + + std::shared_ptr fakeUnusedBlock; + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher; + + KShortestPathsExecutor testee; + OutputAqlItemRow output; + + KShortestPathsExecutorTest() + : parameters(GetParam()), + server{}, + itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), + fakedQuery(server.createFakeQuery()), + options(fakedQuery.get()), + infos(std::make_shared(parameters._inputRegisters), + std::make_shared(parameters._outputRegisters), 2, 3, {}, {0}, + std::make_unique(options, parameters._paths), + std::move(parameters._source), std::move(parameters._target)), + finder(static_cast(infos.finder())), + inputBlock(buildBlock<2>(itemBlockManager, std::move(parameters._inputMatrix))), + input(AqlItemBlockInputRange(ExecutorState::DONE, 0, inputBlock, 0)), + fakeUnusedBlock(VPackParser::fromJson("[]")), + fetcher(itemBlockManager, fakeUnusedBlock->steal(), false), + testee(fetcher, infos), + output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()) {} + + size_t ExpectedNumberOfRowsProduced(size_t expectedFound) { + if (parameters._call.getOffset() >= expectedFound) { + return 0; + } else { + expectedFound -= parameters._call.getOffset(); + } + return parameters._call.clampToLimit(expectedFound); + } + + void ValidateCalledWith() { + auto calledWith = finder.getCalledWith(); + auto block = buildBlock<2>(itemBlockManager, std::move(parameters._inputMatrix)); + + // We should always only call the finder at most for all 
input rows + ASSERT_LE(calledWith.size(), block->size()); + + auto blockIndex = size_t{0}; + for (auto const& input : calledWith) { + auto source = std::string{}; + auto target = std::string{}; + + if (infos.useRegisterForSourceInput()) { + AqlValue value = block->getValue(blockIndex, infos.getSourceInputRegister()); + ASSERT_TRUE(value.isString()); + source = value.slice().copyString(); + } else { + source = infos.getSourceInputValue(); + } + + if (infos.useRegisterForTargetInput()) { + AqlValue value = block->getValue(blockIndex, infos.getTargetInputRegister()); + ASSERT_TRUE(value.isString()); + target = value.slice().copyString(); + } else { + target = infos.getTargetInputValue(); + } + ASSERT_EQ(source, input.first); + ASSERT_EQ(target, input.second); + blockIndex++; + } + } + + void ValidateResult(std::vector& results, + size_t skippedInitial, size_t skippedFullCount) { + auto pathsFound = finder.getPathsProduced(); + + // We expect to be getting exactly the rows returned + // that we produced with the shortest path finder. + // in exactly the order they were produced in. + + auto expectedNrRowsSkippedInitial = + std::min(parameters._call.getOffset(), pathsFound.size()); + EXPECT_EQ(skippedInitial, expectedNrRowsSkippedInitial); + + auto expectedNrRowsProduced = ExpectedNumberOfRowsProduced(pathsFound.size()); + + auto expectedRowsIndex = size_t{skippedInitial}; + for (auto const& block : results) { + if (block != nullptr) { + for (size_t blockIndex = 0; blockIndex < block->size(); ++blockIndex, ++expectedRowsIndex) { + AqlValue value = block->getValue(blockIndex, infos.getOutputRegister()); + EXPECT_TRUE(value.isArray()); + + // Note that the correct layout of the result path is currently the + // responsibility of the path finder (tested separately), so we get + // away with fake outputs. + auto verticesResult = VPackArrayIterator(value.slice()); + auto pathExpected = pathsFound.at(expectedRowsIndex); + auto verticesExpected = std::begin(pathExpected); + + while (verticesExpected != std::end(pathExpected) && + verticesResult != verticesResult.end()) { + ASSERT_EQ((*verticesResult).copyString(), *verticesExpected); + verticesResult++; + verticesExpected++; + } + ASSERT_TRUE((verticesExpected == std::end(pathExpected)) && + // Yes, really, they didn't implement == for iterators + !(verticesResult != verticesResult.end())); + } + } + } + + ASSERT_EQ(expectedRowsIndex - skippedInitial, expectedNrRowsProduced); + + // If a fullCount was requested, the sum (skippedInitial + produced + + // skippedFullCount) should be exactly the number of rows we produced. 
+ if (parameters._call.fullCount) { + ASSERT_EQ(skippedInitial + (expectedRowsIndex - skippedInitial) + skippedFullCount, + pathsFound.size()); + } + } + + void TestExecutor(KShortestPathsExecutorInfos& infos, AqlItemBlockInputRange& input) { + // This will fetch everything now, unless we give a small enough atMost + + auto stats = NoStats{}; + auto ourCall = AqlCall{parameters._call}; + auto skippedInitial = size_t{0}; + auto skippedFullCount = size_t{0}; + auto state = ExecutorState{ExecutorState::HASMORE}; + auto outputs = std::vector{}; + + if (ourCall.getOffset() > 0) { + std::tie(state, stats, skippedInitial, std::ignore) = + testee.skipRowsRange(input, ourCall); + } + + while (state == ExecutorState::HASMORE && ourCall.getLimit() > 0) { + SharedAqlItemBlockPtr block = + itemBlockManager.requestBlock(parameters._blockSize, 4); + + OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), + infos.registersToKeep(), infos.registersToClear()); + output.setCall(std::move(ourCall)); + + std::tie(state, std::ignore, std::ignore) = testee.produceRows(input, output); + + outputs.emplace_back(output.stealBlock()); + ourCall = output.stealClientCall(); + } + + if (ourCall.needsFullCount()) { + std::tie(state, stats, skippedFullCount, std::ignore) = + testee.skipRowsRange(input, ourCall); + } + + ValidateCalledWith(); + ValidateResult(outputs, skippedInitial, skippedFullCount); + } +}; // namespace aql + +TEST_P(KShortestPathsExecutorTest, the_test) { TestExecutor(infos, input); } + +// Conflict with the other shortest path finder +namespace { + +Vertex const constSource("vertex/source"), constTarget("vertex/target"), + regSource(0), regTarget(1), brokenSource{"IwillBreakYourSearch"}, + brokenTarget{"I will also break your search"}; + +MatrixBuilder<2> const noneRow{{{{}}}}; +MatrixBuilder<2> const oneRow{{{{R"("vertex/source")"}, {R"("vertex/target")"}}}}; +MatrixBuilder<2> const twoRows{{{{R"("vertex/source")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/b")"}}}}; +MatrixBuilder<2> const threeRows{{{{R"("vertex/source")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/b")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/target")"}}}}; +MatrixBuilder<2> const someRows{{{{R"("vertex/c")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/b")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/e")"}, {R"("vertex/target")"}}}, + {{{R"("vertex/a")"}, {R"("vertex/target")"}}}}; + +PathSequence const noPath = {}; +PathSequence const onePath = { + {"vertex/source", "vertex/intermed", "vertex/target"}}; + +PathSequence const threePaths = { + {"vertex/source", "vertex/intermed", "vertex/target"}, + {"vertex/a", "vertex/b", "vertex/c", "vertex/d"}, + {"vertex/source", "vertex/b", "vertex/c", "vertex/d"}, + {"vertex/a", "vertex/b", "vertex/target"}}; + +PathSequence const somePaths = { + {"vertex/source", "vertex/intermed0", "vertex/target"}, + {"vertex/a", "vertex/b", "vertex/c", "vertex/d"}, + {"vertex/source", "vertex/intermed1", "vertex/target"}, + {"vertex/source", "vertex/intermed2", "vertex/target"}, + {"vertex/a", "vertex/b", "vertex/c", "vertex/d"}, + {"vertex/source", "vertex/intermed3", "vertex/target"}, + {"vertex/source", "vertex/intermed4", "vertex/target"}, + {"vertex/a", "vertex/b", "vertex/c", "vertex/d"}, + {"vertex/source", "vertex/intermed5", "vertex/target"}, +}; + +// Some of the bigger test cases we should generate and not write out like a caveperson +PathSequence generateSomeBiggerCase(size_t n) { + auto paths = PathSequence{}; + + for (size_t i = 0; i < n; 
i++) { + paths.push_back({"vertex/source", "vertex/intermed0", "vertex/target"}); + } + + return paths; +} + +auto sources = testing::Values(constSource, regSource, brokenSource); +auto targets = testing::Values(constTarget, regTarget, brokenTarget); +auto inputs = testing::Values(noneRow, oneRow, twoRows, threeRows); +auto paths = + testing::Values(noPath, onePath, threePaths, somePaths, + generateSomeBiggerCase(100), generateSomeBiggerCase(999), + generateSomeBiggerCase(1000), generateSomeBiggerCase(2000)); +auto calls = + testing::Values(AqlCall{}, AqlCall{0, 0, 0, false}, AqlCall{0, 1, 0, false}, + AqlCall{0, 0, 1, false}, AqlCall{0, 1, 1, false}, + AqlCall{1, 1, 1}, AqlCall{100, 1, 1}, AqlCall{1000}, + AqlCall{0, AqlCall::Infinity{}, AqlCall::Infinity{}, true}); +auto blockSizes = testing::Values(5, 1000); + +INSTANTIATE_TEST_CASE_P(KShortestPathExecutorTestInstance, KShortestPathsExecutorTest, + testing::Combine(sources, targets, inputs, paths, calls, blockSizes)); +} // namespace + +} // namespace aql +} // namespace tests +} // namespace arangodb diff --git a/tests/Aql/ShortestPathExecutorTest.cpp b/tests/Aql/ShortestPathExecutorTest.cpp index 11ad12eacb56..a275e920ab95 100644 --- a/tests/Aql/ShortestPathExecutorTest.cpp +++ b/tests/Aql/ShortestPathExecutorTest.cpp @@ -486,6 +486,8 @@ class ShortestPathExecutorTest TEST_P(ShortestPathExecutorTest, the_test) { TestExecutor(); } +// Namespace conflict with the other shortest path executor +namespace { Vertex const constSource("vertex/source"), constTarget("vertex/target"), regSource(0), regTarget(1), brokenSource{"IwillBreakYourSearch"}, brokenTarget{"I will also break your search"}; @@ -545,7 +547,7 @@ auto blockSizes = testing::Values(size_t{5}, 1000); INSTANTIATE_TEST_CASE_P(ShortestPathExecutorTestInstance, ShortestPathExecutorTest, testing::Combine(sources, targets, inputs, paths, calls, variants, blockSizes)); - +} // namespace } // namespace aql } // namespace tests } // namespace arangodb diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6ba73256b04f..5061dd53a789 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -50,6 +50,7 @@ set(ARANGODB_TESTS_SOURCES Aql/IdExecutorTest.cpp Aql/IndexNodeTest.cpp Aql/InsertExecutorTest.cpp + Aql/KShortestPathsExecutorTest.cpp Aql/LimitExecutorTest.cpp Aql/MockTypedNode.cpp Aql/MultiDepFetcherHelper.cpp From b81dc9437ee46ed045fec52084f71605ac22492b Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 25 Feb 2020 10:57:11 +0000 Subject: [PATCH 075/122] Feature/aql subquery execution block impl execute implementation traversal executor (#11023) * Move to new executor interface * Fix tests and some bugs * new skip signature * new skip signature * Fix Traversal Executor Skipping * Fix compiler complaint Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlockImpl.cpp | 4 +- arangod/Aql/TraversalExecutor.cpp | 252 +++++++++++++++------------ arangod/Aql/TraversalExecutor.h | 24 +-- tests/Aql/TraversalExecutorTest.cpp | 255 +++++++++++++--------------- 4 files changed, 274 insertions(+), 261 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 5c63d1bccdd1..6395cd3ad42b 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -144,7 +144,7 @@ constexpr bool isNewStyleExecutor = TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, 
LimitExecutor>; + TraversalExecutor, KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -1083,7 +1083,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif - EnumerateListExecutor, SortedCollectExecutor, LimitExecutor>), + TraversalExecutor, EnumerateListExecutor, SortedCollectExecutor, LimitExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! diff --git a/arangod/Aql/TraversalExecutor.cpp b/arangod/Aql/TraversalExecutor.cpp index 946a3fcc0fd1..c111df3861a4 100644 --- a/arangod/Aql/TraversalExecutor.cpp +++ b/arangod/Aql/TraversalExecutor.cpp @@ -146,11 +146,7 @@ std::vector> const& TraversalExecutorInfo } TraversalExecutor::TraversalExecutor(Fetcher& fetcher, Infos& infos) - : _infos(infos), - _fetcher(fetcher), - _input{CreateInvalidInputRowHint{}}, - _rowState(ExecutionState::HASMORE), - _traverser(infos.traverser()) {} + : _infos(infos), _inputRow{CreateInvalidInputRowHint{}}, _traverser(infos.traverser()) {} TraversalExecutor::~TraversalExecutor() { auto opts = _traverser.options(); @@ -175,133 +171,169 @@ std::pair TraversalExecutor::shutdown(int errorCode) { } std::pair TraversalExecutor::produceRows(OutputAqlItemRow& output) { - TraversalStats s; + // TODO: Remove me! + TRI_ASSERT(false); + return {ExecutionState::DONE, TraversalStats{}}; +} + +auto TraversalExecutor::doOutput(OutputAqlItemRow& output) -> void { + // TODO check whether _traverser.hasMore is obsolete here + while (!output.isFull() && _traverser.hasMore() && _traverser.next()) { + TRI_ASSERT(_inputRow.isInitialized()); + + // traverser now has next v, e, p values + if (_infos.useVertexOutput()) { + AqlValue vertex = _traverser.lastVertexToAqlValue(); + output.cloneValueInto(_infos.vertexRegister(), _inputRow, vertex); + } + if (_infos.useEdgeOutput()) { + AqlValue edge = _traverser.lastEdgeToAqlValue(); + output.cloneValueInto(_infos.edgeRegister(), _inputRow, edge); + } + if (_infos.usePathOutput()) { + transaction::BuilderLeaser tmp(_traverser.trx()); + AqlValue path = _traverser.pathToAqlValue(*tmp.builder()); + output.cloneValueInto(_infos.pathRegister(), _inputRow, path); + } + output.advanceRow(); + } +} + +auto TraversalExecutor::doSkip(AqlCall& call) -> size_t { + auto skip = size_t{0}; + + while (call.shouldSkip() && _traverser.hasMore() && _traverser.next()) { + skip++; + call.didSkip(1); + } + + return skip; +} + +auto TraversalExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + TraversalStats stats; + ExecutorState state{ExecutorState::HASMORE}; while (true) { - if (!_input.isInitialized()) { - if (_rowState == ExecutionState::DONE) { - // we are done - s.addFiltered(_traverser.getAndResetFilteredPaths()); - s.addScannedIndex(_traverser.getAndResetReadDocuments()); - s.addHttpRequests(_traverser.getAndResetHttpRequests()); - return {_rowState, s}; - } - std::tie(_rowState, _input) = _fetcher.fetchRow(); - if (_rowState == ExecutionState::WAITING) { - TRI_ASSERT(!_input.isInitialized()); - s.addFiltered(_traverser.getAndResetFilteredPaths()); - s.addScannedIndex(_traverser.getAndResetReadDocuments()); - s.addHttpRequests(_traverser.getAndResetHttpRequests()); - return {_rowState, s}; - } + if (_traverser.hasMore()) { + TRI_ASSERT(_inputRow.isInitialized()); + doOutput(output); - if 
(!_input.isInitialized()) { - // We tried to fetch, but no upstream - TRI_ASSERT(_rowState == ExecutionState::DONE); - s.addFiltered(_traverser.getAndResetFilteredPaths()); - s.addScannedIndex(_traverser.getAndResetReadDocuments()); - s.addHttpRequests(_traverser.getAndResetHttpRequests()); - return {_rowState, s}; - } - if (!resetTraverser()) { - // Could not start here, (invalid) - // Go to next - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; - continue; + if (output.isFull()) { + if (_traverser.hasMore()) { + state = ExecutorState::HASMORE; + break; + } else { + state = input.upstreamState(); + break; + } } - } - if (!_traverser.hasMore() || !_traverser.next()) { - // Nothing more to read, reset input to refetch - _input = InputAqlItemRow{CreateInvalidInputRowHint{}}; } else { - // traverser now has next v, e, p values - if (_infos.useVertexOutput()) { - AqlValue vertex = _traverser.lastVertexToAqlValue(); - AqlValueGuard guard{vertex, true}; - output.moveValueInto(_infos.vertexRegister(), _input, guard); - } - if (_infos.useEdgeOutput()) { - AqlValue edge = _traverser.lastEdgeToAqlValue(); - AqlValueGuard guard{edge, true}; - output.moveValueInto(_infos.edgeRegister(), _input, guard); + if (!initTraverser(input)) { + state = input.upstreamState(); + break; } - if (_infos.usePathOutput()) { - transaction::BuilderLeaser tmp(_traverser.trx()); - AqlValue path = _traverser.pathToAqlValue(*tmp.builder()); - AqlValueGuard guard{path, true}; - output.moveValueInto(_infos.pathRegister(), _input, guard); - } - s.addFiltered(_traverser.getAndResetFilteredPaths()); - s.addScannedIndex(_traverser.getAndResetReadDocuments()); - s.addHttpRequests(_traverser.getAndResetHttpRequests()); - return {computeState(), s}; + TRI_ASSERT(_inputRow.isInitialized()); } } - s.addFiltered(_traverser.getAndResetFilteredPaths()); - s.addScannedIndex(_traverser.getAndResetReadDocuments()); - return {ExecutionState::DONE, s}; + stats.addFiltered(_traverser.getAndResetFilteredPaths()); + stats.addScannedIndex(_traverser.getAndResetReadDocuments()); + stats.addHttpRequests(_traverser.getAndResetHttpRequests()); + + return {state, stats, AqlCall{}}; } -ExecutionState TraversalExecutor::computeState() const { - if (_rowState == ExecutionState::DONE && !_traverser.hasMore()) { - return ExecutionState::DONE; +auto TraversalExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + TraversalStats stats{}; + auto skipped = size_t{0}; + + while (true) { + skipped += doSkip(call); + + stats.addFiltered(_traverser.getAndResetFilteredPaths()); + stats.addScannedIndex(_traverser.getAndResetReadDocuments()); + stats.addHttpRequests(_traverser.getAndResetHttpRequests()); + + if (!_traverser.hasMore()) { + if (!initTraverser(input)) { + return {input.upstreamState(), stats, skipped, AqlCall{}}; + } + } else { + TRI_ASSERT(call.getOffset() == 0); + return {ExecutorState::HASMORE, stats, skipped, AqlCall{}}; + } } - return ExecutionState::HASMORE; } -bool TraversalExecutor::resetTraverser() { +// +// Set a new start vertex for traversal, for this fetch inputs +// from input until we are either successful or input is unwilling +// to give us more. +// +// TODO: this is quite a big function, refactor +bool TraversalExecutor::initTraverser(AqlItemBlockInputRange& input) { _traverser.clear(); - - // Initialize the Expressions within the options. - // We need to find the variable and read its value here. Everything is - // computed right now. 
auto opts = _traverser.options(); opts->clearVariableValues(); - for (auto const& pair : _infos.filterConditionVariables()) { - opts->setVariableValue(pair.first, _input.getValue(pair.second)); - } - if (opts->usesPrune()) { - auto* evaluator = opts->getPruneEvaluator(); - // Replace by inputRow - evaluator->prepareContext(_input); - } + // Now reset the traverser - if (_infos.usesFixedSource()) { - auto pos = _infos.getFixedSource().find('/'); - if (pos == std::string::npos) { - _traverser.options()->query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for traversal: " - "Only id strings or objects with " - "_id are allowed"); - return false; - } else { - // Use constant value - _traverser.setStartVertex(_infos.getFixedSource()); - return true; + // NOTE: It is correct to ask for whether there is a data row here + // even if we're using a constant start vertex, as we expect + // to provide output for every input row + while (input.hasDataRow()) { + // Try to acquire a starting vertex + std::tie(std::ignore, _inputRow) = input.nextDataRow(); + TRI_ASSERT(_inputRow.isInitialized()); + + if (opts->usesPrune()) { + auto* evaluator = opts->getPruneEvaluator(); + // Replace by inputRow + evaluator->prepareContext(_inputRow); + TRI_ASSERT(_inputRow.isInitialized()); } - } else { - AqlValue const& in = _input.getValue(_infos.getInputRegister()); - if (in.isObject()) { - try { - _traverser.setStartVertex(_traverser.options()->trx()->extractIdString(in.slice())); + + TRI_ASSERT(_inputRow.isInitialized()); + if (_infos.usesFixedSource()) { + auto pos = _infos.getFixedSource().find('/'); + if (pos == std::string::npos) { + _traverser.options()->query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for traversal: " + "Only id strings or objects with " + "_id are allowed"); + } else { + // Use constant value + _traverser.setStartVertex(_infos.getFixedSource()); + TRI_ASSERT(_inputRow.isInitialized()); return true; - } catch (...) { - // on purpose ignore this error. - return false; } - // _id or _key not present we cannot start here, register warning take next - } else if (in.isString()) { - _traverser.setStartVertex(in.slice().copyString()); - return true; } else { - _traverser.options()->query()->registerWarning( - TRI_ERROR_BAD_PARAMETER, - "Invalid input for traversal: Only " - "id strings or objects with _id are " - "allowed"); - return false; + AqlValue const& in = _inputRow.getValue(_infos.getInputRegister()); + if (in.isObject()) { + try { + _traverser.setStartVertex( + _traverser.options()->trx()->extractIdString(in.slice())); + TRI_ASSERT(_inputRow.isInitialized()); + return true; + } catch (...) { + // on purpose ignore this error. 
+ } + } else if (in.isString()) { + _traverser.setStartVertex(in.slice().copyString()); + TRI_ASSERT(_inputRow.isInitialized()); + return true; + } else { + // _id or _key not present we cannot start here, register warning take next + _traverser.options()->query()->registerWarning( + TRI_ERROR_BAD_PARAMETER, + "Invalid input for traversal: Only " + "id strings or objects with _id are " + "allowed"); + } } } + return false; } diff --git a/arangod/Aql/TraversalExecutor.h b/arangod/Aql/TraversalExecutor.h index f749e85d8301..a614fa0d5e56 100644 --- a/arangod/Aql/TraversalExecutor.h +++ b/arangod/Aql/TraversalExecutor.h @@ -23,6 +23,8 @@ #ifndef ARANGOD_AQL_TRAVERSAL_EXECUTOR_H #define ARANGOD_AQL_TRAVERSAL_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -136,22 +138,24 @@ class TraversalExecutor { * * @return ExecutionState, and if successful exactly one new Row of AqlItems. */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(OutputAqlItemRow& output) + -> std::pair; + + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; private: - /** - * @brief compute the return state - * @return DONE if traverser and remote are both done, HASMORE otherwise - */ - ExecutionState computeState() const; + auto doOutput(OutputAqlItemRow& output) -> void; + [[nodiscard]] auto doSkip(AqlCall& call) -> size_t; - bool resetTraverser(); + [[nodiscard]] bool initTraverser(AqlItemBlockInputRange& input); private: Infos& _infos; - Fetcher& _fetcher; - InputAqlItemRow _input; - ExecutionState _rowState; + InputAqlItemRow _inputRow; + traverser::Traverser& _traverser; }; diff --git a/tests/Aql/TraversalExecutorTest.cpp b/tests/Aql/TraversalExecutorTest.cpp index 597ad49e0bd1..f219a3cb1c27 100644 --- a/tests/Aql/TraversalExecutorTest.cpp +++ b/tests/Aql/TraversalExecutorTest.cpp @@ -36,6 +36,8 @@ #include "Graph/Traverser.h" #include "Graph/TraverserOptions.h" +#include "Aql/AqlItemBlockHelper.h" +#include "Aql/RowFetcherHelper.h" #include "Mocks/Servers.h" #include @@ -253,7 +255,7 @@ static TraverserOptions generateOptions(arangodb::aql::Query* query, size_t min, class TraversalExecutorTestInputStartVertex : public ::testing::Test { protected: - ExecutionState state; + ExecutorState state; mocks::MockAqlServer server; std::unique_ptr fakedQuery; @@ -302,145 +304,121 @@ class TraversalExecutorTestInputStartVertex : public ::testing::Test { {} }; -TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_producer_doesnt_wait) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - TraversalExecutor testee(fetcher, infos); - TraversalStats stats{}; - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} +TEST_F(TraversalExecutorTestInputStartVertex, there_are_no_rows_upstream_producer_doesnt_produce) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), false); -TEST_F(TraversalExecutorTestInputStartVertex, 
there_are_no_rows_upstream_producer_waits) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = buildBlock<1>(itemBlockManager, MatrixBuilder<1>{{{}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); + AqlCall call; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, result); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); } -TEST_F(TraversalExecutorTestInputStartVertex, there_are_rows_upstream_producer_doesnt_wait) { +TEST_F(TraversalExecutorTestInputStartVertex, there_are_rows_upstream_producer_produced) { myGraph.addVertex("1"); myGraph.addVertex("2"); myGraph.addVertex("3"); - auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = + buildBlock<1>(itemBlockManager, + MatrixBuilder<1>{{{{R"("v/1")"}}}, {{{R"("v/2")"}}}, {{{R"("v/3")"}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); + auto call = AqlCall{}; - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); - ASSERT_EQ(fetcher.nrCalled(), 3); ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); ASSERT_EQ(traverser->startVertexUsedAt(1), "v/2"); ASSERT_EQ(traverser->startVertexUsedAt(2), "v/3"); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); - ASSERT_EQ(fetcher.nrCalled(), 3); } -TEST_F(TraversalExecutorTestInputStartVertex, - there_are_rows_upstream_producer_waits_no_edges_are_connected) { +TEST_F(TraversalExecutorTestInputStartVertex, there_are_rows_no_edges_are_connected) { myGraph.addVertex("1"); myGraph.addVertex("2"); myGraph.addVertex("3"); - auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = + 
buildBlock<1>(itemBlockManager, + MatrixBuilder<1>{{{{R"("v/1")"}}}, {{{R"("v/2")"}}}, {{{R"("v/3")"}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); + auto call = AqlCall{}; - for (size_t i = 0; i < 3; ++i) { - // We expect to wait 3 times - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - } - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); - ASSERT_EQ(fetcher.nrCalled(), 3); ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); ASSERT_EQ(traverser->startVertexUsedAt(1), "v/2"); ASSERT_EQ(traverser->startVertexUsedAt(2), "v/3"); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); // WAITING is not part of called counts - ASSERT_EQ(fetcher.nrCalled(), 3); } -TEST_F(TraversalExecutorTestInputStartVertex, - there_are_rows_upstream_producer_waits_edges_are_connected) { +TEST_F(TraversalExecutorTestInputStartVertex, there_are_rows_upstream_edges_are_connected) { myGraph.addVertex("1"); myGraph.addVertex("2"); myGraph.addVertex("3"); - auto input = VPackParser::fromJson(R"([["v/1"], ["v/2"], ["v/3"]])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = + buildBlock<1>(itemBlockManager, + MatrixBuilder<1>{{{{R"("v/1")"}}}, {{{R"("v/2")"}}}, {{{R"("v/3")"}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + myGraph.addEdge("1", "2", "1->2"); myGraph.addEdge("2", "3", "2->3"); myGraph.addEdge("3", "1", "3->1"); + ExecutionStats total; OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); + auto call = AqlCall{}; - for (int64_t i = 0; i < 3; ++i) { - // We expect to wait 3 times - std::tie(state, stats) = testee.produceRows(row); - total += stats; - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_TRUE(row.produced()); - ASSERT_EQ(state, ExecutionState::HASMORE); - row.advanceRow(); - total += stats; - ASSERT_EQ(total.filtered, 0); - /* We cannot ASSERT this because of internally to complex - mechanism */ - // ASSERT_EQ(total.scannedIndex, i + 1); - ASSERT_EQ(fetcher.nrCalled(), (uint64_t)(i + 1)); - } - ASSERT_TRUE(fetcher.isDone()); - // The traverser will lie - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_FALSE(row.produced()); ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); @@ -462,7 +440,7 @@ TEST_F(TraversalExecutorTestInputStartVertex, class 
TraversalExecutorTestConstantStartVertex : public ::testing::Test { protected: - ExecutionState state; + ExecutorState state; mocks::MockAqlServer server; std::unique_ptr fakedQuery; @@ -508,34 +486,41 @@ class TraversalExecutorTestConstantStartVertex : public ::testing::Test { RegisterPlan::MaxRegisterId, filterConditionVariables) {} }; -TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_doesnt_wait) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); +TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_doesnt_produce) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = buildBlock<1>(itemBlockManager, MatrixBuilder<1>{{{{}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); + AqlCall call; + + std::tie(state, stats, call) = testee.produceRows(input, result); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_FALSE(result.produced()); } -TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream_producer_waits) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); +TEST_F(TraversalExecutorTestConstantStartVertex, no_rows_upstream) { + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = buildBlock<1>(itemBlockManager, MatrixBuilder<1>{{{{}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); + AqlCall call; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, result); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_FALSE(result.produced()); ASSERT_EQ(stats.getFiltered(), 0); } @@ -544,78 +529,80 @@ TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_doesnt_w myGraph.addVertex("1"); myGraph.addVertex("2"); myGraph.addVertex("3"); - auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), false); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; + auto inputBlock = + buildBlock<1>(itemBlockManager, + MatrixBuilder<1>{{{{R"("v/1")"}}}, {{{R"("v/2")"}}}, {{{R"("v/3")"}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), 
infos.registersToClear()); + AqlCall call; - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); - ASSERT_EQ(fetcher.nrCalled(), 3); ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); ASSERT_EQ(traverser->startVertexUsedAt(1), "v/1"); ASSERT_EQ(traverser->startVertexUsedAt(2), "v/1"); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); - ASSERT_EQ(fetcher.nrCalled(), 3); } TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_no_edges_connected) { myGraph.addVertex("1"); myGraph.addVertex("2"); myGraph.addVertex("3"); - auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - for (size_t i = 0; i < 3; ++i) { - // We expect to wait 3 times - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - } - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + auto inputBlock = + buildBlock<1>(itemBlockManager, + MatrixBuilder<1>{{{{R"("v/1")"}}}, {{{R"("v/2")"}}}, {{{R"("v/3")"}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + + AqlCall call; + + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); - ASSERT_EQ(fetcher.nrCalled(), 3); ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); ASSERT_EQ(traverser->startVertexUsedAt(1), "v/1"); ASSERT_EQ(traverser->startVertexUsedAt(2), "v/1"); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_EQ(stats.getFiltered(), 0); ASSERT_FALSE(row.produced()); - ASSERT_TRUE(fetcher.isDone()); // WAITING is not part of called counts - ASSERT_EQ(fetcher.nrCalled(), 3); } -TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_edges_connected) { +TEST_F(TraversalExecutorTestConstantStartVertex, rows_edges_connected) { myGraph.addVertex("1"); myGraph.addVertex("2"); myGraph.addVertex("3"); - auto input = VPackParser::fromJson(R"([ ["v/1"], ["v/2"], ["v/3"] ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( + itemBlockManager, VPackParser::fromJson("[]")->steal(), true); TraversalExecutor testee(fetcher, infos); TraversalStats stats{}; myGraph.addEdge("1", "2", "1->2"); @@ -625,27 +612,17 @@ 
TEST_F(TraversalExecutorTestConstantStartVertex, rows_upstream_producer_waits_ed OutputAqlItemRow row(std::move(block), infos.getOutputRegisters(), infos.registersToKeep(), infos.registersToClear()); - for (int64_t i = 0; i < 3; ++i) { - // We expect to wait 3 times - std::tie(state, stats) = testee.produceRows(row); - total += stats; - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - std::tie(state, stats) = testee.produceRows(row); - ASSERT_TRUE(row.produced()); - ASSERT_EQ(state, ExecutionState::HASMORE); - row.advanceRow(); - total += stats; - ASSERT_EQ(total.filtered, 0); - /* We cannot ASSERT this because of internally to complex - mechanism */ - // ASSERT_EQ(total.scannedIndex, i + 1); - ASSERT_EQ(fetcher.nrCalled(), (uint64_t)(i + 1)); - } - ASSERT_TRUE(fetcher.isDone()); + auto inputBlock = + buildBlock<1>(itemBlockManager, + MatrixBuilder<1>{{{{R"("v/1")"}}}, {{{R"("v/2")"}}}, {{{R"("v/3")"}}}}); + auto input = + AqlItemBlockInputRange{ExecutorState::DONE, 0, inputBlock, 0}; + + AqlCall call; + // The traverser will lie - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); + std::tie(state, stats, call) = testee.produceRows(input, row); + ASSERT_EQ(state, ExecutorState::DONE); ASSERT_FALSE(row.produced()); ASSERT_EQ(traverser->startVertexUsedAt(0), "v/1"); From 4dadc223aba766d02c04a86141575308a46bd70d Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Wed, 26 Feb 2020 21:01:42 +0000 Subject: [PATCH 076/122] Move SubqueryStartExecutor to new interface (#11025) * Move SubqueryStartExecutor to new interface * Diddle * Tests pass for SubqueryStartExecutor * Delete most of the SubqueryEndExecutor tests They are irrelevant now as they only tested shadowrow behaviour. Shadow Rows are now handled by ExecutionBlockImpl. * Skeleton tests * Junk * Cleanup and make work * Tweak ExecutorTestHelper to test pipelines * SubqueryStart/SubqueryEnd * Blafummel. * Modifications * Fixup tests * Address some comments * Fix. * Fix tests * Various fixes. * Made the branch compile on Mac * Intermediate commit, does not compile, required for merge * Fixed one of the SplicedSubqueryTests * Seperated ShadowRow handling for SubqueryStart/End executors to get a better overview and understanding on how it should behave. * Allow to use a fullCallStack in ExecutorTestHelper. * Added asserts on SubqueryEnd that is not allowed to be called with Skipped from upstream * Use AqlTestCase in SubqueryStartExecutorTest. Transformed first test to TestHelper pipeline * Let ExecutorTestHelper also test for shadowRows * Adapted SingleInput=>Data+Shadow test to testing Helepr and make it pass * Transformed existing SubqueryStart tests to Helper, fixed a test where there was no place for the ShadowRow to be written * Fixed Skip in SubqueryStartExecutor * Allow the Pipeline to add Consumer and dependency without fiddling with the Deque from outside * Added Skip adn ShadowRowForwarding tests to SubqueryStartExecutor * Fixed fullCount of SubqueryStart to be called properly. * Renamed isModificationSubquery -> isModificatioNode again * Consume more than one subquery en-block * Fixed debugReporting in SplicedSubqueryTest. Disabled the skip in subquery tests for now * Removed AQL debug log level from testing config * First version of Subquery Skip bypassing. Including 1 passing test. 
* Added disabled tests for skip, and proposed to do it later * Fixed reporting of FILTERED in fullCount of EnumerateCollectionExecutor * Enable calculationExecutor execute interface * Fixed compile issues * To Velocypack the modification Subquery value * Fixed illegal assert and reactivated failure tests * Fixed compile issue * Deactivated Calculation again * Added Compatibility Mode to AqlCallStack * Removed IdExecutor the only feature it has can be moved into IDexecutor in general. This still has to be done * When falling back in BlocksWithClients use compatibility stack * ConstFetcher now correctly reports if it skipped all input * Fixed a bug in ExecutionBlockImpl, which would go into FetchMore state, even if the Executor still has data and is happy to produce more, but the output is full. Co-authored-by: Michael Hackstein --- arangod/Aql/AqlCallStack.cpp | 29 +- arangod/Aql/AqlCallStack.h | 10 +- arangod/Aql/BlocksWithClients.cpp | 4 +- arangod/Aql/ClusterNodes.cpp | 2 +- arangod/Aql/ConstFetcher.cpp | 3 +- arangod/Aql/DistributeConsumerNode.cpp | 3 +- arangod/Aql/DistributeExecutor.cpp | 6 +- arangod/Aql/EnumerateCollectionExecutor.cpp | 20 +- arangod/Aql/ExecutionBlockImpl.cpp | 336 ++++++++++--- arangod/Aql/ExecutionBlockImpl.h | 11 + arangod/Aql/ExecutionEngine.cpp | 22 +- arangod/Aql/ExecutionNode.cpp | 9 +- arangod/Aql/ExecutionState.cpp | 3 + arangod/Aql/IdExecutor.cpp | 139 ++---- arangod/Aql/IdExecutor.h | 71 +-- arangod/Aql/OptimizerRules.cpp | 137 +++--- arangod/Aql/OutputAqlItemRow.cpp | 2 + arangod/Aql/ScatterExecutor.cpp | 6 +- arangod/Aql/ShadowAqlItemRow.cpp | 2 +- arangod/Aql/SubqueryEndExecutionNode.cpp | 32 +- arangod/Aql/SubqueryEndExecutionNode.h | 9 +- arangod/Aql/SubqueryEndExecutor.cpp | 144 +++--- arangod/Aql/SubqueryEndExecutor.h | 31 +- arangod/Aql/SubqueryStartExecutionNode.h | 3 + arangod/Aql/SubqueryStartExecutor.cpp | 191 +++----- arangod/Aql/SubqueryStartExecutor.h | 54 +-- tests/Aql/ExecutionBlockImplTest.cpp | 2 +- tests/Aql/ExecutionNodeTest.cpp | 22 +- tests/Aql/ExecutorTestHelper.cpp | 25 + tests/Aql/ExecutorTestHelper.h | 58 ++- tests/Aql/IdExecutorTest.cpp | 221 ++++++--- tests/Aql/SplicedSubqueryIntegrationTest.cpp | 471 +++++++++++++++++++ tests/Aql/SubqueryEndExecutorTest.cpp | 249 +--------- tests/Aql/SubqueryStartExecutorTest.cpp | 435 ++++++++++------- tests/Aql/WaitingExecutionBlockMock.cpp | 2 + tests/CMakeLists.txt | 1 + 36 files changed, 1681 insertions(+), 1084 deletions(-) create mode 100644 tests/Aql/SplicedSubqueryIntegrationTest.cpp diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 33975e259bf0..0b826fb6886b 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -29,8 +29,10 @@ using namespace arangodb; using namespace arangodb::aql; -AqlCallStack::AqlCallStack(AqlCall call) - : _operations{{std::move(call)}}, _depth(0) {} +AqlCallStack::AqlCallStack(AqlCall call, bool compatibilityMode3_6) + : _operations{{std::move(call)}}, + _depth(0), + _compatibilityMode3_6(compatibilityMode3_6) {} AqlCallStack::AqlCallStack(AqlCallStack const& other, AqlCall call) : _operations{other._operations}, _depth(0) { @@ -38,16 +40,30 @@ AqlCallStack::AqlCallStack(AqlCallStack const& other, AqlCall call) // Alothers need to use passThrough constructor TRI_ASSERT(other._depth == 0); _operations.push(std::move(call)); + _compatibilityMode3_6 = other._compatibilityMode3_6; } AqlCallStack::AqlCallStack(AqlCallStack const& other) - : _operations{other._operations}, _depth(other._depth) {} + : 
_operations{other._operations}, + _depth(other._depth), + _compatibilityMode3_6(other._compatibilityMode3_6) {} bool AqlCallStack::isRelevant() const { return _depth == 0; } AqlCall AqlCallStack::popCall() { TRI_ASSERT(isRelevant()); - TRI_ASSERT(!_operations.empty()); + TRI_ASSERT(_compatibilityMode3_6 || !_operations.empty()); + if (_compatibilityMode3_6 && _operations.empty()) { + // This is only for compatibility with 3.6 + // there we do not have the stack beeing passed-through + // in AQL, we only have a single call. + // We can only get into this state in the abscence of + // LIMIT => we always do an unlimted softLimit call + // to the upwards subquery. + // => Simply put another fetchAll Call on the stack. + // This code is to be removed in the next version after 3.7 + _operations.push(AqlCall{}); + } auto call = _operations.top(); _operations.pop(); return call; @@ -79,10 +95,7 @@ void AqlCallStack::stackUpMissingCalls() { void AqlCallStack::pop() { if (isRelevant()) { // We have one element to pop - TRI_ASSERT(!_operations.empty()); - _operations.pop(); - // We can never pop the main query, so one element needs to stay - TRI_ASSERT(!_operations.empty()); + std::ignore = popCall(); } else { _depth--; } diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 0566b2340cb6..16027bfcfd94 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -33,7 +33,7 @@ namespace aql { class AqlCallStack { public: // Initial - explicit AqlCallStack(AqlCall call); + explicit AqlCallStack(AqlCall call, bool compatibilityMode3_6 = false); // Used in subquery AqlCallStack(AqlCallStack const& other, AqlCall call); // Used to pass between blocks @@ -92,6 +92,14 @@ class AqlCallStack { // In most cases this will be zero. // However if we skip a subquery that has a nested subquery this depth will be 1 in the nested subquery. size_t _depth; + + // This flag will be set if and only if + // we are called with the 3.6 and earlier API + // As we only support upgrades between 3.6.* -> 3.7.* + // and not 3.6.* -> 3.8.* we can savely remove + // this flag and all it's side effects on the + // version after 3.7. 
+ bool _compatibilityMode3_6; }; } // namespace aql diff --git a/arangod/Aql/BlocksWithClients.cpp b/arangod/Aql/BlocksWithClients.cpp index 63adcc31c392..8df29015729f 100644 --- a/arangod/Aql/BlocksWithClients.cpp +++ b/arangod/Aql/BlocksWithClients.cpp @@ -285,7 +285,7 @@ auto BlocksWithClientsImpl::fetchMore(AqlCallStack stack) -> Execution template std::pair BlocksWithClientsImpl::getSomeForShard( size_t atMost, std::string const& shardId) { - AqlCallStack stack(AqlCall::SimulateGetSome(atMost)); + AqlCallStack stack(AqlCall::SimulateGetSome(atMost), true); auto [state, skipped, block] = executeForClient(stack, shardId); TRI_ASSERT(skipped == 0); return {state, block}; @@ -296,7 +296,7 @@ std::pair BlocksWithClientsImpl template std::pair BlocksWithClientsImpl::skipSomeForShard( size_t atMost, std::string const& shardId) { - AqlCallStack stack(AqlCall::SimulateSkipSome(atMost)); + AqlCallStack stack(AqlCall::SimulateSkipSome(atMost), true); auto [state, skipped, block] = executeForClient(stack, shardId); TRI_ASSERT(block == nullptr); return {state, skipped}; diff --git a/arangod/Aql/ClusterNodes.cpp b/arangod/Aql/ClusterNodes.cpp index c07fc82a499e..965442481607 100644 --- a/arangod/Aql/ClusterNodes.cpp +++ b/arangod/Aql/ClusterNodes.cpp @@ -511,7 +511,7 @@ std::unique_ptr GatherNode::createBlock( &engine, this, std::move(infos)); } else { IdExecutorInfos infos(getRegisterPlan()->nrRegs[getDepth()], - calcRegsToKeep(), getRegsToClear()); + calcRegsToKeep(), getRegsToClear(), false); return std::make_unique>(&engine, this, std::move(infos)); diff --git a/arangod/Aql/ConstFetcher.cpp b/arangod/Aql/ConstFetcher.cpp index bef0a172bef9..14ba394b584e 100644 --- a/arangod/Aql/ConstFetcher.cpp +++ b/arangod/Aql/ConstFetcher.cpp @@ -176,7 +176,8 @@ auto ConstFetcher::execute(AqlCallStack& stack) // No data to be returned // Block is dropped. resultBlock = nullptr; - return {ExecutionState::DONE, call.getSkipCount(), DataRange{ExecutorState::DONE}}; + return {ExecutionState::DONE, call.getSkipCount(), + DataRange{ExecutorState::DONE, call.getSkipCount()}}; } // Slowest path need to slice, this unfortunately requires copy of data diff --git a/arangod/Aql/DistributeConsumerNode.cpp b/arangod/Aql/DistributeConsumerNode.cpp index 7fc82474cc13..72e43fd8ff53 100644 --- a/arangod/Aql/DistributeConsumerNode.cpp +++ b/arangod/Aql/DistributeConsumerNode.cpp @@ -64,7 +64,8 @@ std::unique_ptr DistributeConsumerNode::createBlock( TRI_ASSERT(getRegisterPlan()->nrRegs[previousNode->getDepth()] == getRegisterPlan()->nrRegs[getDepth()]); IdExecutorInfos infos(getRegisterPlan()->nrRegs[getDepth()], calcRegsToKeep(), - getRegsToClear(), _distributeId, _isResponsibleForInitializeCursor); + getRegsToClear(), false, 0, _distributeId, + _isResponsibleForInitializeCursor); return std::make_unique>>>( &engine, this, std::move(infos)); } diff --git a/arangod/Aql/DistributeExecutor.cpp b/arangod/Aql/DistributeExecutor.cpp index 50970918e72b..c9b13abfa452 100644 --- a/arangod/Aql/DistributeExecutor.cpp +++ b/arangod/Aql/DistributeExecutor.cpp @@ -131,7 +131,11 @@ DistributeExecutor::ClientBlockData::ClientBlockData(ExecutionEngine& engine, // We only get shared ptrs to const data. so we need to copy here... IdExecutorInfos infos{scatterInfos.numberOfInputRegisters(), *scatterInfos.registersToKeep(), - *scatterInfos.registersToClear(), "", false}; + *scatterInfos.registersToClear(), + false, + 0, + "", + false}; // NOTE: Do never change this type! The execute logic below requires this and only this type. 
_executor = std::make_unique>>(&engine, node, diff --git a/arangod/Aql/EnumerateCollectionExecutor.cpp b/arangod/Aql/EnumerateCollectionExecutor.cpp index ca2b97ba660a..ee200f05ca62 100644 --- a/arangod/Aql/EnumerateCollectionExecutor.cpp +++ b/arangod/Aql/EnumerateCollectionExecutor.cpp @@ -170,7 +170,7 @@ std::tuple EnumerateCollection } uint64_t EnumerateCollectionExecutor::skipEntries(size_t toSkip, - EnumerateCollectionStats& stats) { + EnumerateCollectionStats& stats) { uint64_t actuallySkipped = 0; if (_infos.getFilter() == nullptr) { @@ -219,16 +219,22 @@ std::tuple EnumerateCo } } else { // fullCount phase - _cursor->skipAll(skipped); - stats.incrScanned(skipped); - _documentProducingFunctionContext.getAndResetNumScanned(); - TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); + if (_infos.getFilter() == nullptr) { + _cursor->skipAll(skipped); + stats.incrScanned(skipped); + /* For some reason this does not hold + * TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == skipped); + */ + _documentProducingFunctionContext.getAndResetNumScanned(); + } else { + // We need to call this to do the Accounting of FILTERED correctly. + skipped += skipEntries(ExecutionBlock::SkipAllSize(), stats); + } } _cursorHasMore = _cursor->hasMore(); call.didSkip(skipped); } } - if (_cursorHasMore) { return {ExecutorState::HASMORE, stats, call.getSkipCount(), upstreamCall}; } @@ -267,7 +273,6 @@ std::tuple EnumerateCollection _documentProducingFunctionContext.setOutputRow(&output); while (inputRange.hasDataRow() && !output.isFull()) { - if (!_cursorHasMore) { initializeNewRow(inputRange); } @@ -296,7 +301,6 @@ std::tuple EnumerateCollection THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } - if (!_cursorHasMore) { initializeNewRow(inputRange); } diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 6395cd3ad42b..f246e366e2bc 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -137,14 +137,16 @@ template constexpr bool isNewStyleExecutor = is_one_of_v, IdExecutor>, ReturnExecutor, IndexExecutor, EnumerateCollectionExecutor, - // TODO: re-enable after new subquery end & start are implemented - // CalculationExecutor, CalculationExecutor, CalculationExecutor, + /* + CalculationExecutor, CalculationExecutor, + CalculationExecutor,*/ HashedCollectExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - TraversalExecutor, KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; + SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, + ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -622,6 +624,17 @@ std::tuple ExecutionBlockImpl) { // Only this executor is fully implemented traceExecuteBegin(stack); + // silence tests -- we need to introduce new failure tests for fetchers + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome1") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome2") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome3") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + auto res = executeWithoutTrace(stack); traceExecuteEnd(res); return res; @@ -1053,8 +1066,15 @@ SharedAqlItemBlockPtr ExecutionBlockImpl::requestBlock(size_t nrItems, // ahead on 
the input range, fetching new blocks when necessary // EXECUTOR: the executor has a specialised skipRowsRange method // that will be called to skip +// SUBQUERY_START: +// SUBQUERY_END: // -enum class SkipRowsRangeVariant { FETCHER, EXECUTOR }; +enum class SkipRowsRangeVariant { + FETCHER, + EXECUTOR, + SUBQUERY_START, + SUBQUERY_END +}; // This function is just copy&pasted from above to decide which variant of // skip is used for which executor. @@ -1077,14 +1097,17 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert(!useFetcher || hasSkipRows::value, "Fetcher is chosen for skipping, but has not skipRows method!"); - static_assert(useExecutor == - (is_one_of_v>, + IdExecutor, HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS - TestLambdaSkipExecutor, + TestLambdaSkipExecutor, #endif - TraversalExecutor, EnumerateListExecutor, SortedCollectExecutor, LimitExecutor>), - "Unexpected executor for SkipVariants::EXECUTOR"); + TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, + SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor>), + "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! static_assert( @@ -1138,6 +1161,150 @@ auto ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); } +template <> +auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { + TRI_ASSERT(_outputItemRow); + TRI_ASSERT(_outputItemRow->isInitialized()); + TRI_ASSERT(!_outputItemRow->allRowsUsed()); + if (_lastRange.hasDataRow()) { + // If we have a dataRow, the executor needs to write it's output. + // If we get woken up by a dataRow during forwarding of ShadowRows + // This will return false, and if so we need to call produce instead. + auto didWrite = _executor.produceShadowRow(_lastRange, *_outputItemRow); + if (didWrite) { + if (_lastRange.hasShadowRow()) { + // Forward the ShadowRows + return ExecState::SHADOWROWS; + } + // If we have more input, + // For now we need to return + // here and cannot start another subquery. + // We do not know what to do with the next DataRow. + return ExecState::DONE; + } else { + // Woken up after shadowRow forwarding + // Need to call the Executor + return ExecState::CHECKCALL; + } + } else { + // Need to forward the ShadowRows + auto const& [state, shadowRow] = _lastRange.nextShadowRow(); + TRI_ASSERT(shadowRow.isInitialized()); + _outputItemRow->increaseShadowRowDepth(shadowRow); + TRI_ASSERT(_outputItemRow->produced()); + _outputItemRow->advanceRow(); + if (_lastRange.hasShadowRow()) { + return ExecState::SHADOWROWS; + } + // If we do not have more shadowRows + // we need to return. + return ExecState::DONE; + } +} + +template <> +auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { + TRI_ASSERT(_outputItemRow); + TRI_ASSERT(_outputItemRow->isInitialized()); + TRI_ASSERT(!_outputItemRow->allRowsUsed()); + if (!_lastRange.hasShadowRow()) { + // We got back without a ShadowRow in the LastRange + // Let client call again + return ExecState::DONE; + } + auto const& [state, shadowRow] = _lastRange.nextShadowRow(); + TRI_ASSERT(shadowRow.isInitialized()); + bool didConsume = false; + if (shadowRow.isRelevant()) { + // We need to consume the row, and write the Aggregate to it. 
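+ // (consumeShadowRow, implemented in SubqueryEndExecutor.cpp further below, steals the
+ //  accumulated subquery result and writes it into the output register of this
+ //  relevant shadow row)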
+ _executor.consumeShadowRow(shadowRow, *_outputItemRow); + didConsume = true; + } else { + _outputItemRow->decreaseShadowRowDepth(shadowRow); + } + + TRI_ASSERT(_outputItemRow->produced()); + _outputItemRow->advanceRow(); + + if (state == ExecutorState::DONE) { + // We have consumed everything, we are + // Done with this query + return ExecState::DONE; + } else if (_lastRange.hasDataRow()) { + // Multiple concatenated Subqueries + // This case is disallowed for now, as we do not know the + // look-ahead call + TRI_ASSERT(false); + // If we would know we could now go into a continue with next subquery + // state. + return ExecState::DONE; + } else if (_lastRange.hasShadowRow()) { + // We still have shadowRows, we + // need to forward them + return ExecState::SHADOWROWS; + } else { + if (didConsume) { + // We did only consume the input + // ask upstream + return ExecState::CHECKCALL; + } + // End of input, we are done for now + // Need to call again + return ExecState::DONE; + } +} + +template +auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { + TRI_ASSERT(_outputItemRow); + TRI_ASSERT(_outputItemRow->isInitialized()); + TRI_ASSERT(!_outputItemRow->allRowsUsed()); + if (!_lastRange.hasShadowRow()) { + // We got back without a ShadowRow in the LastRange + // Let client call again + return ExecState::DONE; + } + + auto const& [state, shadowRow] = _lastRange.nextShadowRow(); + TRI_ASSERT(shadowRow.isInitialized()); + + _outputItemRow->copyRow(shadowRow); + + if (shadowRow.isRelevant()) { + LOG_QUERY("6d337", DEBUG) << printTypeInfo() << " init executor."; + // We found a relevant shadow Row. + // We need to reset the Executor + // cppcheck-suppress unreadVariable + constexpr bool customInit = hasInitializeCursor::value; + InitializeCursor::init(_executor, _rowFetcher, _infos); + } + + TRI_ASSERT(_outputItemRow->produced()); + _outputItemRow->advanceRow(); + + if (state == ExecutorState::DONE) { + // We have consumed everything, we are + // Done with this query + return ExecState::DONE; + } else if (_lastRange.hasDataRow()) { + // Multiple concatenated Subqueries + // This case is disallowed for now, as we do not know the + // look-ahead call + TRI_ASSERT(false); + // If we would know we could now go into a continue with next subquery + // state. + return ExecState::DONE; + } else if (_lastRange.hasShadowRow()) { + // We still have shadowRows, we + // need to forward them + return ExecState::SHADOWROWS; + } else { + // End of input, we are done for now + // Need to call again + return ExecState::DONE; + } +} + /** * @brief Define the variant of FastForward behaviour * @@ -1145,7 +1312,6 @@ auto ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& * EXECUTOR => Call executeSkipRowsRange, but do not report what has been skipped. * (This instance is used to make sure Modifications are performed, or stats are correct) * FETCHER => Do not bother the Executor, drop all from input, without further reporting - * */ enum class FastForwardVariant { FULLCOUNT, EXECUTOR, FETCHER }; @@ -1170,6 +1336,16 @@ auto ExecutionBlockImpl::executeFastForward(AqlItemBlockInputRange& in AqlCall& clientCall) -> std::tuple { TRI_ASSERT(isNewStyleExecutor); + if constexpr (std::is_same_v) { + if (clientCall.needsFullCount() && clientCall.getOffset() == 0 && + clientCall.getLimit() == 0) { + // We can savely call skipRows. 
+ // It will not report anything if the row is already consumed + // Do not fastForward anything, the SubqueryStart will handle it by itself + return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}}; + } auto type = fastForwardType(clientCall, _executor); switch (type) { case FastForwardVariant::FULLCOUNT: @@ -1254,6 +1430,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { std::tie(_upstreamState, skippedLocal, bypassedRange) = _rowFetcher.execute(stack); return {_upstreamState, skippedLocal, bypassedRange.getBlock()}; } + AqlCall clientCall = stack.popCall(); ExecutorState localExecutorState = ExecutorState::DONE; @@ -1266,6 +1443,25 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _execState == ExecState::UPSTREAM); // Skip can only be > 0 if we are in upstream cases. TRI_ASSERT(_skipped == 0 || _execState == ExecState::UPSTREAM); + + if constexpr (std::is_same_v) { + // TODO: implement forwarding of SKIP properly: + // We need to modify the execute API to instead return a vector of skipped + // values. + // Then we can simply push a skip on the Stack here and let it forward. + // In case of a modification we need to NOT forward a skip, but instead do + // a limit := limit + offset call and a hardLimit 0 call on top of the stack. + TRI_ASSERT(!clientCall.needSkipMore()); + + // In the SubqueryEndExecutor we actually manage two calls. + // The clientCall defines what will go into the Executor. + // On SubqueryEnd this call is generated based on the call from downstream. + stack.pushCall(std::move(clientCall)); + // TODO: Implement the different kinds of calls we need to inject into the Executor, + // based on modification, or on forwarding. + // For now always use an unlimited fetch call. + clientCall = AqlCall{}; + } if (_execState == ExecState::UPSTREAM) { // We have been in waiting state. // We may have local work on the original call. @@ -1350,7 +1546,14 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _execState = ExecState::DONE; break; } - ensureOutputBlock(std::move(clientCall)); + if constexpr (std::is_same_v) { + TRI_ASSERT(!stack.empty()); + AqlCall const& subqueryCall = stack.peek(); + AqlCall copyCall = subqueryCall; + ensureOutputBlock(std::move(copyCall)); + } else { + ensureOutputBlock(std::move(clientCall)); + } TRI_ASSERT(_outputItemRow); // Execute getSome @@ -1359,14 +1562,19 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _engine->_stats += stats; localExecutorState = state; - // Produce might have modified the clientCall - clientCall = _outputItemRow->getClientCall(); + if constexpr (!std::is_same_v) { + // Produce might have modified the clientCall + // But only do this if we are not subquery. + clientCall = _outputItemRow->getClientCall(); + } if (state == ExecutorState::DONE) { _execState = ExecState::FASTFORWARD; } else if ((Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable || + clientCall.getLimit() > 0) && outputIsFull()) { // In pass through variant we need to stop whenever the block is full. + // In all other branches only if the client still needs more data.
_execState = ExecState::DONE; break; } else if (clientCall.getLimit() > 0 && !_lastRange.hasDataRow()) { @@ -1395,6 +1603,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _outputItemRow->allRowsUsed()) { // We have a block with data, but no more place for a shadow row. _execState = ExecState::DONE; + } else if (!_lastRange.hasShadowRow() && !_lastRange.hasDataRow()) { + _execState = ExecState::DONE; } else { _execState = ExecState::SHADOWROWS; } @@ -1415,16 +1625,31 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(!_lastRange.hasDataRow()); TRI_ASSERT(!_lastRange.hasShadowRow()); size_t skippedLocal = 0; - auto callCopy = _upstreamRequest; + #ifdef ARANGODB_ENABLE_MAINTAINER_MODE size_t subqueryLevelBefore = stack.subqueryLevel(); #endif - stack.pushCall(std::move(callCopy)); + // If we are SubqueryStart, we remove the top element of the stack + // which belongs to the subquery enclosed by this + // SubqueryStart and the partnered SubqueryEnd by *not* + // pushing the upstream request. + if constexpr (!std::is_same_v) { + auto callCopy = _upstreamRequest; + stack.pushCall(std::move(callCopy)); + } + std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); - // As the stack is copied into the fetcher, we need to pop off our call again. - // If we use other datastructures or moving we may hand over ownership of the stack here - // instead and no popCall is necessary. - stack.popCall(); + + if constexpr (std::is_same_v) { + // Do not pop the call, we did not put it on. + // However we need it for accounting later. + } else { + // As the stack is copied into the fetcher, we need to pop off our call again. + // If we use other datastructures or moving we may hand over ownership of the stack here + // instead and no popCall is necessary. + stack.popCall(); + } + #ifdef ARANGODB_ENABLE_MAINTAINER_MODE TRI_ASSERT(subqueryLevelBefore == stack.subqueryLevel()); #endif @@ -1449,49 +1674,41 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { break; } case ExecState::SHADOWROWS: { + // We only get Called with something in the input. + TRI_ASSERT(_lastRange.hasShadowRow() || _lastRange.hasDataRow()); LOG_QUERY("7c63c", DEBUG) << printTypeInfo() << " (sub-)query completed. Move ShadowRows."; - // TODO: Check if there is a situation where we are at this point, but at the end of a block - // Or if we would not recognize this beforehand + // TODO: Check if we can have the situation that we are between two shadow rows here. // E.g. LastRow is releveant shadowRow. NextRow is non-relevant shadowRow. // NOTE: I do not think this is an issue, as the Executor will always say that it cannot do anything with // an empty input. Only exception might be COLLECT COUNT. - if (_lastRange.hasShadowRow()) { - if (outputIsFull()) { - // We need to be able to write data - // But maybe the existing block is full here - // Then we need to wake up again here. - returnToState = ExecState::SHADOWROWS; - _execState = ExecState::DONE; - break; - } - auto const& [state, shadowRow] = _lastRange.nextShadowRow(); - TRI_ASSERT(shadowRow.isInitialized()); + + if (outputIsFull()) { + // We need to be able to write data + // But maybe the existing block is full here + // Then we need to wake up again here. 
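+ // (remember SHADOWROWS as the state to resume in on the next invocation of this block)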
+ returnToState = ExecState::SHADOWROWS; + _execState = ExecState::DONE; + break; + } + if constexpr (std::is_same_v) { + TRI_ASSERT(!stack.empty()); + AqlCall const& subqueryCall = stack.peek(); + AqlCall copyCall = subqueryCall; + ensureOutputBlock(std::move(copyCall)); + } else { ensureOutputBlock(std::move(clientCall)); - TRI_ASSERT(_outputItemRow); - TRI_ASSERT(_outputItemRow->isInitialized()); - - _outputItemRow->copyRow(shadowRow); - - if (shadowRow.isRelevant()) { - LOG_QUERY("6d337", DEBUG) << printTypeInfo() << " init executor."; - // We found a relevant shadow Row. - // We need to reset the Executor - // cppcheck-suppress unreadVariable - constexpr bool customInit = hasInitializeCursor::value; - InitializeCursor::init(_executor, _rowFetcher, _infos); - } - TRI_ASSERT(_outputItemRow->produced()); - _outputItemRow->advanceRow(); + } + + TRI_ASSERT(!_outputItemRow->allRowsUsed()); + + // This may write one or more rows. + _execState = shadowRowForwarding(); + if constexpr (!std::is_same_v) { + // Produce might have modified the clientCall + // But only do this if we are not subquery. clientCall = _outputItemRow->getClientCall(); - if (_outputItemRow->allRowsUsed()) { - _execState = ExecState::DONE; - } else if (state == ExecutorState::DONE) { - _execState = ExecState::DONE; - } - } else { - _execState = ExecState::DONE; } break; } @@ -1576,6 +1793,13 @@ auto ExecutionBlockImpl::outputIsFull() const noexcept -> bool { _outputItemRow->allRowsUsed(); } +template <> +template <> +RegisterId ExecutionBlockImpl>>::getOutputRegisterId() const + noexcept { + return _infos.getOutputRegister(); +} + template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index fd6a486412f4..43d7f4e28d47 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -37,6 +37,9 @@ namespace arangodb::aql { +template +class SingleRowFetcher; + template class IdExecutor; @@ -221,6 +224,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { /// 3. SharedAqlItemBlockPtr: The next data block. std::tuple execute(AqlCallStack stack) override; + template >>>> + [[nodiscard]] RegisterId getOutputRegisterId() const noexcept; + private: /** * @brief Inner execute() part, without the tracing calls. @@ -286,6 +292,11 @@ class ExecutionBlockImpl final : public ExecutionBlock { // Can only be one of Skip/Produce/FullCount/FastForward/Done [[nodiscard]] auto nextState(AqlCall const& call) const -> ExecState; + // Executor is done, we need to handle ShadowRows of subqueries. + // In most executors they are simply copied, in subquery executors + // there needs to be actions applied here. + [[nodiscard]] auto shadowRowForwarding() -> ExecState; + [[nodiscard]] auto outputIsFull() const noexcept -> bool; private: diff --git a/arangod/Aql/ExecutionEngine.cpp b/arangod/Aql/ExecutionEngine.cpp index 77c0b16d3f83..28adf34b8de9 100644 --- a/arangod/Aql/ExecutionEngine.cpp +++ b/arangod/Aql/ExecutionEngine.cpp @@ -574,7 +574,13 @@ std::pair ExecutionEngine::getSome(size_t return {res.first, nullptr}; } } - return _root->getSome((std::min)(atMost, ExecutionBlock::DefaultBatchSize)); + // we use a backwards compatible stack here. 
+ // This will always continue with a fetch-all on underlying subqueries (if any) + AqlCallStack compatibilityStack{AqlCall::SimulateGetSome(atMost), true}; + auto const [state, skipped, block] = _root->execute(std::move(compatibilityStack)); + // We cannot trigger a skip operation from here + TRI_ASSERT(skipped == 0); + return {state, block}; } std::pair ExecutionEngine::skipSome(size_t atMost) { @@ -587,7 +593,15 @@ std::pair ExecutionEngine::skipSome(size_t atMost) { return {res.first, 0}; } } - return _root->skipSome(atMost); + + // we use a backwards compatible stack here. + // This will always continue with a fetch-all on underlying subqueries (if any) + AqlCallStack compatibilityStack{AqlCall::SimulateSkipSome(atMost), true}; + auto const [state, skipped, block] = _root->execute(std::move(compatibilityStack)); + // We cannot be triggered within a subquery from earlier versions. + // Also we cannot produce anything ourselfes here. + TRI_ASSERT(block == nullptr); + return {state, skipped}; } Result ExecutionEngine::shutdownSync(int errorCode) noexcept try { @@ -713,7 +727,9 @@ ExecutionEngine* ExecutionEngine::instantiateFromPlan(QueryRegistry& queryRegist bool const returnInheritedResults = !arangodb::ServerState::isDBServer(role); if (returnInheritedResults) { - auto returnNode = dynamic_cast>*>(root); + auto returnNode = + dynamic_cast>>*>( + root); TRI_ASSERT(returnNode != nullptr); engine->resultRegister(returnNode->getOutputRegisterId()); } else { diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index 184d1768c20e..f5eb0184b5b7 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -1280,7 +1280,7 @@ std::unique_ptr SingletonNode::createBlock( } } - IdExecutorInfos infos(nrRegs, std::move(toKeep), getRegsToClear()); + IdExecutorInfos infos(nrRegs, std::move(toKeep), getRegsToClear(), false); return std::make_unique>>(&engine, this, std::move(infos)); @@ -2165,8 +2165,11 @@ std::unique_ptr ReturnNode::createBlock( getRegisterPlan()->nrRegs[previousNode->getDepth()]; if (returnInheritedResults) { - return std::make_unique>>(&engine, this, - inputRegister, _count); + // TODO Check for keep and clear registers. + // As we are passthrough i think they do not matter + IdExecutorInfos infos(numberInputRegisters, {}, {}, _count, inputRegister); + return std::make_unique>>>( + &engine, this, std::move(infos)); } else { TRI_ASSERT(!returnInheritedResults); // The Return Executor only writes to register 0. 
diff --git a/arangod/Aql/ExecutionState.cpp b/arangod/Aql/ExecutionState.cpp index 39b426fb6d46..ca7135bb4f11 100644 --- a/arangod/Aql/ExecutionState.cpp +++ b/arangod/Aql/ExecutionState.cpp @@ -50,6 +50,9 @@ std::ostream& operator<<(std::ostream& ostream, ExecutorState state) { case ExecutorState::HASMORE: ostream << "HASMORE"; break; + default: + ostream << " WAT WAT WAT"; + break; } return ostream; } diff --git a/arangod/Aql/IdExecutor.cpp b/arangod/Aql/IdExecutor.cpp index 7965e7fbb4e5..1350038678e0 100644 --- a/arangod/Aql/IdExecutor.cpp +++ b/arangod/Aql/IdExecutor.cpp @@ -40,106 +40,30 @@ using namespace arangodb; using namespace arangodb::aql; -ExecutionBlockImpl>::ExecutionBlockImpl(ExecutionEngine* engine, - ExecutionNode const* node, - RegisterId outputRegister, bool doCount) - : ExecutionBlock(engine, node), - _currentDependency(0), - _outputRegister(outputRegister), - _doCount(doCount) { - // already insert ourselves into the statistics results - if (_profile >= PROFILE_LEVEL_BLOCKS) { - _engine->_stats.nodes.try_emplace(node->id(), ExecutionStats::Node()); - } -} - -std::pair ExecutionBlockImpl>::skipSome(size_t atMost) { - traceSkipSomeBegin(atMost); - if (isDone()) { - return traceSkipSomeEnd(ExecutionState::DONE, 0); - } - - ExecutionState state; - size_t skipped; - std::tie(state, skipped) = currentDependency().skipSome(atMost); - - if (state == ExecutionState::DONE) { - nextDependency(); - } - - return traceSkipSomeEnd(state, skipped); -} - -std::pair ExecutionBlockImpl>::getSome(size_t atMost) { - traceGetSomeBegin(atMost); - if (isDone()) { - return traceGetSomeEnd(ExecutionState::DONE, nullptr); - } - - ExecutionState state; - SharedAqlItemBlockPtr block; - std::tie(state, block) = currentDependency().getSome(atMost); - - countStats(block); - - if (state == ExecutionState::DONE) { - nextDependency(); - } - - return traceGetSomeEnd(state, block); -} - -bool aql::ExecutionBlockImpl>::isDone() const noexcept { - // I'd like to assert this in the constructor, but the dependencies are - // added after construction. 
- TRI_ASSERT(!_dependencies.empty()); - return _currentDependency >= _dependencies.size(); -} - -RegisterId ExecutionBlockImpl>::getOutputRegisterId() const noexcept { - return _outputRegister; -} - -std::tuple -ExecutionBlockImpl>::execute(AqlCallStack stack) { - // TODO Implement me - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); -} - -ExecutionBlock& ExecutionBlockImpl>::currentDependency() const { - TRI_ASSERT(_currentDependency < _dependencies.size()); - TRI_ASSERT(_dependencies[_currentDependency] != nullptr); - return *_dependencies[_currentDependency]; -} - -void ExecutionBlockImpl>::nextDependency() noexcept { - ++_currentDependency; -} - -bool ExecutionBlockImpl>::doCount() const noexcept { - return _doCount; -} - -void ExecutionBlockImpl>::countStats(SharedAqlItemBlockPtr& block) { - if (doCount() && block != nullptr) { - CountStats stats; - stats.setCounted(block->size()); - _engine->_stats += stats; - } -} - IdExecutorInfos::IdExecutorInfos(RegisterId nrInOutRegisters, // cppcheck-suppress passedByValue std::unordered_set registersToKeep, // cppcheck-suppress passedByValue std::unordered_set registersToClear, + bool doCount, RegisterId outputRegister, std::string distributeId, bool isResponsibleForInitializeCursor) : ExecutorInfos(make_shared_unordered_set(), make_shared_unordered_set(), nrInOutRegisters, nrInOutRegisters, std::move(registersToClear), std::move(registersToKeep)), + _doCount(doCount), + _outputRegister(outputRegister), _distributeId(std::move(distributeId)), - _isResponsibleForInitializeCursor(isResponsibleForInitializeCursor) {} + _isResponsibleForInitializeCursor(isResponsibleForInitializeCursor) { + // We can only doCount in the case where this executor is used as a Return. + // And we can only have a distributeId if this executor is used as Gather. + TRI_ASSERT(!_doCount || _distributeId.empty()); +} + +auto IdExecutorInfos::doCount() const noexcept -> bool { return _doCount; } + +auto IdExecutorInfos::getOutputRegister() const noexcept -> RegisterId { + return _outputRegister; +} std::string const& IdExecutorInfos::distributeId() { return _distributeId; } @@ -149,7 +73,7 @@ bool IdExecutorInfos::isResponsibleForInitializeCursor() const { template IdExecutor::IdExecutor(Fetcher& fetcher, IdExecutorInfos& infos) - : _fetcher(fetcher) { + : _fetcher(fetcher), _infos(infos) { if (!infos.distributeId().empty()) { _fetcher.setDistributeId(infos.distributeId()); } @@ -158,18 +82,14 @@ IdExecutor::IdExecutor(Fetcher& fetcher, IdExecutorInfos& infos) template IdExecutor::~IdExecutor() = default; -template -std::pair IdExecutor::produceRows(OutputAqlItemRow& output) { - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); -} - template auto IdExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) - -> std::tuple { - NoStats stats; - + -> std::tuple { + CountStats stats; + TRI_ASSERT(output.numRowsWritten() == 0); + // TODO: We can implement a fastForward copy here. 
+ // We know that all rows we have will fit into the output while (!output.isFull() && inputRange.hasDataRow()) { auto const& [state, inputRow] = inputRange.nextDataRow(); TRI_ASSERT(inputRow); @@ -187,10 +107,29 @@ auto IdExecutor::produceRows(AqlItemBlockInputRange& inputRange, THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } + if (_infos.doCount()) { + stats.addCounted(output.numRowsWritten()); + } return {inputRange.upstreamState(), stats, output.getClientCall()}; } +template +auto IdExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + CountStats stats; + size_t skipped = 0; + if (call.getLimit() > 0) { + // we can only account for offset + skipped = inputRange.skip(call.getOffset()); + } else { + skipped = inputRange.skipAll(); + } + call.didSkip(skipped); + // TODO: Do we need to do counting here? + return {inputRange.upstreamState(), stats, skipped, call}; +} + template std::tuple::Stats, SharedAqlItemBlockPtr> IdExecutor::fetchBlockForPassthrough(size_t atMost) { diff --git a/arangod/Aql/IdExecutor.h b/arangod/Aql/IdExecutor.h index 10e63216e50c..dbab3c746ebc 100644 --- a/arangod/Aql/IdExecutor.h +++ b/arangod/Aql/IdExecutor.h @@ -33,9 +33,6 @@ // There are currently three variants of IdExecutor in use: // -// - IdExecutor -// This is a variant of the ReturnBlock. It can optionally count and holds -// an output register id. // - IdExecutor // This is the SingletonBlock. // - IdExecutor> @@ -58,14 +55,14 @@ class AqlItemBlockInputRange; class ExecutionEngine; class ExecutionNode; class ExecutorInfos; -class NoStats; +class CountStats; class OutputAqlItemRow; class IdExecutorInfos : public ExecutorInfos { public: IdExecutorInfos(RegisterId nrInOutRegisters, std::unordered_set registersToKeep, - std::unordered_set registersToClear, - std::string distributeId = {""}, + std::unordered_set registersToClear, bool doCount, + RegisterId outputRegister = 0, std::string distributeId = {""}, bool isResponsibleForInitializeCursor = true); IdExecutorInfos() = delete; @@ -73,11 +70,19 @@ class IdExecutorInfos : public ExecutorInfos { IdExecutorInfos(IdExecutorInfos const&) = delete; ~IdExecutorInfos() = default; + [[nodiscard]] auto doCount() const noexcept -> bool; + + [[nodiscard]] auto getOutputRegister() const noexcept -> RegisterId; + [[nodiscard]] std::string const& distributeId(); [[nodiscard]] bool isResponsibleForInitializeCursor() const; private: + bool _doCount; + + RegisterId _outputRegister; + std::string const _distributeId; bool const _isResponsibleForInitializeCursor; @@ -87,44 +92,6 @@ class IdExecutorInfos : public ExecutorInfos { template class IdExecutor; -// (empty) implementation of IdExecutor -template <> -class IdExecutor {}; - -// implementation of ExecutionBlockImpl> -template <> -class ExecutionBlockImpl> : public ExecutionBlock { - public: - ExecutionBlockImpl(ExecutionEngine* engine, ExecutionNode const* node, - RegisterId outputRegister, bool doCount); - - ~ExecutionBlockImpl() override = default; - - std::pair getSome(size_t atMost) override; - - std::pair skipSome(size_t atMost) override; - - [[nodiscard]] RegisterId getOutputRegisterId() const noexcept; - - std::tuple execute(AqlCallStack stack) override; - - private: - [[nodiscard]] bool isDone() const noexcept; - - [[nodiscard]] ExecutionBlock& currentDependency() const; - - void nextDependency() noexcept; - - [[nodiscard]] bool doCount() const noexcept; - - void countStats(SharedAqlItemBlockPtr& block); - - private: - size_t _currentDependency; - RegisterId const 
_outputRegister; - bool const _doCount; -}; - template // cppcheck-suppress noConstructor class IdExecutor { @@ -137,19 +104,11 @@ class IdExecutor { // Only Supports SingleRowFetcher and ConstFetcher using Fetcher = UsedFetcher; using Infos = IdExecutorInfos; - using Stats = NoStats; + using Stats = CountStats; - IdExecutor(Fetcher& fetcher, IdExecutorInfos&); + IdExecutor(Fetcher&, IdExecutorInfos& infos); ~IdExecutor(); - /** - * @brief produce the next Row of Aql Values. - * - * @return ExecutionState, - * if something was written output.hasValue() == true - */ - std::pair produceRows(OutputAqlItemRow& output); - /** * @brief produce the next Row of Aql Values. * @@ -158,11 +117,15 @@ class IdExecutor { auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> std::tuple; + auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; + // Deprecated remove me std::tuple fetchBlockForPassthrough(size_t atMost); private: Fetcher& _fetcher; + Infos& _infos; }; } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 9f6884c930fb..ec57cf481828 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -608,10 +608,9 @@ std::vector const patchUpdateRemoveState arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE, arangodb::aql::ExecutionNode::REMOVE}; std::vector const moveFilterIntoEnumerateTypes{ - arangodb::aql::ExecutionNode::ENUMERATE_COLLECTION, - arangodb::aql::ExecutionNode::INDEX}; + arangodb::aql::ExecutionNode::ENUMERATE_COLLECTION, arangodb::aql::ExecutionNode::INDEX}; std::vector const undistributeNodeTypes{ - arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE, + arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE, arangodb::aql::ExecutionNode::REMOVE}; /// @brief find the single shard id for the node to restrict an operation to @@ -1502,7 +1501,7 @@ class PropagateConstantAttributesHelper { if (it == _constants.end()) { _constants.try_emplace(variable, - std::unordered_map{{name, value}}); + std::unordered_map{{name, value}}); return; } @@ -2791,7 +2790,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(Optimizer* opt, // no COLLECT found, now replace std::unordered_map replacements; replacements.try_emplace(outVariable->id, - static_cast(rootNode->getData())); + static_cast(rootNode->getData())); RedundantCalculationsReplacer finder(plan->getAst(), replacements); plan->root()->walk(finder); @@ -3112,7 +3111,7 @@ struct SortToIndexNode final : public WalkerWorker { bool isSorted = index->isSorted(); bool isSparse = index->sparse(); std::vector> fields = index->fields(); - + if (indexes.size() != 1) { // can only use this index node if it uses exactly one index or multiple // indexes on exactly the same attributes @@ -3766,11 +3765,12 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptrnumberOfShards()); - auto const parallelism = (((collection->isSmart() && collection->type() == TRI_COL_TYPE_EDGE) || - (collection->numberOfShards() <= 1 && !collection->isSatellite())) ? - GatherNode::Parallelism::Serial : - GatherNode::Parallelism::Undefined); - auto* gatherNode = new GatherNode(plan.get(), plan->nextId(), sortMode, parallelism); + auto const parallelism = + (((collection->isSmart() && collection->type() == TRI_COL_TYPE_EDGE) || + (collection->numberOfShards() <= 1 && !collection->isSatellite())) + ? 
GatherNode::Parallelism::Serial + : GatherNode::Parallelism::Undefined); + auto* gatherNode = new GatherNode(plan.get(), plan->nextId(), sortMode, parallelism); plan->registerNode(gatherNode); TRI_ASSERT(remoteNode); gatherNode->addDependency(remoteNode); @@ -3980,8 +3980,8 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, // an UPSERT node has two input variables! auto upsertNode = ExecutionNode::castTo(node); auto d = new DistributeNode(plan.get(), plan->nextId(), - ScatterNode::ScatterType::SHARD, - collection, upsertNode->inDocVariable(), + ScatterNode::ScatterType::SHARD, collection, + upsertNode->inDocVariable(), upsertNode->insertVariable(), true, true); d->setAllowSpecifiedKeys(true); distNode = ExecutionNode::castTo(d); @@ -4869,8 +4869,8 @@ class RemoveToEnumCollFinder final : public WalkerWorker { bool before(ExecutionNode* en) override final { switch (en->getType()) { - case EN::UPDATE: - case EN::REPLACE: + case EN::UPDATE: + case EN::REPLACE: case EN::REMOVE: { if (_foundModification) { break; @@ -4879,7 +4879,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker { // find the variable we are removing . . . auto rn = ExecutionNode::castTo(en); Variable const* toRemove = nullptr; - + if (en->getType() == EN::REPLACE) { toRemove = ExecutionNode::castTo(en)->inKeyVariable(); } else if (en->getType() == EN::UPDATE) { @@ -4898,7 +4898,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker { _setter = _plan->getVarSetBy(toRemove->id); TRI_ASSERT(_setter != nullptr); auto enumColl = _setter; - + if (_setter->getType() == EN::CALCULATION) { // this should be an attribute access for _key auto cn = ExecutionNode::castTo(_setter); @@ -4914,7 +4914,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker { if (shardKeys.size() != 1 || shardKeys[0] != StaticStrings::KeyString) { break; // abort . . . } - + // set the varsToRemove to the variable in the expression of this // node and also define enumColl ::arangodb::containers::HashSet varsToRemove; @@ -4998,10 +4998,11 @@ class RemoveToEnumCollFinder final : public WalkerWorker { break; // abort . . . } - auto const& projections = dynamic_cast(enumColl)->projections(); - if (projections.size() > 1 || + auto const& projections = + dynamic_cast(enumColl)->projections(); + if (projections.size() > 1 || (!projections.empty() && projections[0] != StaticStrings::KeyString)) { - // cannot handle projections + // cannot handle projections break; } @@ -5022,7 +5023,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker { case EN::DISTRIBUTE: case EN::SCATTER: { if (_foundScatter) { // met more than one scatter node - break; // abort . . . + break; // abort . . . } _foundScatter = true; _toUnlink.emplace(en); @@ -5030,7 +5031,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker { } case EN::GATHER: { if (_foundGather) { // met more than one gather node - break; // abort . . . + break; // abort . . . 
} _foundGather = true; _toUnlink.emplace(en); @@ -5075,7 +5076,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker { TRI_ASSERT(false); } } - + _toUnlink.clear(); return true; } @@ -5421,7 +5422,8 @@ void arangodb::aql::replaceOrWithInRule(Optimizer* opt, std::unique_ptrnextId(), std::move(expr), outVar); + ExecutionNode* newNode = + new CalculationNode(plan.get(), plan->nextId(), std::move(expr), outVar); plan->registerNode(newNode); plan->replaceNode(cn, newNode); @@ -5585,7 +5587,8 @@ void arangodb::aql::removeRedundantOrRule(Optimizer* opt, auto astNode = remover.createReplacementNode(plan->getAst()); auto expr = std::make_unique(plan.get(), plan->getAst(), astNode); - ExecutionNode* newNode = new CalculationNode(plan.get(), plan->nextId(), std::move(expr), outVar); + ExecutionNode* newNode = + new CalculationNode(plan.get(), plan->nextId(), std::move(expr), outVar); plan->registerNode(newNode); plan->replaceNode(cn, newNode); modified = true; @@ -6956,7 +6959,8 @@ void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr bool mod = false; // If there isn't a limit node, and at least one sort or gather node, there's // nothing to do. - if (!plan->contains(EN::LIMIT) || (!plan->contains(EN::SORT) && !plan->contains(EN::GATHER))) { + if (!plan->contains(EN::LIMIT) || + (!plan->contains(EN::SORT) && !plan->contains(EN::GATHER))) { opt->addPlan(std::move(plan), rule, mod); return; } @@ -7075,10 +7079,10 @@ void arangodb::aql::optimizeSubqueriesRule(Optimizer* opt, if (found.first != nullptr) { auto it = subqueryAttributes.find(found.first); if (it == subqueryAttributes.end()) { - subqueryAttributes.try_emplace(found.first, - std::make_tuple(found.second, - std::unordered_set{n}, - usedForCount)); + subqueryAttributes.try_emplace( + found.first, std::make_tuple(found.second, + std::unordered_set{n}, + usedForCount)); } else { auto& sq = (*it).second; if (usedForCount) { @@ -7199,7 +7203,8 @@ void arangodb::aql::optimizeSubqueriesRule(Optimizer* opt, } /// @brief move filters into EnumerateCollection nodes -void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const& rule) { bool modified = false; @@ -7212,12 +7217,14 @@ void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, std::unique_ptr for (auto const& n : nodes) { auto en = dynamic_cast(n); if (en == nullptr) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unable to cast node to DocumentProducingNode"); + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_INTERNAL, "unable to cast node to DocumentProducingNode"); } - if (n->getType() == EN::INDEX && ExecutionNode::castTo(n)->getIndexes().size() != 1) { - // we can only handle exactly one index right now. otherwise some IndexExecutor code - // may assert and fail + if (n->getType() == EN::INDEX && + ExecutionNode::castTo(n)->getIndexes().size() != 1) { + // we can only handle exactly one index right now. 
otherwise some + // IndexExecutor code may assert and fail continue; } @@ -7241,7 +7248,7 @@ void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, std::unique_ptr if (calculations.empty()) { break; } - + auto filterNode = ExecutionNode::castTo(current); Variable const* inVariable = filterNode->inVariable(); @@ -7255,8 +7262,10 @@ void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, std::unique_ptr Expression* existingFilter = en->filter(); if (existingFilter != nullptr && existingFilter->node() != nullptr) { // node already has a filter, now AND-merge it with what we found! - AstNode* merged = plan->getAst()->createNodeBinaryOperator( - NODE_TYPE_OPERATOR_BINARY_AND, existingFilter->node(), expr->node()); + AstNode* merged = + plan->getAst()->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, + existingFilter->node(), + expr->node()); en->setFilter(std::make_unique(plan.get(), plan->getAst(), merged)); } else { @@ -7304,15 +7313,16 @@ void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, std::unique_ptr } /// @brief parallelize coordinator GatherNodes -void arangodb::aql::parallelizeGatherRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::parallelizeGatherRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const& rule) { TRI_ASSERT(ServerState::instance()->isCoordinator()); - + bool modified = false; // find all GatherNodes in the main query, starting from the query's root node // (the node most south when looking at the query execution plan). - // + // // for now, we effectively stop right after the first GatherNode we found, regardless // of whether we can make that node use parallelism or not. // the reason we have to stop here is that if we have multiple query snippets on a @@ -7320,42 +7330,39 @@ void arangodb::aql::parallelizeGatherRule(Optimizer* opt, std::unique_ptr::allocator_type::arena_type a; ::arangodb::containers::SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::GATHER, true); - if (nodes.size() == 1 && - !plan->contains(EN::TRAVERSAL) && - !plan->contains(EN::SHORTEST_PATH) && - !plan->contains(EN::K_SHORTEST_PATHS) && - !plan->contains(EN::DISTRIBUTE) && - !plan->contains(EN::SCATTER)) { + if (nodes.size() == 1 && !plan->contains(EN::TRAVERSAL) && + !plan->contains(EN::SHORTEST_PATH) && !plan->contains(EN::K_SHORTEST_PATHS) && + !plan->contains(EN::DISTRIBUTE) && !plan->contains(EN::SCATTER)) { GatherNode* gn = ExecutionNode::castTo(nodes[0]); if (!gn->isInSubquery() && gn->isParallelizable()) { @@ -7428,7 +7435,8 @@ void findSubqueriesSuitableForSplicing(ExecutionPlan const& plan, using ResultVector = decltype(result); using BoolVec = std::vector>; - using SuitableNodeSet = std::set, short_alloc>; + using SuitableNodeSet = + std::set, short_alloc>; // This finder adds all subquery nodes in pre-order to its `result` parameter, // and all nodes that are suitable for splicing to `suitableNodes`. 
Suitable @@ -7519,11 +7527,10 @@ void findSubqueriesSuitableForSplicing(ExecutionPlan const& plan, auto finder = Finder{result, suitableNodes}; plan.root()->walkSubqueriesFirst(finder); - - { // remove unsuitable nodes from result + { // remove unsuitable nodes from result auto i = size_t{0}; auto j = size_t{0}; - for(; j < result.size(); ++j) { + for (; j < result.size(); ++j) { TRI_ASSERT(i <= j); if (suitableNodes.count(result[j]) > 0) { if (i != j) { @@ -7541,7 +7548,7 @@ void findSubqueriesSuitableForSplicing(ExecutionPlan const& plan, result.resize(i); } } -} +} // namespace // Splices in subqueries by replacing subquery nodes by // a SubqueryStartNode and a SubqueryEndNode with the subquery's nodes @@ -7578,10 +7585,11 @@ void arangodb::aql::spliceSubqueriesRule(Optimizer* opt, std::unique_ptrcreateNode(plan.get(), plan->nextId(), sq->outVariable()); + auto start = plan->createNode(plan.get(), plan->nextId(), + sq->outVariable()); // start and end inherit this property from the subquery node start->setIsInSplicedSubquery(sq->isInSplicedSubquery()); @@ -7640,7 +7648,7 @@ void arangodb::aql::spliceSubqueriesRule(Optimizer* opt, std::unique_ptrgetSubquery(); Variable const* inVariable = nullptr; @@ -7658,7 +7666,8 @@ void arangodb::aql::spliceSubqueriesRule(Optimizer* opt, std::unique_ptrcreateNode(plan.get(), plan->nextId(), - inVariable, sq->outVariable()); + inVariable, sq->outVariable(), + sq->isModificationSubquery()); // start and end inherit this property from the subquery node end->setIsInSplicedSubquery(sq->isInSplicedSubquery()); // insert a SubqueryEndNode after the SubqueryNode sq diff --git a/arangod/Aql/OutputAqlItemRow.cpp b/arangod/Aql/OutputAqlItemRow.cpp index df485265ebc7..9eddbcf13a1a 100644 --- a/arangod/Aql/OutputAqlItemRow.cpp +++ b/arangod/Aql/OutputAqlItemRow.cpp @@ -42,6 +42,8 @@ The following conditions need to hold true, we need to add c++ tests for this. #include #include +#include "Logger/LogMacros.h" + using namespace arangodb; using namespace arangodb::aql; diff --git a/arangod/Aql/ScatterExecutor.cpp b/arangod/Aql/ScatterExecutor.cpp index 40d1c2208cf2..bdf56efdb391 100644 --- a/arangod/Aql/ScatterExecutor.cpp +++ b/arangod/Aql/ScatterExecutor.cpp @@ -48,7 +48,11 @@ ScatterExecutor::ClientBlockData::ClientBlockData(ExecutionEngine& engine, // We only get shared ptrs to const data. so we need to copy here... IdExecutorInfos infos{scatterInfos.numberOfInputRegisters(), *scatterInfos.registersToKeep(), - *scatterInfos.registersToClear(), "", false}; + *scatterInfos.registersToClear(), + false, + 0, + "", + false}; // NOTE: Do never change this type! The execute logic below requires this and only this type. 
_executor = std::make_unique>>(&engine, node, diff --git a/arangod/Aql/ShadowAqlItemRow.cpp b/arangod/Aql/ShadowAqlItemRow.cpp index c3e9df3f4994..cdc609ac752a 100644 --- a/arangod/Aql/ShadowAqlItemRow.cpp +++ b/arangod/Aql/ShadowAqlItemRow.cpp @@ -23,8 +23,8 @@ #include "ShadowAqlItemRow.h" #include "Basics/VelocyPackHelper.h" -#include "Transaction/Methods.h" #include "Transaction/Context.h" +#include "Transaction/Methods.h" using namespace arangodb; using namespace arangodb::aql; diff --git a/arangod/Aql/SubqueryEndExecutionNode.cpp b/arangod/Aql/SubqueryEndExecutionNode.cpp index 44a2f2fb0153..075604c6e414 100644 --- a/arangod/Aql/SubqueryEndExecutionNode.cpp +++ b/arangod/Aql/SubqueryEndExecutionNode.cpp @@ -29,6 +29,7 @@ #include "Aql/Query.h" #include "Aql/RegisterPlan.h" #include "Aql/SubqueryEndExecutor.h" +#include "Basics/VelocyPackHelper.h" #include "Meta/static_assert_size.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" @@ -51,7 +52,19 @@ bool CompareVariables(Variable const* mine, Variable const* yours) { SubqueryEndNode::SubqueryEndNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base) : ExecutionNode(plan, base), _inVariable(Variable::varFromVPack(plan->getAst(), base, "inVariable", true)), - _outVariable(Variable::varFromVPack(plan->getAst(), base, "outVariable")) {} + _outVariable(Variable::varFromVPack(plan->getAst(), base, "outVariable")), + _isModificationSubquery(basics::VelocyPackHelper::getBooleanValue( + base, "isModificationSubquery", false)) {} + +SubqueryEndNode::SubqueryEndNode(ExecutionPlan* plan, size_t id, Variable const* inVariable, + Variable const* outVariable, bool isModificationSubquery) + : ExecutionNode(plan, id), + _inVariable(inVariable), + _outVariable(outVariable), + _isModificationSubquery(isModificationSubquery) { + // _inVariable might be nullptr + TRI_ASSERT(_outVariable != nullptr); +} void SubqueryEndNode::toVelocyPackHelper(VPackBuilder& nodes, unsigned flags, std::unordered_set& seen) const { @@ -65,6 +78,8 @@ void SubqueryEndNode::toVelocyPackHelper(VPackBuilder& nodes, unsigned flags, _inVariable->toVelocyPack(nodes); } + nodes.add("isModificationSubquery", VPackValue(isModificationNode())); + nodes.close(); } @@ -90,8 +105,9 @@ std::unique_ptr SubqueryEndNode::createBlock( auto const vpackOptions = trx->transactionContextPtr()->getVPackOptions(); SubqueryEndExecutorInfos infos(inputRegisters, outputRegisters, getRegisterPlan()->nrRegs[previousNode->getDepth()], - getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), - calcRegsToKeep(), vpackOptions, inReg, outReg); + getRegisterPlan()->nrRegs[getDepth()], + getRegsToClear(), calcRegsToKeep(), vpackOptions, + inReg, outReg, isModificationNode()); return std::make_unique>(&engine, this, std::move(infos)); @@ -108,7 +124,8 @@ ExecutionNode* SubqueryEndNode::clone(ExecutionPlan* plan, bool withDependencies inVariable = plan->getAst()->variables()->createVariable(inVariable); } } - auto c = std::make_unique(plan, _id, inVariable, outVariable); + auto c = std::make_unique(plan, _id, inVariable, outVariable, + isModificationNode()); return cloneHelper(std::move(c), withDependencies, withProperties); } @@ -134,7 +151,8 @@ bool SubqueryEndNode::isEqualTo(ExecutionNode const& other) const { SubqueryEndNode const& p = dynamic_cast(other); TRI_ASSERT(p._outVariable != nullptr); if (!CompareVariables(_outVariable, p._outVariable) || - !CompareVariables(_inVariable, p._inVariable)) { + !CompareVariables(_inVariable, p._inVariable) || + _isModificationSubquery != 
p._isModificationSubquery) { // One of the variables does not match return false; } @@ -144,3 +162,7 @@ bool SubqueryEndNode::isEqualTo(ExecutionNode const& other) const { return false; } } + +bool SubqueryEndNode::isModificationNode() const { + return _isModificationSubquery; +} diff --git a/arangod/Aql/SubqueryEndExecutionNode.h b/arangod/Aql/SubqueryEndExecutionNode.h index 879e1f385538..0cae650db267 100644 --- a/arangod/Aql/SubqueryEndExecutionNode.h +++ b/arangod/Aql/SubqueryEndExecutionNode.h @@ -37,11 +37,7 @@ class SubqueryEndNode : public ExecutionNode { SubqueryEndNode(ExecutionPlan*, arangodb::velocypack::Slice const& base); SubqueryEndNode(ExecutionPlan* plan, size_t id, Variable const* inVariable, - Variable const* outVariable) - : ExecutionNode(plan, id), _inVariable(inVariable), _outVariable(outVariable) { - // _inVariable might be nullptr - TRI_ASSERT(_outVariable != nullptr); - } + Variable const* outVariable, bool isModificationSubquery); CostEstimate estimateCost() const override final; @@ -74,11 +70,12 @@ class SubqueryEndNode : public ExecutionNode { } void replaceOutVariable(Variable const* var); + bool isModificationNode() const override; private: Variable const* _inVariable; - Variable const* _outVariable; + bool _isModificationSubquery; }; } // namespace aql diff --git a/arangod/Aql/SubqueryEndExecutor.cpp b/arangod/Aql/SubqueryEndExecutor.cpp index 06d261c5a047..6973ac216de4 100644 --- a/arangod/Aql/SubqueryEndExecutor.cpp +++ b/arangod/Aql/SubqueryEndExecutor.cpp @@ -34,6 +34,8 @@ #include #include +#include "Logger/LogMacros.h" + using namespace arangodb; using namespace arangodb::aql; @@ -42,14 +44,15 @@ SubqueryEndExecutorInfos::SubqueryEndExecutorInfos( std::shared_ptr> writeableOutputRegisters, RegisterId nrInputRegisters, RegisterId nrOutputRegisters, std::unordered_set const& registersToClear, - std::unordered_set registersToKeep, - velocypack::Options const* const options, RegisterId inReg, RegisterId outReg) + std::unordered_set registersToKeep, velocypack::Options const* const options, + RegisterId inReg, RegisterId outReg, bool isModificationSubquery) : ExecutorInfos(std::move(readableInputRegisters), std::move(writeableOutputRegisters), nrInputRegisters, nrOutputRegisters, registersToClear, std::move(registersToKeep)), _vpackOptions(options), _outReg(outReg), - _inReg(inReg) {} + _inReg(inReg), + _isModificationSubquery(isModificationSubquery) {} SubqueryEndExecutorInfos::~SubqueryEndExecutorInfos() = default; @@ -69,92 +72,73 @@ RegisterId SubqueryEndExecutorInfos::getInputRegister() const noexcept { return _inReg; } +bool SubqueryEndExecutorInfos::isModificationSubquery() const noexcept { + return _isModificationSubquery; +} + SubqueryEndExecutor::SubqueryEndExecutor(Fetcher& fetcher, SubqueryEndExecutorInfos& infos) : _fetcher(fetcher), _infos(infos), _accumulator(_infos.vpackOptions()) {} SubqueryEndExecutor::~SubqueryEndExecutor() = default; std::pair SubqueryEndExecutor::produceRows(OutputAqlItemRow& output) { - ExecutionState state; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} +auto SubqueryEndExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + // We can not account for skipped rows here. 
+ // If we get this we have invalid logic either in the upstream + // produced by this Executor + // or in the reporting by the Executor data is requested from + TRI_ASSERT(input.skippedInFlight() == 0); + ExecutorState state{ExecutorState::HASMORE}; InputAqlItemRow inputRow = InputAqlItemRow{CreateInvalidInputRowHint()}; - ShadowAqlItemRow shadowRow = ShadowAqlItemRow{CreateInvalidShadowRowHint{}}; - - while (!output.isFull()) { - switch (_state) { - case State::ACCUMULATE_DATA_ROWS: { - std::tie(state, inputRow) = _fetcher.fetchRow(); - - if (state == ExecutionState::WAITING) { - TRI_ASSERT(!inputRow.isInitialized()); - return {state, NoStats{}}; - } - - // We got a data row, put it into the accumulator - if (inputRow.isInitialized() && _infos.usesInputRegister()) { - _accumulator.addValue(inputRow.getValue(_infos.getInputRegister())); - } - - // We have received DONE on data rows, so now - // we have to read a relevant shadow row - if (state == ExecutionState::DONE) { - _state = State::PROCESS_SHADOW_ROWS; - } - } break; - case State::PROCESS_SHADOW_ROWS: { - std::tie(state, shadowRow) = _fetcher.fetchShadowRow(); - if (state == ExecutionState::WAITING) { - TRI_ASSERT(!shadowRow.isInitialized()); - return {ExecutionState::WAITING, NoStats{}}; - } - TRI_ASSERT(state == ExecutionState::DONE || state == ExecutionState::HASMORE); - - if (!shadowRow.isInitialized()) { - TRI_ASSERT(_accumulator.numValues() == 0); - if (state == ExecutionState::HASMORE) { - // We did not get another shadowRow; either we - // are DONE or we are getting another relevant - // shadow row, but only after we called fetchRow - // again - _state = State::ACCUMULATE_DATA_ROWS; - } else { - TRI_ASSERT(state == ExecutionState::DONE); - return {ExecutionState::DONE, NoStats{}}; - } - } else { - if (shadowRow.isRelevant()) { - AqlValue value; - AqlValueGuard guard = _accumulator.stealValue(value); - - // Responsibility is handed over to output - output.consumeShadowRow(_infos.getOutputRegister(), shadowRow, guard); - TRI_ASSERT(output.produced()); - output.advanceRow(); - - if (state == ExecutionState::DONE) { - return {ExecutionState::DONE, NoStats{}}; - } - } else { - TRI_ASSERT(_accumulator.numValues() == 0); - // We got a shadow row, it must be irrelevant, - // because to get another relevant shadowRow we must - // first call fetchRow again - output.decreaseShadowRowDepth(shadowRow); - output.advanceRow(); - } - } - } break; - - default: { - TRI_ASSERT(false); - break; - } + + while (input.hasDataRow()) { + std::tie(state, inputRow) = input.nextDataRow(); + TRI_ASSERT(inputRow.isInitialized()); + + // We got a data row, put it into the accumulator, + // if we're getting data through an input register. + // If not, we just "accumulate" an empty output. + if (_infos.usesInputRegister()) { + _accumulator.addValue(inputRow.getValue(_infos.getInputRegister())); } } + return {input.upstreamState(), NoStats{}, AqlCall{}}; +} + +auto SubqueryEndExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + // We can not account for skipped rows here. 
+ // If we get this we have invalid logic either in the upstream + // produced by this Executor + // or in the reporting by the Executor data is requested from + TRI_ASSERT(input.skippedInFlight() == 0); + ExecutorState state; + InputAqlItemRow inputRow = InputAqlItemRow{CreateInvalidInputRowHint()}; + + while (input.hasDataRow()) { + std::tie(state, inputRow) = input.nextDataRow(); + TRI_ASSERT(inputRow.isInitialized()); + } + // This is correct since the SubqueryEndExecutor produces one output out + // of the accumulation of all the (relevant) inputs + call.didSkip(1); + return {input.upstreamState(), NoStats{}, 1, AqlCall{}}; +} + +auto SubqueryEndExecutor::consumeShadowRow(ShadowAqlItemRow shadowRow, + OutputAqlItemRow& output) -> void { + AqlValue value; + AqlValueGuard guard = _accumulator.stealValue(value); + output.consumeShadowRow(_infos.getOutputRegister(), shadowRow, guard); +} - // We should *only* fall through here if output.isFull() is true. - TRI_ASSERT(output.isFull()); - return {ExecutionState::HASMORE, NoStats{}}; +auto SubqueryEndExecutor::isModificationSubquery() const noexcept -> bool { + return _infos.isModificationSubquery(); } void SubqueryEndExecutor::Accumulator::reset() { @@ -177,8 +161,8 @@ SubqueryEndExecutor::Accumulator::Accumulator(VPackOptions const* const options) } AqlValueGuard SubqueryEndExecutor::Accumulator::stealValue(AqlValue& result) { - // Note that an AqlValueGuard holds an AqlValue&, so we cannot create it from - // a local AqlValue and return the Guard! + // Note that an AqlValueGuard holds an AqlValue&, so we cannot create it + // from a local AqlValue and return the Guard! TRI_ASSERT(_builder->isOpenArray()); _builder->close(); diff --git a/arangod/Aql/SubqueryEndExecutor.h b/arangod/Aql/SubqueryEndExecutor.h index c3e5f0e62262..19b684f03b9e 100644 --- a/arangod/Aql/SubqueryEndExecutor.h +++ b/arangod/Aql/SubqueryEndExecutor.h @@ -24,6 +24,7 @@ #ifndef ARANGOD_AQL_SUBQUERY_END_EXECUTOR_H #define ARANGOD_AQL_SUBQUERY_END_EXECUTOR_H +#include "Aql/AqlCall.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" @@ -49,7 +50,7 @@ class SubqueryEndExecutorInfos : public ExecutorInfos { std::unordered_set const& registersToClear, std::unordered_set registersToKeep, velocypack::Options const* options, RegisterId inReg, - RegisterId outReg); + RegisterId outReg, bool isModificationSubquery); SubqueryEndExecutorInfos() = delete; SubqueryEndExecutorInfos(SubqueryEndExecutorInfos&&) noexcept = default; @@ -60,11 +61,13 @@ class SubqueryEndExecutorInfos : public ExecutorInfos { [[nodiscard]] RegisterId getOutputRegister() const noexcept; [[nodiscard]] bool usesInputRegister() const noexcept; [[nodiscard]] RegisterId getInputRegister() const noexcept; + [[nodiscard]] bool isModificationSubquery() const noexcept; private: velocypack::Options const* _vpackOptions; RegisterId const _outReg; RegisterId const _inReg; + bool const _isModificationSubquery; }; class SubqueryEndExecutor { @@ -82,9 +85,31 @@ class SubqueryEndExecutor { SubqueryEndExecutor(Fetcher& fetcher, SubqueryEndExecutorInfos& infos); ~SubqueryEndExecutor(); - std::pair produceRows(OutputAqlItemRow& output); + [[deprecated]] std::pair produceRows(OutputAqlItemRow& output); std::pair expectedNumberOfRows(size_t atMost) const; + // produceRows accumulates all input rows it can get into _accumulator, which + // will then be read out by ExecutionBlockImpl + // TODO: can the production of output be moved to produceRows again? 
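+ // (currently the accumulated value is emitted in
+ //  ExecutionBlockImpl<SubqueryEndExecutor>::shadowRowForwarding, which calls
+ //  consumeShadowRow() below once the relevant shadow row arrives)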
+ [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + // skipRowsRange consumes all data rows available on the input and just + // ignores it. real skips of a subqueries will not execute the whole subquery + // so this might not be necessary at all (except for modifying subqueries, + // where we have to execute the subquery) + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; + + /** + * @brief Consume the given shadow row and write the aggregated value to it + * + * @param shadowRow The shadow row + * @param output Output block + */ + auto consumeShadowRow(ShadowAqlItemRow shadowRow, OutputAqlItemRow& output) -> void; + + [[nodiscard]] auto isModificationSubquery() const noexcept -> bool; + private: enum class State { ACCUMULATE_DATA_ROWS, @@ -117,8 +142,6 @@ class SubqueryEndExecutor { SubqueryEndExecutorInfos& _infos; Accumulator _accumulator; - - State _state{State::ACCUMULATE_DATA_ROWS}; }; } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/SubqueryStartExecutionNode.h b/arangod/Aql/SubqueryStartExecutionNode.h index 9f73d2066944..1ddfbd6f318f 100644 --- a/arangod/Aql/SubqueryStartExecutionNode.h +++ b/arangod/Aql/SubqueryStartExecutionNode.h @@ -57,10 +57,13 @@ class SubqueryStartNode : public ExecutionNode { bool isEqualTo(ExecutionNode const& other) const override final; + bool isModificationSubqueryNode(); + private: /// @brief This is only required for Explain output. /// it has no practical usage other then to print this information during explain. Variable const* _subqueryOutVariable; + bool _isModificationSubquery; }; } // namespace aql diff --git a/arangod/Aql/SubqueryStartExecutor.cpp b/arangod/Aql/SubqueryStartExecutor.cpp index 76698e2477e2..f854d404496d 100644 --- a/arangod/Aql/SubqueryStartExecutor.cpp +++ b/arangod/Aql/SubqueryStartExecutor.cpp @@ -27,149 +27,76 @@ #include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" +#include "Logger/LogMacros.h" + using namespace arangodb; using namespace arangodb::aql; -SubqueryStartExecutor::SubqueryStartExecutor(Fetcher& fetcher, Infos& infos) - : _fetcher(fetcher) {} -SubqueryStartExecutor::~SubqueryStartExecutor() = default; +SubqueryStartExecutor::SubqueryStartExecutor(Fetcher& fetcher, Infos& infos) {} std::pair SubqueryStartExecutor::produceRows(OutputAqlItemRow& output) { - while (!output.isFull()) { - switch (_internalState) { - case State::READ_DATA_ROW: { - TRI_ASSERT(!_input.isInitialized()); - if (_upstreamState == ExecutionState::DONE) { - return {ExecutionState::DONE, NoStats{}}; - } - // We need to round the number of rows, otherwise this might be called - // with atMost == 0 Note that we must not set _upstreamState to DONE - // here, as fetchRow will report DONE when encountering a shadow row. 
- auto rowWithStates = - _fetcher.fetchRowWithGlobalState((output.numRowsLeft() + 1) / 2); - _input = std::move(rowWithStates.row); - if (rowWithStates.localState == ExecutionState::WAITING) { - return {ExecutionState::WAITING, NoStats{}}; - } - TRI_ASSERT(_upstreamState == ExecutionState::HASMORE); - TRI_ASSERT(rowWithStates.globalState == ExecutionState::HASMORE || - rowWithStates.globalState == ExecutionState::DONE); - // This can only switch from HASMORE to DONE - _upstreamState = rowWithStates.globalState; - if (!_input.isInitialized()) { - TRI_ASSERT(rowWithStates.localState == ExecutionState::DONE); - _internalState = State::PASS_SHADOW_ROW; - } else { - _internalState = State::PRODUCE_DATA_ROW; - } - } break; - case State::PRODUCE_DATA_ROW: { - TRI_ASSERT(!output.isFull()); - TRI_ASSERT(_input.isInitialized()); - output.copyRow(_input); - output.advanceRow(); - _internalState = State::PRODUCE_SHADOW_ROW; - } break; - case State::PRODUCE_SHADOW_ROW: { - TRI_ASSERT(_input.isInitialized()); - output.createShadowRow(_input); - output.advanceRow(); - _input = InputAqlItemRow(CreateInvalidInputRowHint{}); - _internalState = State::READ_DATA_ROW; - } break; - case State::PASS_SHADOW_ROW: { - if (_upstreamState == ExecutionState::DONE) { - return {ExecutionState::DONE, NoStats{}}; - } - // We need to handle shadowRows now. It is the job of this node to - // increase the shadow row depth - auto const [state, shadowRow] = _fetcher.fetchShadowRow(); - if (state == ExecutionState::WAITING) { - return {ExecutionState::WAITING, NoStats{}}; - } - TRI_ASSERT(_upstreamState == ExecutionState::HASMORE); - TRI_ASSERT(state == ExecutionState::HASMORE || state == ExecutionState::DONE); - // This can only switch from HASMORE to DONE - _upstreamState = state; - if (shadowRow.isInitialized()) { - output.increaseShadowRowDepth(shadowRow); - output.advanceRow(); - // stay in state PASS_SHADOW_ROW - } else { - _internalState = State::READ_DATA_ROW; - } - } break; - } - } + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - if (_internalState == State::READ_DATA_ROW || _internalState == State::PASS_SHADOW_ROW) { - TRI_ASSERT(!_input.isInitialized()); - return {_upstreamState, NoStats{}}; +auto SubqueryStartExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + if (_inputRow.isInitialized()) { + // We have not been able to report the ShadowRow. + // Simply return DONE to trigger Impl to fetch shadow row instead. + return {ExecutorState::DONE, NoStats{}, AqlCall{}}; } - // PRODUCE_DATA_ROW should always immediately be processed without leaving the - // loop - TRI_ASSERT(_internalState == State::PRODUCE_SHADOW_ROW); - TRI_ASSERT(_input.isInitialized()); - return {ExecutionState::HASMORE, NoStats{}}; + TRI_ASSERT(!_inputRow.isInitialized()); + TRI_ASSERT(!output.isFull()); + if (input.hasDataRow()) { + std::tie(_upstreamState, _inputRow) = input.peekDataRow(); + output.copyRow(_inputRow); + output.advanceRow(); + return {ExecutorState::DONE, NoStats{}, AqlCall{}}; + } + return {input.upstreamState(), NoStats{}, AqlCall{}}; } -std::pair SubqueryStartExecutor::expectedNumberOfRows(size_t atMost) const { - auto const [state, upstreamRows] = _fetcher.preFetchNumberOfRows(atMost); - // We will write one shadow row per input data row. - // We might write less on all shadow rows in input, right now we do not figure this out yes. 
- TRI_ASSERT( _input.isInitialized() == (_internalState == State::PRODUCE_SHADOW_ROW)); - TRI_ASSERT(_internalState != State::PRODUCE_DATA_ROW); - - // Return 1 if _input.isInitialized(), and 0 otherwise. Looks more complicated - // than it is. - auto const localRows = [this]() { - if (_input.isInitialized()) { - switch (_internalState) { - case State::PRODUCE_SHADOW_ROW: - return 1; - case State::PRODUCE_DATA_ROW: - case State::READ_DATA_ROW: - case State::PASS_SHADOW_ROW: { - TRI_ASSERT(false); - using namespace std::string_literals; - THROW_ARANGO_EXCEPTION_MESSAGE( - TRI_ERROR_INTERNAL_AQL, - "Unexpected state "s + stateToString(_internalState) + - " in SubqueryStartExecutor with local row"); - } - } - } else { - switch (_internalState) { - case State::READ_DATA_ROW: - case State::PASS_SHADOW_ROW: - return 0; - case State::PRODUCE_DATA_ROW: - case State::PRODUCE_SHADOW_ROW: - TRI_ASSERT(false); - using namespace std::string_literals; - THROW_ARANGO_EXCEPTION_MESSAGE( - TRI_ERROR_INTERNAL_AQL, - "Unexpected state "s + stateToString(_internalState) + - " in SubqueryStartExecutor with no local row"); - } - } - TRI_ASSERT(false); - return 0; - }(); +auto SubqueryStartExecutor::skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + TRI_ASSERT(call.shouldSkip()); + if (_inputRow.isInitialized()) { + // We have not been able to report the ShadowRow. + // Simply return DONE to trigger Impl to fetch shadow row instead. + return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}}; + } - return {state, upstreamRows * 2 + localRows}; + if (input.hasDataRow()) { + // Do not consume the row. + // It needs to be reported in Produce. + std::tie(_upstreamState, _inputRow) = input.peekDataRow(); + call.didSkip(1); + return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCall{}}; + } + return {input.upstreamState(), NoStats{}, 0, AqlCall{}}; } -auto SubqueryStartExecutor::stateToString(SubqueryStartExecutor::State state) -> std::string { - switch (state) { - case State::READ_DATA_ROW: - return "READ_DATA_ROW"; - case State::PRODUCE_DATA_ROW: - return "PRODUCE_DATA_ROW"; - case State::PRODUCE_SHADOW_ROW: - return "PRODUCE_SHADOW_ROW"; - case State::PASS_SHADOW_ROW: - return "PASS_SHADOW_ROW"; +auto SubqueryStartExecutor::produceShadowRow(AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> bool { + TRI_ASSERT(!output.allRowsUsed()); + if (_inputRow.isInitialized()) { + // Actually consume the input row now. 
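+    // (produceRows/skipRowsRange only peeked at this row, so the input range
+    //  still points to it; consuming it via nextDataRow here keeps the range
+    //  in sync with the cached _inputRow before the shadow row is written.)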
+ auto const [upstreamState, inputRow] = input.nextDataRow(); + // We are only supposed to report the inputRow we + // have seen in produce as a ShadowRow + TRI_ASSERT(inputRow.isSameBlockAndIndex(_inputRow)); + output.createShadowRow(_inputRow); + output.advanceRow(); + // Reset local input row + _inputRow = InputAqlItemRow(CreateInvalidInputRowHint{}); + return true; } - return "unhandled state"; + return false; +} + +// TODO: remove me +auto SubqueryStartExecutor::expectedNumberOfRows(size_t atMost) const + -> std::pair { + TRI_ASSERT(false); + return {ExecutionState::DONE, 0}; } diff --git a/arangod/Aql/SubqueryStartExecutor.h b/arangod/Aql/SubqueryStartExecutor.h index 3c65a659ad22..1cdd6f6fbeb0 100644 --- a/arangod/Aql/SubqueryStartExecutor.h +++ b/arangod/Aql/SubqueryStartExecutor.h @@ -23,6 +23,8 @@ #ifndef ARANGOD_AQL_SUBQUERY_START_EXECUTOR_H #define ARANGOD_AQL_SUBQUERY_START_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" @@ -49,47 +51,35 @@ class SubqueryStartExecutor { using Infos = ExecutorInfos; using Stats = NoStats; SubqueryStartExecutor(Fetcher& fetcher, Infos& infos); - ~SubqueryStartExecutor(); + ~SubqueryStartExecutor() = default; - /** - * @brief produce the next Row of Aql Values. - * - * @return ExecutionState, - * if something was written output.hasValue() == true - */ - std::pair produceRows(OutputAqlItemRow& output); + [[deprecated]] std::pair produceRows(OutputAqlItemRow& output); - /** - * @brief Estimate of expected number of rows. - * - * @return ExecutionState, merely taken from upstream, - * size_t number of rows we are likely to produce (at most) - * - */ - std::pair expectedNumberOfRows(size_t atMost) const; + // produceRows for SubqueryStart reads a data row from its input and produces + // a copy of that row and a shadow row. This requires some amount of internal + // state as it can happen that after producing the copied data row the output + // is full, and hence we need to return ExecutorState::HASMORE to be able to + // produce the shadow row + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; - private: - enum class State { - READ_DATA_ROW, - PRODUCE_DATA_ROW, - PRODUCE_SHADOW_ROW, - PASS_SHADOW_ROW - }; + // skipRowsRange just skips input rows and reports how many rows it skipped + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; - auto static stateToString(State state) -> std::string; + // Produce a shadow row *if* we have either skipped or output a datarow + // previously + auto produceShadowRow(AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> bool; - private: - // Fetcher to get data. 
- Fetcher& _fetcher; + std::pair expectedNumberOfRows(size_t atMost) const; + private: + private: // Upstream state, used to determine if we are done with all subqueries - ExecutionState _upstreamState{ExecutionState::HASMORE}; + ExecutorState _upstreamState{ExecutorState::HASMORE}; // Cache for the input row we are currently working on - InputAqlItemRow _input{CreateInvalidInputRowHint{}}; - - // Internal state - State _internalState{State::READ_DATA_ROW}; + InputAqlItemRow _inputRow{CreateInvalidInputRowHint{}}; }; } // namespace aql } // namespace arangodb diff --git a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index 0fcde0a15adb..ea6133c55117 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -674,7 +674,7 @@ class ExecutionBlockImplExecuteSpecificTest : public SharedExecutionBlockImplTes */ std::unique_ptr createSingleton() { auto res = std::make_unique>>( - fakedQuery->engine(), generateNodeDummy(), IdExecutorInfos{0, {}, {}}); + fakedQuery->engine(), generateNodeDummy(), IdExecutorInfos{0, {}, {}, false}); InputAqlItemRow inputRow{CreateInvalidInputRowHint{}}; auto const [state, result] = res->initializeCursor(inputRow); EXPECT_EQ(state, ExecutionState::DONE); diff --git a/tests/Aql/ExecutionNodeTest.cpp b/tests/Aql/ExecutionNodeTest.cpp index fef683ce7aef..29814e70df37 100644 --- a/tests/Aql/ExecutionNodeTest.cpp +++ b/tests/Aql/ExecutionNodeTest.cpp @@ -87,7 +87,7 @@ TEST_F(ExecutionNodeTest, end_node_velocypack_roundtrip_no_invariable) { std::unique_ptr node, nodeFromVPack; std::unordered_set seen{}; - node = std::make_unique(&plan, 0, nullptr, &outvar); + node = std::make_unique(&plan, 0, nullptr, &outvar, false); builder.openArray(); node->toVelocyPackHelper(builder, ExecutionNode::SERIALIZE_DETAILS, seen); @@ -110,7 +110,7 @@ TEST_F(ExecutionNodeTest, end_node_velocypack_roundtrip_invariable) { std::unique_ptr node, nodeFromVPack; std::unordered_set seen{}; - node = std::make_unique(&plan, 0, &invar, &outvar); + node = std::make_unique(&plan, 0, &invar, &outvar, false); builder.openArray(); node->toVelocyPackHelper(builder, ExecutionNode::SERIALIZE_DETAILS, seen); @@ -129,8 +129,8 @@ TEST_F(ExecutionNodeTest, end_node_not_equal_different_id) { Variable outvar("name", 1); - node1 = std::make_unique(&plan, 0, nullptr, &outvar); - node2 = std::make_unique(&plan, 1, nullptr, &outvar); + node1 = std::make_unique(&plan, 0, nullptr, &outvar, false); + node2 = std::make_unique(&plan, 1, nullptr, &outvar, false); ASSERT_FALSE(node1->isEqualTo(*node2)); } @@ -141,8 +141,8 @@ TEST_F(ExecutionNodeTest, end_node_not_equal_invariable_null_vs_non_null) { Variable outvar("name", 1); Variable invar("otherName", 2); - node1 = std::make_unique(&plan, 0, &invar, &outvar); - node2 = std::make_unique(&plan, 1, nullptr, &outvar); + node1 = std::make_unique(&plan, 0, &invar, &outvar, false); + node2 = std::make_unique(&plan, 1, nullptr, &outvar, false); ASSERT_FALSE(node1->isEqualTo(*node2)); // Bidirectional nullptr check @@ -156,8 +156,8 @@ TEST_F(ExecutionNodeTest, end_node_not_equal_invariable_differ) { Variable invar("otherName", 2); Variable otherInvar("invalidName", 3); - node1 = std::make_unique(&plan, 0, &invar, &outvar); - node2 = std::make_unique(&plan, 1, &otherInvar, &outvar); + node1 = std::make_unique(&plan, 0, &invar, &outvar, false); + node2 = std::make_unique(&plan, 1, &otherInvar, &outvar, false); ASSERT_FALSE(node1->isEqualTo(*node2)); // Bidirectional check @@ -170,12 +170,12 @@ 
TEST_F(ExecutionNodeTest, end_node_not_equal_outvariable_differ) { Variable outvar("name", 1); Variable otherOutvar("otherName", 2); - node1 = std::make_unique(&plan, 0, nullptr, &outvar); - node2 = std::make_unique(&plan, 1, nullptr, &otherOutvar); + node1 = std::make_unique(&plan, 0, nullptr, &outvar, false); + node2 = std::make_unique(&plan, 1, nullptr, &otherOutvar, false); ASSERT_FALSE(node1->isEqualTo(*node2)); // Bidirectional check ASSERT_FALSE(node2->isEqualTo(*node1)); } -} // namespace arangodb +} // namespace arangodb::tests::aql diff --git a/tests/Aql/ExecutorTestHelper.cpp b/tests/Aql/ExecutorTestHelper.cpp index d0e38abac134..a7dd6cdb84d6 100644 --- a/tests/Aql/ExecutorTestHelper.cpp +++ b/tests/Aql/ExecutorTestHelper.cpp @@ -90,11 +90,19 @@ auto asserthelper::ValidateBlocksAreEqual(SharedAqlItemBlockPtr actual, } for (size_t row = 0; row < (std::min)(actual->size(), expected->size()); ++row) { + // Compare registers for (RegisterId reg = 0; reg < outRegs; ++reg) { RegisterId actualRegister = onlyCompareRegisters ? onlyCompareRegisters->at(reg) : reg; ValidateAqlValuesAreEqual(actual, row, actualRegister, expected, row, reg); } + // Compare shadowRows + EXPECT_EQ(actual->isShadowRow(row), expected->isShadowRow(row)); + if (actual->isShadowRow(row) && expected->isShadowRow(row)) { + ShadowAqlItemRow actualShadowRow{actual, row}; + ShadowAqlItemRow expectedShadowRow{expected, row}; + EXPECT_EQ(actualShadowRow.getDepth(), expectedShadowRow.getDepth()); + } } } @@ -112,6 +120,13 @@ auto asserthelper::ValidateBlocksAreEqualUnordered( std::optional> const& onlyCompareRegisters) -> void { ASSERT_NE(expected, nullptr); ASSERT_NE(actual, nullptr); + EXPECT_FALSE(actual->hasShadowRows()) + << "unordered validation does not support shadowRows yet. If you need " + "this please implement!"; + EXPECT_FALSE(expected->hasShadowRows()) + << "unordered validation does not support shadowRows yet. If you need " + "this please implement!"; + EXPECT_EQ(actual->size() + numRowsNotContained, expected->size()); RegisterId outRegs = (std::min)(actual->getNrRegs(), expected->getNrRegs()); @@ -159,6 +174,16 @@ AqlExecutorTestCase::AqlExecutorTestCase() : fakedQuery{_server->createFakeQuery(enableQueryTrace)} { auto engine = std::make_unique(*fakedQuery, SerializationFormat::SHADOWROWS); fakedQuery->setEngine(engine.release()); + if constexpr (enableQueryTrace) { + Logger::QUERIES.setLogLevel(LogLevel::DEBUG); + } +} + +template +AqlExecutorTestCase::~AqlExecutorTestCase() { + if constexpr (enableQueryTrace) { + Logger::QUERIES.setLogLevel(LogLevel::INFO); + } } template diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index cfc344f5f4f8..718c91b62054 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -112,7 +112,7 @@ class AqlExecutorTestCase : public ::testing::Test { protected: AqlExecutorTestCase(); - virtual ~AqlExecutorTestCase() = default; + virtual ~AqlExecutorTestCase(); /** * @brief Creates and manages a ExecutionNode. 
@@ -174,6 +174,22 @@ struct Pipeline { std::deque const& get() const { return _pipeline; }; std::deque& get() { return _pipeline; }; + Pipeline& addDependency(ExecBlock&& dependency) { + if (!empty()) { + _pipeline.back()->addDependency(dependency.get()); + } + _pipeline.emplace_back(std::move(dependency)); + return *this; + } + + Pipeline& addConsumer(ExecBlock&& consumer) { + if (!empty()) { + consumer->addDependency(_pipeline.front().get()); + } + _pipeline.emplace_front(std::move(consumer)); + return *this; + } + private: PipelineStorage _pipeline; }; @@ -204,8 +220,13 @@ struct ExecutorTestHelper { _query(query), _dummyNode{std::make_unique(_query.plan(), 42)} {} + auto setCallStack(AqlCallStack stack) -> ExecutorTestHelper& { + _callStack = stack; + return *this; + } + auto setCall(AqlCall c) -> ExecutorTestHelper& { - _call = c; + _callStack = AqlCallStack{c}; return *this; } @@ -263,9 +284,12 @@ struct ExecutorTestHelper { } auto expectOutput(std::array const& regs, - MatrixBuilder const& out) -> ExecutorTestHelper& { + MatrixBuilder const& out, + std::vector> const& shadowRows = {}) + -> ExecutorTestHelper& { _outputRegisters = regs; _output = out; + _outputShadowRows = shadowRows; return *this; } @@ -317,8 +341,8 @@ struct ExecutorTestHelper { auto createExecBlock(typename E::Infos infos, ExecutionNode::NodeType nodeType = ExecutionNode::SINGLETON) -> ExecBlock { - auto& testeeNode = _execNodes.emplace_back( - std::move(std::make_unique(_query.plan(), _execNodes.size(), nodeType))); + auto& testeeNode = _execNodes.emplace_back(std::move( + std::make_unique(_query.plan(), _execNodes.size(), nodeType))); return std::make_unique>(_query.engine(), testeeNode.get(), std::move(infos)); } @@ -363,28 +387,28 @@ struct ExecutorTestHelper { BlockCollector allResults{&itemBlockManager}; if (!loop) { - AqlCallStack stack{_call}; - auto const [state, skipped, result] = _pipeline.get().front()->execute(stack); + auto const [state, skipped, result] = _pipeline.get().front()->execute(_callStack); skippedTotal = skipped; finalState = state; if (result != nullptr) { allResults.add(result); } } else { - auto call = _call; do { - AqlCallStack stack{call}; - auto const [state, skipped, result] = _pipeline.get().front()->execute(stack); + auto const [state, skipped, result] = _pipeline.get().front()->execute(_callStack); finalState = state; + auto call = _callStack.popCall(); skippedTotal += skipped; + call.didSkip(skipped); if (result != nullptr) { + call.didProduce(result->size()); allResults.add(result); } - call = _call; - call.didSkip(skippedTotal); - call.didProduce(allResults.totalSize()); + _callStack.pushCall(std::move(call)); + } while (finalState != ExecutionState::DONE && - (!call.hasSoftLimit() || (call.getLimit() + call.getOffset()) > 0)); + (!_callStack.peek().hasSoftLimit() || + (_callStack.peek().getLimit() + _callStack.peek().getOffset()) > 0)); } EXPECT_EQ(skippedTotal, _expectedSkip); @@ -396,7 +420,7 @@ struct ExecutorTestHelper { << "Executor does not yield output, although it is expected"; } else { SharedAqlItemBlockPtr expectedOutputBlock = - buildBlock(itemBlockManager, std::move(_output)); + buildBlock(itemBlockManager, std::move(_output), _outputShadowRows); std::vector outRegVector(_outputRegisters.begin(), _outputRegisters.end()); if (_unorderedOutput) { @@ -471,9 +495,11 @@ struct ExecutorTestHelper { _waitingBehaviour); } - AqlCall _call; + // Default initialize with a fetchAll call. 
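+  // Tests usually override it, e.g. via
+  //   .setCall(AqlCall{5})     // offset-only call: skip the first 5 rows
+  //   .setCallStack(stack)     // or a full call stack for spliced subqueries
+  // (illustrative; see the SplicedSubqueryIntegrationTest cases for concrete calls).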
+ AqlCallStack _callStack{AqlCall{}}; MatrixBuilder _input; MatrixBuilder _output; + std::vector> _outputShadowRows{}; std::array _outputRegisters; std::size_t _expectedSkip; ExecutionState _expectedState; diff --git a/tests/Aql/IdExecutorTest.cpp b/tests/Aql/IdExecutorTest.cpp index 6595e5612206..ac203cf39da3 100644 --- a/tests/Aql/IdExecutorTest.cpp +++ b/tests/Aql/IdExecutorTest.cpp @@ -47,36 +47,113 @@ using namespace arangodb::aql; namespace arangodb::tests::aql { -using TestParam = std::tuple, // The input data - ExecutorState, // The upstream state - AqlCall, // The client Call, +using TestParam = std::tuple number + ExecutorState, // The upstream state + AqlCall, // The client Call, + bool, // flag to decide if we need to do couting OutputAqlItemRow::CopyRowBehavior // How the data is handled within outputRow >; class IdExecutorTestCombiner : public AqlExecutorTestCaseWithParam { protected: - IdExecutorTestCombiner() {} - auto prepareInputRange() -> AqlItemBlockInputRange { - auto const& [input, upstreamState, clientCall, copyBehaviour] = GetParam(); - if (input.empty()) { + auto input = getInput(); + if (input == 0) { // no input - return AqlItemBlockInputRange{upstreamState}; + return AqlItemBlockInputRange{getUpstreamState()}; } MatrixBuilder<1> matrix; - for (auto const& it : input) { - matrix.emplace_back(RowBuilder<1>{{it}}); + for (int i = 0; i < static_cast(input); ++i) { + matrix.emplace_back(RowBuilder<1>{i}); } SharedAqlItemBlockPtr block = buildBlock<1>(manager(), std::move(matrix)); - TRI_ASSERT(clientCall.getSkipCount() == 0); - return AqlItemBlockInputRange{upstreamState, 0, block, 0}; + TRI_ASSERT(getCall().getSkipCount() == 0); + return AqlItemBlockInputRange{getUpstreamState(), 0, block, 0}; + } + + auto doCount() -> bool { + auto const& [a, b, c, doCount, d] = GetParam(); + return doCount; + } + + auto makeInfos() -> IdExecutorInfos { + return IdExecutorInfos{1, {0}, {}, doCount()}; + } + + auto getInput() -> size_t { + auto const& [input, a, b, c, d] = GetParam(); + return input; + } + + auto getCall() -> AqlCall { + auto const& [a, b, call, c, d] = GetParam(); + return call; + } + + auto getUpstreamState() -> ExecutorState { + auto const& [a, state, b, c, d] = GetParam(); + return state; + } + + auto getExpectedState() -> ExecutionState { + auto call = getCall(); + auto available = getInput(); + if (call.needsFullCount() || call.getOffset() + call.getLimit() >= available) { + // We will fetch all + return ExecutionState::DONE; + } + if (getUpstreamState() == ExecutorState::DONE) { + return ExecutionState::DONE; + } + return ExecutionState::HASMORE; + } + + auto getSkip() -> size_t { + size_t skip = 0; + size_t available = getInput(); + auto call = getCall(); + if (call.getOffset() > 0) { + skip = std::min(skip, available); + available -= skip; + } + if (call.hasHardLimit() && call.needsFullCount()) { + // Take away the rows that will be produced + // add the leftOver to skip + available -= std::min(available, call.getLimit()); + skip += available; + } + return skip; + } + + auto getOutput() -> MatrixBuilder<1> { + MatrixBuilder<1> res; + auto call = getCall(); + int available = std::min(getInput(), call.getOffset() + call.getLimit()); + for (int i = call.getOffset(); i < available; ++i) { + res.emplace_back(RowBuilder<1>{i}); + } + return res; + } + + auto getStats() -> ExecutionStats { + ExecutionStats stats; + + if (doCount()) { + auto available = getInput(); + auto call = getCall(); + available -= std::min(available, call.getOffset()); + available = 
std::min(available, call.getLimit()); + stats.count = available; + } + + return stats; } auto prepareOutputRow(SharedAqlItemBlockPtr input) -> OutputAqlItemRow { auto toWrite = make_shared_unordered_set({}); auto toKeep = make_shared_unordered_set({0}); auto toClear = make_shared_unordered_set(); - auto const& [unused, upstreamState, clientCall, copyBehaviour] = GetParam(); + auto const& [unused, upstreamState, clientCall, unused2, copyBehaviour] = GetParam(); AqlCall callCopy = clientCall; if (copyBehaviour == OutputAqlItemRow::CopyRowBehavior::DoNotCopyInputRows) { // For passthrough we reuse the block @@ -94,66 +171,63 @@ class IdExecutorTestCombiner : public AqlExecutorTestCaseWithParam { return OutputAqlItemRow(outBlock, toWrite, toKeep, toClear, std::move(callCopy), copyBehaviour); } - - // After Execute is done these fetchers shall be removed, - // the Executor does not need it anymore! - // However the template is still required. - template - auto runTest(Fetcher& fetcher) -> void { - auto const& [input, upstreamState, clientCall, copyBehaviour] = GetParam(); - - auto inputRange = prepareInputRange(); - auto outputRow = prepareOutputRow(inputRange.getBlock()); - - // If the input is empty, all rows(none) are used, otherwise they are not. - EXPECT_EQ(outputRow.allRowsUsed(), input.empty()); - IdExecutorInfos infos{1, {0}, {}}; - - IdExecutor testee{fetcher, infos}; - - auto const [state, stats, call] = testee.produceRows(inputRange, outputRow); - EXPECT_EQ(state, upstreamState); - // Stats are NoStats, no checks here. - - // We can never forward any offset. - EXPECT_EQ(call.getOffset(), 0); - - // The limits need to be reduced by input size. - EXPECT_EQ(call.softLimit + input.size(), clientCall.softLimit); - EXPECT_EQ(call.hardLimit + input.size(), clientCall.hardLimit); - - // We can forward fullCount if it is there. - EXPECT_EQ(call.needsFullCount(), clientCall.needsFullCount()); - - // This internally actually asserts that all input rows are "copied". - EXPECT_TRUE(outputRow.allRowsUsed()); - auto result = outputRow.stealBlock(); - if (!input.empty()) { - ASSERT_NE(result, nullptr); - ASSERT_EQ(result->size(), input.size()); - for (size_t i = 0; i < input.size(); ++i) { - auto val = result->getValueReference(i, 0); - ASSERT_TRUE(val.isNumber()); - EXPECT_EQ(val.toInt64(), input.at(i)); - } - } else { - EXPECT_EQ(result, nullptr); - } - } }; TEST_P(IdExecutorTestCombiner, test_produce_datarange_constFetcher) { + auto input = getInput(); + auto upstreamState = getUpstreamState(); + auto clientCall = getCall(); + auto inputRange = prepareInputRange(); + auto outputRow = prepareOutputRow(inputRange.getBlock()); + + // If the input is empty, all rows(none) are used, otherwise they are not. + EXPECT_EQ(outputRow.allRowsUsed(), input == 0); + IdExecutorInfos infos{1, {0}, {}, doCount()}; std::shared_ptr fakeFetcherInput{VPackParser::fromJson("[ ]")}; ConstFetcher cFetcher = ConstFetcherHelper{manager(), fakeFetcherInput->buffer()}; - runTest(cFetcher); + IdExecutor testee{cFetcher, infos}; + + auto const [state, stats, call] = testee.produceRows(inputRange, outputRow); + EXPECT_EQ(state, upstreamState); + // Stats are NoStats, no checks here. + + // We can never forward any offset. + EXPECT_EQ(call.getOffset(), 0); + + // The limits need to be reduced by input size. + EXPECT_EQ(call.softLimit + input, clientCall.softLimit); + EXPECT_EQ(call.hardLimit + input, clientCall.hardLimit); + + // We can forward fullCount if it is there. 
+ EXPECT_EQ(call.needsFullCount(), clientCall.needsFullCount()); + + // This internally actually asserts that all input rows are "copied". + EXPECT_TRUE(outputRow.allRowsUsed()); + auto result = outputRow.stealBlock(); + if (input > 0) { + ASSERT_NE(result, nullptr); + ASSERT_EQ(result->size(), input); + for (size_t i = 0; i < input; ++i) { + auto val = result->getValueReference(i, 0); + ASSERT_TRUE(val.isNumber()); + EXPECT_EQ(val.toInt64(), i); + } + } else { + EXPECT_EQ(result, nullptr); + } } TEST_P(IdExecutorTestCombiner, test_produce_datarange_singleRowFetcher) { - std::shared_ptr fakeFetcherInput{VPackParser::fromJson("[ ]")}; - SingleRowFetcher<::arangodb::aql::BlockPassthrough::Enable> srFetcher = - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable>{ - manager(), fakeFetcherInput->buffer(), false}; - runTest(srFetcher); + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock>>( + makeInfos(), ExecutionNode::SINGLETON) + .setInputFromRowNum(getInput()) + .setCall(getCall()) + .expectedState(getExpectedState()) + .expectSkipped(getSkip()) + .expectOutput({0}, getOutput()) + .expectedStats(getStats()) + .run(); } /** @@ -182,8 +256,8 @@ TEST_P(IdExecutorTestCombiner, test_produce_datarange_singleRowFetcher) { * DoCopy << This is to assert that copying is performaed */ -static auto inputs = testing::Values(std::vector{}, // Test empty input - std::vector{1, 2, 3} // Test input data +static auto inputs = testing::Values(0, // Test empty input + 3 // Test input data ); auto upstreamStates = testing::Values(ExecutorState::HASMORE, ExecutorState::DONE); @@ -201,13 +275,14 @@ auto copyBehaviours = testing::Values(OutputAqlItemRow::CopyRowBehavior::CopyInp ); INSTANTIATE_TEST_CASE_P(IdExecutorTest, IdExecutorTestCombiner, - ::testing::Combine(inputs, upstreamStates, clientCalls, copyBehaviours)); + ::testing::Combine(inputs, upstreamStates, clientCalls, + ::testing::Bool(), copyBehaviours)); class IdExecutionBlockTest : public AqlExecutorTestCase<> {}; // The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl TEST_F(IdExecutionBlockTest, test_initialize_cursor_get) { - IdExecutorInfos infos{1, {0}, {}}; + IdExecutorInfos infos{1, {0}, {}, false}; ExecutionBlockImpl> testee{fakedQuery->engine(), generateNodeDummy(), std::move(infos)}; @@ -249,7 +324,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_get) { // The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl TEST_F(IdExecutionBlockTest, test_initialize_cursor_skip) { - IdExecutorInfos infos{1, {0}, {}}; + IdExecutorInfos infos{1, {0}, {}, false}; ExecutionBlockImpl> testee{fakedQuery->engine(), generateNodeDummy(), std::move(infos)}; @@ -289,7 +364,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_skip) { // The IdExecutor has a specific initializeCursor method in ExecutionBlockImpl TEST_F(IdExecutionBlockTest, test_initialize_cursor_fullCount) { - IdExecutorInfos infos{1, {0}, {}}; + IdExecutorInfos infos{1, {0}, {}, false}; ExecutionBlockImpl> testee{fakedQuery->engine(), generateNodeDummy(), std::move(infos)}; @@ -330,7 +405,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_fullCount) { } TEST_F(IdExecutionBlockTest, test_hardlimit_single_row_fetcher) { - IdExecutorInfos infos{1, {0}, {}}; + IdExecutorInfos infos{1, {0}, {}, false}; ExecutorTestHelper(*fakedQuery) .setExecBlock>>(std::move(infos)) .setInputValueList(1, 2, 3, 4, 5, 6) @@ -351,7 +426,7 @@ TEST_F(IdExecutionBlockTest, test_hardlimit_single_row_fetcher) { class BlockOverloadTest : public 
AqlExecutorTestCaseWithParam { protected: auto getTestee() -> ExecutionBlockImpl> { - IdExecutorInfos infos{1, {0}, {}}; + IdExecutorInfos infos{1, {0}, {}, false}; return ExecutionBlockImpl>{fakedQuery->engine(), generateNodeDummy(), std::move(infos)}; diff --git a/tests/Aql/SplicedSubqueryIntegrationTest.cpp b/tests/Aql/SplicedSubqueryIntegrationTest.cpp new file mode 100644 index 000000000000..f6f37d99f4b1 --- /dev/null +++ b/tests/Aql/SplicedSubqueryIntegrationTest.cpp @@ -0,0 +1,471 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Markus Pfeiffer +//////////////////////////////////////////////////////////////////////////////// + +#include "gtest/gtest.h" + +#include "AqlItemBlockHelper.h" +#include "Mocks/Servers.h" +#include "TestEmptyExecutorHelper.h" +#include "TestExecutorHelper.h" +#include "TestLambdaExecutor.h" +#include "WaitingExecutionBlockMock.h" +#include "fakeit.hpp" + +#include "Aql/AqlCallStack.h" +#include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockSerializationFormat.h" +#include "Aql/ConstFetcher.h" +#include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutionEngine.h" +#include "Aql/IdExecutor.h" +#include "Aql/Query.h" +#include "Aql/RegisterPlan.h" +#include "Aql/ReturnExecutor.h" +#include "Aql/SingleRowFetcher.h" +#include "Aql/SubqueryEndExecutor.h" +#include "Aql/SubqueryStartExecutor.h" +#include "Transaction/Context.h" +#include "Transaction/Methods.h" + +#include "Aql/ExecutorTestHelper.h" +#include "Aql/TestLambdaExecutor.h" +#include "Aql/WaitingExecutionBlockMock.h" + +// TODO: remove me +#include "Logger/LogMacros.h" +#include "Logger/Logger.h" +#include "Logger/LoggerStream.h" + +using namespace arangodb; +using namespace arangodb::aql; +using namespace arangodb::tests; +using namespace arangodb::tests::aql; +using namespace arangodb::basics; + +using SubqueryExecutorTestHelper = ExecutorTestHelper<1, 1>; +using SubqueryExecutorSplitType = SubqueryExecutorTestHelper::SplitType; +using SubqueryExecutorParamType = std::tuple; + +using RegisterSet = std::unordered_set; +using LambdaExePassThrough = TestLambdaExecutor; +using LambdaExe = TestLambdaSkipExecutor; + +class SplicedSubqueryIntegrationTest + : public AqlExecutorTestCaseWithParam { + protected: + ExecutorTestHelper<1, 1> executorTestHelper; + + SplicedSubqueryIntegrationTest() : executorTestHelper(*fakedQuery) {} + + // returns a new pipeline that contains body as a subquery + auto createSubquery(Pipeline&& body) -> Pipeline { + auto subqueryEnd = createSubqueryEndExecutionBlock(); + if (!body.empty()) { + subqueryEnd->addDependency(body.get().front().get()); + } + body.get().emplace_front(std::move(subqueryEnd)); + + auto subqueryStart = createSubqueryStartExecutionBlock(); + // This exists because we at least added the subqueryEnd + 
body.get().back()->addDependency(subqueryStart.get()); + + body.get().emplace_back(std::move(subqueryStart)); + + return std::move(body); + } + + auto createSubquery() -> Pipeline { return createSubquery(Pipeline()); } + + auto createDoNothingPipeline() -> Pipeline { + auto numRegs = size_t{1}; + auto emptyRegisterList = std::make_shared>( + std::initializer_list{}); + + auto inRegisterList = std::make_shared>( + std::initializer_list{0}); + auto outRegisterList = std::make_shared>( + std::initializer_list{1}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + auto infos = LambdaExe::Infos(inRegisterList, outRegisterList, 1, 2, {}, + toKeep, createProduceCall(), createSkipCall()); + + return Pipeline(executorTestHelper.createExecBlock(std::move(infos))); + } + + auto createAssertPipeline() -> Pipeline { + auto numRegs = size_t{1}; + auto emptyRegisterList = std::make_shared>( + std::initializer_list{}); + + auto inRegisterList = std::make_shared>( + std::initializer_list{0}); + auto outRegisterList = std::make_shared>( + std::initializer_list{1}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + auto infos = LambdaExe::Infos(inRegisterList, outRegisterList, 1, 2, {}, + toKeep, createAssertCall(), createSkipCall()); + + return Pipeline(executorTestHelper.createExecBlock(std::move(infos))); + } + + auto createCallAssertPipeline(AqlCall call) -> Pipeline { + auto numRegs = size_t{1}; + auto emptyRegisterList = std::make_shared>( + std::initializer_list{}); + + auto inRegisterList = std::make_shared>( + std::initializer_list{0}); + auto outRegisterList = std::make_shared>( + std::initializer_list{1}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + auto infos = LambdaExe::Infos(inRegisterList, outRegisterList, 1, 2, {}, toKeep, + createAssertCallCall(call), createSkipCall()); + + return Pipeline(executorTestHelper.createExecBlock(std::move(infos))); + } + + auto createSubqueryStartExecutionBlock() -> ExecBlock { + // Subquery start executor does not care about input or output registers? + // TODO: talk about registers & register planning + + auto inputRegisterSet = + std::make_shared(std::initializer_list{0}); + auto outputRegisterSet = + std::make_shared(std::initializer_list{}); + auto toKeepRegisterSet = RegisterSet{0}; + + auto infos = SubqueryStartExecutor::Infos(inputRegisterSet, outputRegisterSet, + inputRegisterSet->size(), + inputRegisterSet->size() + + outputRegisterSet->size(), + {}, toKeepRegisterSet); + + return executorTestHelper.createExecBlock(std::move(infos), + ExecutionNode::SUBQUERY_START); + } + + // Subquery end executor has an input and an output register, + // but only the output register is used, remove input reg? 
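+  // Register layout assumed by these tests: register 0 carries the data rows
+  // flowing through the subquery, register 1 receives the array SubqueryEnd
+  // accumulates and writes when it consumes the corresponding shadow row.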
+ auto createSubqueryEndExecutionBlock() -> ExecBlock { + auto const inputRegister = RegisterId{0}; + auto const outputRegister = RegisterId{1}; + auto inputRegisterSet = + std::make_shared(std::initializer_list{inputRegister}); + auto outputRegisterSet = + std::make_shared(std::initializer_list{outputRegister}); + auto toKeepRegisterSet = RegisterSet{0}; + + auto infos = + SubqueryEndExecutor::Infos(inputRegisterSet, outputRegisterSet, + inputRegisterSet->size(), + inputRegisterSet->size() + outputRegisterSet->size(), + {}, toKeepRegisterSet, nullptr, + inputRegister, outputRegister, false); + + return executorTestHelper.createExecBlock(std::move(infos), + ExecutionNode::SUBQUERY_END); + } + + auto createReturnExecutionBlock() -> ExecBlock { + auto const inputRegister = RegisterId{0}; + auto const outputRegister = RegisterId{0}; + auto inputRegisterSet = + std::make_shared(std::initializer_list{inputRegister}); + auto outputRegisterSet = + std::make_shared(std::initializer_list{outputRegister}); + auto toKeepRegisterSet = RegisterSet{0}; + + auto infos = ReturnExecutor::Infos(inputRegister, 1, 1, false); + + return executorTestHelper.createExecBlock(std::move(infos), + ExecutionNode::RETURN); + } + + auto createProduceCall() -> ProduceCall { + return [](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + while (input.hasDataRow() && !output.isFull()) { + auto const [state, row] = input.nextDataRow(); + output.cloneValueInto(1, row, AqlValue("foo")); + output.advanceRow(); + } + NoStats stats{}; + AqlCall call{}; + + return {input.upstreamState(), stats, call}; + }; + }; + + auto createSkipCall() -> SkipCall { + return [](AqlItemBlockInputRange& input, + AqlCall& call) -> std::tuple { + auto skipped = size_t{0}; + while (input.hasDataRow() && call.shouldSkip()) { + auto const& [state, inputRow] = input.nextDataRow(); + EXPECT_TRUE(inputRow.isInitialized()); + call.didSkip(1); + skipped++; + } + auto upstreamCall = AqlCall{call}; + return {input.upstreamState(), NoStats{}, skipped, upstreamCall}; + }; + }; + + // Asserts if called. 
This is to check that when we use skip to + // skip over a subquery, the subquery's produce is not invoked + auto createAssertCall() -> ProduceCall { + return [](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + EXPECT_TRUE(false); + NoStats stats{}; + AqlCall call{}; + + return {ExecutorState::DONE, stats, call}; + }; + } + + auto createAssertCallCall(AqlCall call) -> ProduceCall { + return [call](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + auto clientCall = output.getClientCall(); + + EXPECT_EQ(clientCall.offset, call.offset); + EXPECT_EQ(clientCall.softLimit, call.softLimit); + EXPECT_EQ(clientCall.hardLimit, call.hardLimit); + EXPECT_EQ(clientCall.fullCount, call.fullCount); + + while (input.hasDataRow() && !output.isFull()) { + auto const [state, row] = input.nextDataRow(); + output.cloneValueInto(1, row, AqlValue("foo")); + output.advanceRow(); + } + + NoStats stats{}; + AqlCall call{}; + + return {input.upstreamState(), stats, call}; + }; + } + auto getSplit() -> SubqueryExecutorSplitType { + auto [split] = GetParam(); + return split; + } +}; + +template +const SubqueryExecutorSplitType splitIntoBlocks = + SubqueryExecutorSplitType{std::vector{vs...}}; +template +const SubqueryExecutorSplitType splitStep = SubqueryExecutorSplitType{step}; + +INSTANTIATE_TEST_CASE_P(SplicedSubqueryIntegrationTest, SplicedSubqueryIntegrationTest, + ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>, + splitStep<2>, splitStep<1>)); + +TEST_P(SplicedSubqueryIntegrationTest, single_subquery_empty_input) { + auto call = AqlCall{}; + auto pipeline = createSubquery(); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList() + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, single_subquery) { + auto call = AqlCall{}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{1, R"([1])"}, + {2, R"([2])"}, + {5, R"([5])"}, + {2, R"([2])"}, + {1, R"([1])"}, + {5, R"([5])"}, + {7, R"([7])"}, + {1, R"([1])"}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_and_produce) { + auto call = AqlCall{5}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{5, R"([5])"}, {7, R"([7])"}, {1, R"([1])"}}) + .expectSkipped(5) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_all) { + auto call = AqlCall{20}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_fullcount) { + auto call = AqlCall{0, true, 0, AqlCall::LimitType::HARD}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 
2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_produce_count) { + auto call = AqlCall{2, true, 2, AqlCall::LimitType::HARD}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{5, R"([5])"}, {2, R"([2])"}}) + .expectSkipped(6) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, two_nested_subqueries_empty_input) { + auto call = AqlCall{}; + auto pipeline = createSubquery(createSubquery()); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList() + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, two_nested_subqueries) { + auto call = AqlCall{}; + auto pipeline = createSubquery(createSubquery()); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, two_sequential_subqueries) { + auto call = AqlCall{}; + auto pipeline = concatPipelines(createSubquery(), createSubquery()); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, do_nothing_in_subquery) { + auto call = AqlCall{}; + auto pipeline = createSubquery(createDoNothingPipeline()); + + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, DISABLED_check_call_passes_subquery) { + auto call = AqlCall{10}; + auto pipeline = concatPipelines(createCallAssertPipeline(call), createSubquery()); + + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SplicedSubqueryIntegrationTest, DISABLED_check_skipping_subquery) { + auto call = AqlCall{10}; + auto pipeline = createSubquery(createAssertPipeline()); + + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; diff --git a/tests/Aql/SubqueryEndExecutorTest.cpp b/tests/Aql/SubqueryEndExecutorTest.cpp index 8902b2c4531b..3bbb66beb127 100644 --- a/tests/Aql/SubqueryEndExecutorTest.cpp +++ b/tests/Aql/SubqueryEndExecutorTest.cpp @@ -48,12 +48,14 @@ class SubqueryEndExecutorTest : public ::testing::Test { SubqueryEndExecutorTest() : 
_infos(std::make_shared(std::initializer_list({0})), std::make_shared(std::initializer_list({0})), - 1, 1, {}, {}, nullptr, RegisterId{0}, RegisterId{0}) {} + 1, 1, {}, {}, nullptr, RegisterId{0}, RegisterId{0}, false) {} protected: ResourceMonitor monitor; AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; SubqueryEndExecutorInfos _infos; + SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher{ + itemBlockManager, VPackParser::fromJson("[]")->steal(), false}; void ExpectedValues(OutputAqlItemRow& itemRow, std::vector> const& expectedStrings, @@ -98,248 +100,3 @@ TEST_F(SubqueryEndExecutorTest, check_properties) { << "The block produces one output row per input row plus potentially a " "shadow rows which is bounded by the structure of the query"; }; - -// If the input to a spliced subquery is empty, there should be no output -TEST_F(SubqueryEndExecutorTest, empty_input_expects_no_shadow_rows) { - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{{}}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, 1, false, inputBlock); - SubqueryEndExecutor testee(fetcher, _infos); - - // I don't seem to be able to make an empty inputBlock above, - // so we just fetch the one row that's in the block. - fetcher.fetchRow(); - - ExecutionState state{ExecutionState::HASMORE}; - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 0); -} - -TEST_F(SubqueryEndExecutorTest, single_input_expects_shadow_rows) { - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{1}}, {{1}}}, {{1, 0}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - ExecutionState state{ExecutionState::HASMORE}; - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 1); - - ExpectedValues(output, {{"[1]"}}, {}); -} - -TEST_F(SubqueryEndExecutorTest, two_inputs_one_shadowrow) { - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{42}}, {{34}}, {{1}}}, {{2, 0}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - ExecutionState state{ExecutionState::HASMORE}; - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 1); - - ExpectedValues(output, {{"[42,34]"}}, {}); -} - -TEST_F(SubqueryEndExecutorTest, 
two_inputs_two_shadowrows) { - SharedAqlItemBlockPtr outputBlock; - - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{42}}, {{1}}, {{34}}, {{1}}}, {{1, 0}, {3, 0}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - ExecutionState state{ExecutionState::HASMORE}; - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 2); - ExpectedValues(output, {{"[42]"}, {"[34]"}}, {}); -} - -TEST_F(SubqueryEndExecutorTest, two_input_one_shadowrow_two_irrelevant) { - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{42}}, {{42}}, {{42}}, {{42}}, {{42}}}, - {{2, 0}, {3, 1}, {4, 2}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - ExecutionState state{ExecutionState::HASMORE}; - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 3); - ExpectedValues(output, {{{"[42, 42]"}}, {{""}}, {{""}}}, {{1, 0}, {2, 1}}); -} - -TEST_F(SubqueryEndExecutorTest, consume_output_of_subquery_end_executor) { - ExecutionState state{ExecutionState::HASMORE}; - - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{42}}, {{42}}, {{42}}, {{42}}, {{42}}}, - {{2, 0}, {3, 1}, {4, 2}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 3); - - // ExpectedValues(output, { "[42, 42]", "", "" }); - - outputBlock = output.stealBlock(); - inputBlock.swap(outputBlock); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher2( - itemBlockManager, inputBlock->size(), false, inputBlock); - SubqueryEndExecutor testee2(fetcher2, _infos); - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output2{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - std::tie(state, std::ignore) = testee2.produceRows(output2); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output2.numRowsWritten(), 2); - - ExpectedValues(output2, {{"[ [42, 42] ]"}, {""}}, {{1, 0}}); -} - -TEST_F(SubqueryEndExecutorTest, write_to_register_outside) { - auto infos = SubqueryEndExecutorInfos( - std::make_shared(std::initializer_list{0}), - 
std::make_shared(std::initializer_list{1}), 1, 2, - {}, RegisterSet{0}, nullptr, RegisterId{0}, RegisterId{1}); - - ExecutionState state{ExecutionState::HASMORE}; - - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{42}}, {{23}}}, {{1, 0}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, infos); - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 2)); - OutputAqlItemRow output{std::move(outputBlock), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 1); - - ExpectedValues(output, {{"23", "[42]"}}, {{1, 0}}); -} - -TEST_F(SubqueryEndExecutorTest, no_input_register) { - auto infos = SubqueryEndExecutorInfos( - std::make_shared(std::initializer_list{0}), - std::make_shared(std::initializer_list{1}), 1, 2, {}, - RegisterSet{0}, nullptr, RegisterId{RegisterPlan::MaxRegisterId}, RegisterId{1}); - - ExecutionState state{ExecutionState::HASMORE}; - - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{{42}}, {{23}}}, {{1, 0}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, infos); - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 2)); - OutputAqlItemRow output{std::move(outputBlock), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, std::ignore) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(output.numRowsWritten(), 1); - - ExpectedValues(output, {{"23", "[]"}}, {{1, 0}}); -} - -// TODO: This is a "death test" with malformed shadow row layout (an irrelevant shadow row before any other row) -// See https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#death-tests-and-threads - -using SubqueryEndExecutorTest_DeathTest = SubqueryEndExecutorTest; - -TEST_F(SubqueryEndExecutorTest_DeathTest, no_shadow_row) { - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = buildBlock<1>(itemBlockManager, {{1}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - ExecutionState state{ExecutionState::HASMORE}; - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); - OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - EXPECT_DEATH_CORE_FREE(std::tie(state, std::ignore) = testee.produceRows(output), - ".*"); -} - -TEST_F(SubqueryEndExecutorTest_DeathTest, misplaced_irrelevant_shadowrow) { - SharedAqlItemBlockPtr outputBlock; - SharedAqlItemBlockPtr inputBlock = - buildBlock<1>(itemBlockManager, {{42}, {42}, {42}}, {{1, 1}, {2, 1}}); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, inputBlock->size(), false, inputBlock); - - SubqueryEndExecutor testee(fetcher, _infos); - - ExecutionState state{ExecutionState::HASMORE}; - - outputBlock.reset(new AqlItemBlock(itemBlockManager, inputBlock->size(), 1)); 
- OutputAqlItemRow output{std::move(outputBlock), _infos.getOutputRegisters(), - _infos.registersToKeep(), _infos.registersToClear()}; - EXPECT_DEATH_CORE_FREE(std::tie(state, std::ignore) = testee.produceRows(output), - ".*"); -} diff --git a/tests/Aql/SubqueryStartExecutorTest.cpp b/tests/Aql/SubqueryStartExecutorTest.cpp index 75fdc4ef32a9..dc02b404e2d0 100644 --- a/tests/Aql/SubqueryStartExecutorTest.cpp +++ b/tests/Aql/SubqueryStartExecutorTest.cpp @@ -20,10 +20,15 @@ /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// -#include "RowFetcherHelper.h" #include "gtest/gtest.h" +#include "ExecutorTestHelper.h" +#include "RowFetcherHelper.h" + +#include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockHelper.h" #include "Aql/ExecutorInfos.h" +#include "Aql/InputAqlItemRow.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/Stats.h" #include "Aql/SubqueryStartExecutor.h" @@ -32,6 +37,7 @@ #include #include +#include "Logger/LogMacros.h" using namespace arangodb; using namespace arangodb::aql; using namespace arangodb::tests; @@ -47,25 +53,37 @@ ExecutorInfos MakeBaseInfos(RegisterId numRegs) { } return ExecutorInfos(emptyRegisterList, emptyRegisterList, numRegs, numRegs, {}, toKeep); } - -void TestShadowRow(SharedAqlItemBlockPtr const& block, size_t row, bool isRelevant) { - EXPECT_TRUE(block->isShadowRow(row)); - // We do this additional if, in order to allow the outer test loop to continue - // even if we do not have a shadow row. - if (block->isShadowRow(row)) { - ShadowAqlItemRow shadow{block, row}; - EXPECT_EQ(shadow.isRelevant(), isRelevant) << "Testing row " << row; - } -} } // namespace -class SubqueryStartExecutorTest : public ::testing::Test { +// We need to be backwards compatible, with version 3.6 +// There we do not get a fullStack, but only a single entry. +// We have a compatibility mode Stack for this version +// These tests can be removed again in the branch for the version +// after 3.7.* +enum CompatibilityMode { VERSION36, VERSION37 }; +class SubqueryStartExecutorTest + : public AqlExecutorTestCaseWithParam { protected: - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager{&monitor, SerializationFormat::SHADOWROWS}; + auto GetCompatMode() const -> CompatibilityMode { + auto const mode = GetParam(); + return mode; + } + + auto queryStack(AqlCall fromSubqueryEnd, AqlCall insideSubquery) const -> AqlCallStack { + if (GetCompatMode() == CompatibilityMode::VERSION36) { + return AqlCallStack{insideSubquery, true}; + } + AqlCallStack stack(fromSubqueryEnd); + stack.pushCall(std::move(insideSubquery)); + return stack; + } }; -TEST_F(SubqueryStartExecutorTest, check_properties) { +INSTANTIATE_TEST_CASE_P(SubqueryStartExecutorTest, SubqueryStartExecutorTest, + ::testing::Values(CompatibilityMode::VERSION36, + CompatibilityMode::VERSION37)); + +TEST_P(SubqueryStartExecutorTest, check_properties) { EXPECT_TRUE(SubqueryStartExecutor::Properties::preservesOrder) << "The block has no effect on ordering of elements, it adds additional " "rows only."; @@ -76,178 +94,249 @@ TEST_F(SubqueryStartExecutorTest, check_properties) { "input. 
(Might be less if input contains shadowRows"; } -TEST_F(SubqueryStartExecutorTest, empty_input_does_not_add_shadow_rows) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 1)}; - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), false); - auto infos = MakeBaseInfos(1); - SubqueryStartExecutor testee(fetcher, infos); - - NoStats stats{}; - ExecutionState state{ExecutionState::HASMORE}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(output.produced()); - EXPECT_EQ(output.numRowsWritten(), 0); +TEST_P(SubqueryStartExecutorTest, empty_input_does_not_add_shadow_rows) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectOutput({0}, {}) + .expectSkipped(0) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(); +} + +TEST_P(SubqueryStartExecutorTest, adds_a_shadowrow_after_single_input) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{R"("a")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectSkipped(0) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(); } -TEST_F(SubqueryStartExecutorTest, adds_a_shadowrow_after_single_input) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 1)}; - auto input = VPackParser::fromJson(R"([ - ["a"] - ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); - auto infos = MakeBaseInfos(1); - SubqueryStartExecutor testee(fetcher, infos); - - NoStats stats{}; - ExecutionState state{ExecutionState::HASMORE}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(output.produced()); - EXPECT_EQ(output.numRowsWritten(), 2); - - block = output.stealBlock(); - EXPECT_FALSE(block->isShadowRow(0)); - TestShadowRow(block, 1, true); +// NOTE: The following two tests exclude each other. +// Right now we can only support 1 ShadowRow per request, we cannot do a look-ahead of +// calls. As soon as we can this test needs to re removed, and the one blow needs to be activated. 
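The parameterized tests above and below obtain their call stacks from the queryStack() helper defined earlier in this file. As a rough illustration (assuming only the AqlCallStack/AqlCall API already used in this file; this is a sketch, not part of the patch), the two shapes it produces look like this:

// 3.6 compatibility mode: upstream ships only the innermost call, so the
// stack is built from that single call with the compatibility flag set.
AqlCallStack compat{AqlCall{}, true};

// 3.7 mode: the call coming from the SubqueryEnd node forms the base of the
// stack, and one more call is pushed for the nested subquery level.
AqlCallStack full{AqlCall{}};
full.pushCall(AqlCall{});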
+TEST_P(SubqueryStartExecutorTest, + adds_only_one_shadowrow_even_if_more_input_is_available_in_single_pass) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{{R"("a")"}}, {{R"("b")"}}, {{R"("c")"}}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::HASMORE) + .expectSkipped(0) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(); } -TEST_F(SubqueryStartExecutorTest, adds_a_shadowrow_after_every_input_line_in_single_pass) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 1)}; - auto input = VPackParser::fromJson(R"([ - ["a"], - ["b"], - ["c"] - ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); - auto infos = MakeBaseInfos(1); - SubqueryStartExecutor testee(fetcher, infos); - - NoStats stats{}; - ExecutionState state{ExecutionState::HASMORE}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(output.produced()); - EXPECT_EQ(output.numRowsWritten(), 6); - - block = output.stealBlock(); - EXPECT_FALSE(block->isShadowRow(0)); - TestShadowRow(block, 1, true); - EXPECT_FALSE(block->isShadowRow(2)); - TestShadowRow(block, 3, true); - EXPECT_FALSE(block->isShadowRow(4)); - TestShadowRow(block, 5, true); +// NOTE: This test and the one right above do exclude each other. +// This is the behaviour we would like to have eventually +// As soon as we can support Call look-aheads we need to enable this test. +// and it needs to pass then +TEST_P(SubqueryStartExecutorTest, DISABLED_adds_a_shadowrow_after_every_input_line_in_single_pass) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{{R"("a")"}}, {{R"("b")"}}, {{R"("c")"}}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectSkipped(0) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}}, + {{1, 0}, {3, 0}, {5, 0}}) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(); } -TEST_F(SubqueryStartExecutorTest, shadow_row_does_not_fit_in_current_block) { - auto input = VPackParser::fromJson(R"([ - ["a"], - ["b"], - ["c"] - ])"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); - auto infos = MakeBaseInfos(1); - SubqueryStartExecutor testee(fetcher, infos); - - NoStats stats{}; - ExecutionState state{ExecutionState::HASMORE}; +// NOTE: As soon as the single_pass test is enabled this test is superflous. 
+// It will be identical to the one above +TEST_P(SubqueryStartExecutorTest, adds_a_shadowrow_after_every_input_line) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{{R"("a")"}}, {{R"("b")"}}, {{R"("c")"}}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectSkipped(0) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}}, + {{1, 0}, {3, 0}, {5, 0}}) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(true); +} + +TEST_P(SubqueryStartExecutorTest, shadow_row_does_not_fit_in_current_block) { + // NOTE: This test relies on batchSizes beeing handled correctly and we do not over-allocate memory + // Also it tests, that ShadowRows go into place accounting of the output block (count as 1 line) + + // NOTE: Reduce batch size to 1, to enforce a too small output block + ExecutionBlock::setDefaultBatchSize(1); + TRI_DEFER(ExecutionBlock::setDefaultBatchSize(ExecutionBlock::ProductionDefaultBatchSize);); { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 3, 1)}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_FALSE(output.produced()); - EXPECT_EQ(output.numRowsWritten(), 3); - - block = output.stealBlock(); - EXPECT_FALSE(block->isShadowRow(0)); - TestShadowRow(block, 1, true); - EXPECT_FALSE(block->isShadowRow(2)); + // First test: Validate that the shadowRow is not written + // We only do a single call here + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{R"("a")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::HASMORE) + .expectSkipped(0) + .expectOutput({0}, {{R"("a")"}}, {}) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(); } { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 3, 1)}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(output.produced()); - EXPECT_EQ(output.numRowsWritten(), 3); - - block = output.stealBlock(); - TestShadowRow(block, 0, true); - EXPECT_FALSE(block->isShadowRow(1)); - TestShadowRow(block, 2, true); + // Second test: Validate that the shadowRow is eventually written + // if we call often enough + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{R"("a")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectSkipped(0) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) + .setCallStack(queryStack(AqlCall{}, AqlCall{})) + .run(true); } } -// TODO: -// This test can be enabled and should work as soon as the Fetcher skips non-relevant Subqueries -TEST_F(SubqueryStartExecutorTest, does_only_add_shadowrows_on_data_rows) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 1)}; - auto input = VPackParser::fromJson(R"([ - ["a"], - ["b"], - ["c"] - ])"); - - auto infos = MakeBaseInfos(1); +TEST_P(SubqueryStartExecutorTest, skip_in_subquery) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{R"("a")"}}) + 
.expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectOutput({0}, {{R"("a")"}}, {{0, 0}}) + .expectSkipped(1) + .setCallStack(queryStack(AqlCall{}, AqlCall{10, false})) + .run(); +} + +TEST_P(SubqueryStartExecutorTest, fullCount_in_subquery) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START) + .setInputValue({{R"("a")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectOutput({0}, {{R"("a")"}}, {{0, 0}}) + .expectSkipped(1) + .setCallStack(queryStack(AqlCall{}, AqlCall{0, true, 0, AqlCall::LimitType::HARD})) + .run(); +} + +TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding) { + ExecutorTestHelper<1, 1> helper(*fakedQuery); + AqlCallStack stack = queryStack(AqlCall{}, AqlCall{}); + stack.pushCall(AqlCall{}); + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)); + helper.setPipeline(std::move(pipe)) + .setInputValue({{R"("a")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("a")"}}, {{1, 0}, {2, 1}}) + .expectSkipped(0) + .setCallStack(stack) + .run(); +} + +TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_single_call) { + ExecutorTestHelper<1, 1> helper(*fakedQuery); + AqlCallStack stack = queryStack(AqlCall{}, AqlCall{}); + stack.pushCall(AqlCall{}); + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)); + helper.setPipeline(std::move(pipe)) + .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::HASMORE) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("a")"}}, {{1, 0}, {2, 1}}) + .expectSkipped(0) + .setCallStack(stack) + .run(); +} + +TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_many_requests) { + ExecutorTestHelper<1, 1> helper(*fakedQuery); + AqlCallStack stack = queryStack(AqlCall{}, AqlCall{}); + stack.pushCall(AqlCall{}); + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)); + helper.setPipeline(std::move(pipe)) + .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectOutput( + {0}, + {{R"("a")"}, {R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}, {R"("c")"}}, + {{1, 0}, {2, 1}, {4, 0}, {5, 1}, {7, 0}, {8, 1}}) + .expectSkipped(0) + .setCallStack(stack) + .run(true); +} + +TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_not_enough_space) { + // NOTE: This test relies on batchSizes beeing handled correctly and we do not over-allocate memory + // Also it tests, that ShadowRows go into place accounting of the output block (count as 1 line) + + // NOTE: Reduce batch size to 2, to enforce a too small output block, in between the shadow Rows + ExecutionBlock::setDefaultBatchSize(2); + TRI_DEFER(ExecutionBlock::setDefaultBatchSize(ExecutionBlock::ProductionDefaultBatchSize);); { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); - 
SubqueryStartExecutor testee(fetcher, infos); - - NoStats stats{}; - ExecutionState state{ExecutionState::HASMORE}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(output.produced()); - ASSERT_EQ(output.numRowsWritten(), 6); - block = output.stealBlock(); - EXPECT_FALSE(block->isShadowRow(0)); - TestShadowRow(block, 1, true); - EXPECT_FALSE(block->isShadowRow(2)); - TestShadowRow(block, 3, true); - EXPECT_FALSE(block->isShadowRow(4)); - TestShadowRow(block, 5, true); - // Taken from test above. We now have produced a block - // having 3 data rows alternating with 3 shadow rows + // First test: Validate that the shadowRow is not written + // We only do a single call here + ExecutorTestHelper<1, 1> helper(*fakedQuery); + AqlCallStack stack = queryStack(AqlCall{}, AqlCall{}); + stack.pushCall(AqlCall{}); + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)); + helper.setPipeline(std::move(pipe)) + .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::HASMORE) + .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) + .expectSkipped(0) + .setCallStack(stack) + .run(); } { - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, - 6, false, block); - SubqueryStartExecutor testee(fetcher, infos); - block.reset(new AqlItemBlock(itemBlockManager, 1000, 1)); - - NoStats stats{}; - ExecutionState state{ExecutionState::HASMORE}; - OutputAqlItemRow output{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(output); - EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_FALSE(output.produced()); - ASSERT_EQ(output.numRowsWritten(), 9); - block = output.stealBlock(); - EXPECT_FALSE(block->isShadowRow(0)); - TestShadowRow(block, 1, true); - TestShadowRow(block, 2, false); - EXPECT_FALSE(block->isShadowRow(3)); - TestShadowRow(block, 4, true); - TestShadowRow(block, 5, false); - EXPECT_FALSE(block->isShadowRow(6)); - TestShadowRow(block, 7, true); - TestShadowRow(block, 8, false); + // Second test: Validate that the shadowRow is eventually written + // Wedo call as many times as we need to. 
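+    // (run(true) keeps issuing calls until the executor reports DONE -- cf. the
+    // single-call variant above -- so the shadow rows that do not fit into the
+    // first two-row output block arrive with later calls.)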
+ ExecutorTestHelper<1, 1> helper(*fakedQuery); + AqlCallStack stack = queryStack(AqlCall{}, AqlCall{}); + stack.pushCall(AqlCall{}); + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeBaseInfos(1), + ExecutionNode::SUBQUERY_START)); + helper.setPipeline(std::move(pipe)) + .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) + .expectedStats(ExecutionStats{}) + .expectedState(ExecutionState::DONE) + .expectOutput( + {0}, + {{R"("a")"}, {R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}, {R"("c")"}}, + {{1, 0}, {2, 1}, {4, 0}, {5, 1}, {7, 0}, {8, 1}}) + .expectSkipped(0) + .setCallStack(stack) + .run(true); } } + +// TODO: +// * Add tests for Skipping +// - on Higher level subquery diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp index b20a09c843ca..c48de11e02e4 100644 --- a/tests/Aql/WaitingExecutionBlockMock.cpp +++ b/tests/Aql/WaitingExecutionBlockMock.cpp @@ -28,6 +28,8 @@ #include "Aql/ExecutionState.h" #include "Aql/QueryOptions.h" +#include "Logger/LogMacros.h" + #include using namespace arangodb; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5061dd53a789..9df5c6a01515 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -68,6 +68,7 @@ set(ARANGODB_TESTS_SOURCES Aql/SortExecutorTest.cpp Aql/SortLimit-test.cpp Aql/SpliceSubqueryOptimizerRuleTest.cpp + Aql/SplicedSubqueryIntegrationTest.cpp Aql/SubqueryEndExecutorTest.cpp Aql/SubqueryStartExecutorTest.cpp Aql/TestEmptyExecutorHelper.cpp From 1cd9adb371f5fdc722de696306a5c461debb8d17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 27 Feb 2020 09:27:47 +0100 Subject: [PATCH 077/122] Feature/aql subquery execute remote node api flag (#11159) * Clarified comment * Added api property to RemoteNode * Added missing include --- arangod/Aql/AqlCall.h | 2 +- arangod/Aql/AqlItemBlockSerializationFormat.h | 4 +++ arangod/Aql/ClusterNodes.cpp | 20 ++++++++++++- arangod/Aql/ClusterNodes.h | 28 +++++++++++++++---- ...EngineInfoContainerDBServerServerBased.cpp | 3 +- arangod/Aql/RestAqlHandler.cpp | 7 +++-- lib/Basics/StaticStrings.cpp | 1 + lib/Basics/StaticStrings.h | 1 + 8 files changed, 55 insertions(+), 11 deletions(-) diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 75d2c7048b7a..1abf4868b6c4 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -34,7 +34,7 @@ namespace arangodb::aql { struct AqlCall { // TODO We currently have softLimit and hardLimit, where both can be a number // or Infinity - but not both may be non-infinite at the same time. - // In addition, a soft limit does only make sense together with a hard + // In addition, fullCount does only make sense together with a hard // limit. // The data structures and APIs should reflect that. 
E.g.: // Infinity | SoftLimit { count : Int } | HardLimit { count : Int, fullCount : Bool } diff --git a/arangod/Aql/AqlItemBlockSerializationFormat.h b/arangod/Aql/AqlItemBlockSerializationFormat.h index 72d8edbe9657..1d98a7836151 100644 --- a/arangod/Aql/AqlItemBlockSerializationFormat.h +++ b/arangod/Aql/AqlItemBlockSerializationFormat.h @@ -23,6 +23,8 @@ #ifndef ARANGOD_AQL_AQLITEMBLOCK_SERIALIZATION_FORMAT_H #define ARANGOD_AQL_AQLITEMBLOCK_SERIALIZATION_FORMAT_H +#include + namespace arangodb { namespace aql { @@ -34,6 +36,8 @@ enum class SerializationFormat { SHADOWROWS = 1 }; +using SerializationFormatType = std::underlying_type_t; + } // namespace aql } // namespace arangodb #endif diff --git a/arangod/Aql/ClusterNodes.cpp b/arangod/Aql/ClusterNodes.cpp index 965442481607..1bcfffe3eb12 100644 --- a/arangod/Aql/ClusterNodes.cpp +++ b/arangod/Aql/ClusterNodes.cpp @@ -128,7 +128,8 @@ RemoteNode::RemoteNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& b : DistributeConsumerNode(plan, base), _vocbase(&(plan->getAst()->query()->vocbase())), _server(base.get("server").copyString()), - _queryId(base.get("queryId").copyString()) { + _queryId(base.get("queryId").copyString()), + _apiToUse(getApiProperty(base, StaticStrings::AqlRemoteApi)) { // Backwards compatibility (3.4.x)(3.5.0) and earlier, coordinator might send ownName. arangodb::velocypack::StringRef tmpId(getDistributeId()); tmpId = VelocyPackHelper::getStringRef(base, "ownName", tmpId); @@ -183,6 +184,7 @@ void RemoteNode::toVelocyPackHelper(VPackBuilder& nodes, unsigned flags, nodes.add("database", VPackValue(_vocbase->name())); nodes.add("server", VPackValue(_server)); nodes.add("queryId", VPackValue(_queryId)); + nodes.add(StaticStrings::AqlRemoteApi, apiToVpack(_apiToUse)); // And close it: nodes.close(); @@ -203,6 +205,22 @@ CostEstimate RemoteNode::estimateCost() const { return estimate; } +auto RemoteNode::api() const noexcept -> Api { + return _apiToUse; +} + +auto RemoteNode::apiToVpack(Api const api) -> velocypack::Value { + return VPackValue(static_cast>(api)); +} + +auto RemoteNode::getApiProperty(VPackSlice slice, std::string const& key) + -> RemoteNode::Api { + using ApiType = std::underlying_type_t; + // Default to GET_SOME + return static_cast( + VelocyPackHelper::getNumericValue(slice, key, static_cast(Api::GET_SOME))); +} + /// @brief construct a scatter node ScatterNode::ScatterNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base) : ExecutionNode(plan, base) { diff --git a/arangod/Aql/ClusterNodes.h b/arangod/Aql/ClusterNodes.h index 7095fee40436..5eefc5395cb9 100644 --- a/arangod/Aql/ClusterNodes.h +++ b/arangod/Aql/ClusterNodes.h @@ -35,6 +35,8 @@ #include "VocBase/voc-types.h" #include "VocBase/vocbase.h" +#include + namespace arangodb { namespace velocypack { class Builder; @@ -53,14 +55,20 @@ struct Collection; class RemoteNode final : public DistributeConsumerNode { friend class ExecutionBlock; - /// @brief constructor with an id public: + /// @brief Type of API; the legacy pre-3.7 getSome/skipSome API, or the + /// execute API. Used for rolling upgrades. Can be removed in 3.8. + /// It is serialized as an integral, changing the values will break the + /// API! 
+ enum class Api { GET_SOME = 0, EXECUTE = 1 }; + + /// @brief constructor with an id RemoteNode(ExecutionPlan* plan, size_t id, TRI_vocbase_t* vocbase, - std::string const& server, std::string const& ownName, std::string const& queryId) + std::string server, std::string const& ownName, std::string queryId, Api = Api::EXECUTE) : DistributeConsumerNode(plan, id, ownName), _vocbase(vocbase), - _server(server), - _queryId(queryId) { + _server(std::move(server)), + _queryId(std::move(queryId)) { // note: server and queryId may be empty and filled later } @@ -82,7 +90,7 @@ class RemoteNode final : public DistributeConsumerNode { ExecutionNode* clone(ExecutionPlan* plan, bool withDependencies, bool withProperties) const override final { return cloneHelper(std::make_unique(plan, _id, _vocbase, _server, - getDistributeId(), _queryId), + getDistributeId(), _queryId, _apiToUse), withDependencies, withProperties); } @@ -109,6 +117,12 @@ class RemoteNode final : public DistributeConsumerNode { _queryId = arangodb::basics::StringUtils::itoa(queryId); } + [[nodiscard]] auto api() const noexcept -> Api; + + private: + static auto apiToVpack(Api) -> velocypack::Value; + static auto getApiProperty(VPackSlice slice, std::string const& key) -> Api; + private: /// @brief the underlying database TRI_vocbase_t* _vocbase; @@ -118,6 +132,10 @@ class RemoteNode final : public DistributeConsumerNode { /// @brief the ID of the query on the server as a string std::string _queryId; + + /// @brief Whether to use the pre-3.7 getSome/skipSome API, instead of the + /// execute API. Used for rolling upgrades, so can be removed in 3.8. + Api _apiToUse = Api::EXECUTE; }; /// @brief class ScatterNode diff --git a/arangod/Aql/EngineInfoContainerDBServerServerBased.cpp b/arangod/Aql/EngineInfoContainerDBServerServerBased.cpp index 8503b41a42e5..99346d0445f6 100644 --- a/arangod/Aql/EngineInfoContainerDBServerServerBased.cpp +++ b/arangod/Aql/EngineInfoContainerDBServerServerBased.cpp @@ -325,7 +325,8 @@ Result EngineInfoContainerDBServerServerBased::buildEngines( TRI_ASSERT(infoBuilder.isOpenObject()); infoBuilder.add(StaticStrings::SerializationFormat, - VPackValue(static_cast(aql::SerializationFormat::SHADOWROWS))); + VPackValue(static_cast( + aql::SerializationFormat::SHADOWROWS))); infoBuilder.close(); // Base object TRI_ASSERT(infoBuilder.isClosed()); diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index e0cc8ab6ee0a..facdf4d8d802 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -188,9 +188,10 @@ void RestAqlHandler::setupClusterQuery() { } // If we have a new format then it has to be included here. // If not default to classic (old coordinator will not send it) - SerializationFormat format = static_cast( - VelocyPackHelper::getNumericValue(querySlice, StaticStrings::SerializationFormat, - static_cast(SerializationFormat::CLASSIC))); + auto format = static_cast( + VelocyPackHelper::getNumericValue( + querySlice, StaticStrings::SerializationFormat, + static_cast(SerializationFormat::CLASSIC))); // Now we need to create shared_ptr // That contains the old-style cluster snippet in order // to prepare create a Query object. 
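A minimal sketch of how the new "api" flag round-trips, assuming only the helpers introduced in this patch (apiToVpack() writes the enum as its underlying integral value, getApiProperty() falls back to GET_SOME when an old coordinator does not send the key); illustration only, not part of the patch:

using ApiType = std::underlying_type_t<RemoteNode::Api>;

VPackBuilder builder;
builder.openObject();
// serialize: write the integral value under StaticStrings::AqlRemoteApi
builder.add(StaticStrings::AqlRemoteApi,
            VPackValue(static_cast<ApiType>(RemoteNode::Api::EXECUTE)));
builder.close();

// deserialize: a missing key (pre-3.7 coordinator) defaults to GET_SOME,
// which is what makes the rolling upgrade work
auto api = static_cast<RemoteNode::Api>(VelocyPackHelper::getNumericValue<ApiType>(
    builder.slice(), StaticStrings::AqlRemoteApi,
    static_cast<ApiType>(RemoteNode::Api::GET_SOME)));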
diff --git a/lib/Basics/StaticStrings.cpp b/lib/Basics/StaticStrings.cpp index 509dbaf52595..4962b624889b 100644 --- a/lib/Basics/StaticStrings.cpp +++ b/lib/Basics/StaticStrings.cpp @@ -295,6 +295,7 @@ std::string const StaticStrings::BackupToDeleteName("DIRECTORY_TO_DELETE"); std::string const StaticStrings::BackupSearchToDeleteName( "DIRECTORY_TO_DELETE_SEARCH"); std::string const StaticStrings::SerializationFormat("serializationFormat"); +std::string const StaticStrings::AqlRemoteApi("api"); // validation std::string const StaticStrings::ValidatorLevelNone("none"); diff --git a/lib/Basics/StaticStrings.h b/lib/Basics/StaticStrings.h index 069922cf30a0..4591679174a3 100644 --- a/lib/Basics/StaticStrings.h +++ b/lib/Basics/StaticStrings.h @@ -271,6 +271,7 @@ class StaticStrings { static std::string const BackupToDeleteName; static std::string const BackupSearchToDeleteName; static std::string const SerializationFormat; + static std::string const AqlRemoteApi; // validation static std::string const ValidatorLevelNone; From be42def57485897495748775990b8a3734ec7f91 Mon Sep 17 00:00:00 2001 From: Heiko Date: Thu, 27 Feb 2020 14:21:09 +0100 Subject: [PATCH 078/122] Feature/aql subquery operations stack i research view executor (#11140) * added arangosearch view executor to new style * added new produceRows and skipRowsRange * ires * test jenkins * modify aql call * cleanup branch * Update arangod/Aql/IResearchViewExecutor.cpp Co-Authored-By: Dronplane * applied suggested changes * also enable IResearchViewMergeExecutor * Fixed an issue in skipRange, where the input was not moved forward in all cases. * Removed unused local variables Co-authored-by: Dronplane Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlockImpl.cpp | 75 ++++-- arangod/Aql/IResearchViewExecutor.cpp | 361 ++++++++++++++++---------- arangod/Aql/IResearchViewExecutor.h | 92 ++++--- 3 files changed, 339 insertions(+), 189 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index f246e366e2bc..3b6934601fac 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -134,19 +134,39 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); * TODO: This should be removed once all executors and fetchers are ported to the new style. 
*/ template -constexpr bool isNewStyleExecutor = - is_one_of_v, - IdExecutor>, ReturnExecutor, IndexExecutor, EnumerateCollectionExecutor, - /* - CalculationExecutor, CalculationExecutor, - CalculationExecutor,*/ - HashedCollectExecutor, +constexpr bool isNewStyleExecutor = is_one_of_v< + Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, + IdExecutor>, ReturnExecutor, IndexExecutor, EnumerateCollectionExecutor, + /* + CalculationExecutor, CalculationExecutor, + CalculationExecutor,*/ + HashedCollectExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS - TestLambdaExecutor, - TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode + TestLambdaExecutor, + TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, - ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, + ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -1099,14 +1119,35 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert( useExecutor == - (is_one_of_v>, - IdExecutor, HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, + (is_one_of_v< + Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, + IdExecutor>, IdExecutor, + HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS - TestLambdaSkipExecutor, + TestLambdaSkipExecutor, #endif - TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, - SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor>), + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + IResearchViewMergeExecutor, + TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, + SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! 
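Both isNewStyleExecutor and skipRowsType() above are resolved purely at compile time through the is_one_of_v fold expression. A self-contained sketch of that idiom with hypothetical stand-in types (the real lists are the executor types enumerated above):

#include <type_traits>

template <typename T, typename... Ts>
constexpr bool is_one_of_v = (std::is_same_v<T, Ts> || ...);

struct NewStyleExecutorA {};  // hypothetical stand-ins for real executors
struct NewStyleExecutorB {};
struct LegacyExecutor {};

template <typename Executor>
constexpr bool isNewStyleExecutor =
    is_one_of_v<Executor, NewStyleExecutorA, NewStyleExecutorB>;

static_assert(isNewStyleExecutor<NewStyleExecutorA>);
static_assert(!isNewStyleExecutor<LegacyExecutor>);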
diff --git a/arangod/Aql/IResearchViewExecutor.cpp b/arangod/Aql/IResearchViewExecutor.cpp index 4fcba5946ae6..5bd3cdc8d7d5 100644 --- a/arangod/Aql/IResearchViewExecutor.cpp +++ b/arangod/Aql/IResearchViewExecutor.cpp @@ -24,9 +24,10 @@ #include "IResearchViewExecutor.h" #include "Aql/ExecutionStats.h" +#include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/SingleRowFetcher.h" -#include "Aql/OutputAqlItemRow.h" +#include "AqlCall.h" #include "IResearch/IResearchCommon.h" #include "IResearch/IResearchDocument.h" #include "IResearch/IResearchFilterFactory.h" @@ -95,7 +96,8 @@ inline irs::columnstore_reader::values_reader_f sortColumn(irs::sub_reader const return reader ? reader->values() : irs::columnstore_reader::values_reader_f{}; } -inline std::pair getStoredColumnsInfo(IResearchViewNode::ViewValuesRegisters const& columnsFieldsRegs) { +inline std::pair getStoredColumnsInfo( + IResearchViewNode::ViewValuesRegisters const& columnsFieldsRegs) { auto max = (--columnsFieldsRegs.cend())->first; TRI_ASSERT(max >= IResearchViewNode::SortColumnNumber); auto columnFieldsRegs = columnsFieldsRegs.cbegin(); @@ -116,11 +118,10 @@ IResearchViewExecutorInfos::IResearchViewExecutorInfos( RegisterId firstOutputRegister, RegisterId numScoreRegisters, Query& query, std::vector const& scorers, std::pair const& sort, - IResearchViewStoredValues const& storedValues, - ExecutionPlan const& plan, Variable const& outVariable, - aql::AstNode const& filterCondition, std::pair volatility, - IResearchViewExecutorInfos::VarInfoMap const& varInfoMap, int depth, - IResearchViewNode::ViewValuesRegisters&& outNonMaterializedViewRegs) + IResearchViewStoredValues const& storedValues, ExecutionPlan const& plan, + Variable const& outVariable, aql::AstNode const& filterCondition, + std::pair volatility, IResearchViewExecutorInfos::VarInfoMap const& varInfoMap, + int depth, IResearchViewNode::ViewValuesRegisters&& outNonMaterializedViewRegs) : ExecutorInfos(std::move(infos)), _firstOutputRegister(firstOutputRegister), _numScoreRegisters(numScoreRegisters), @@ -151,7 +152,8 @@ RegisterId IResearchViewExecutorInfos::getNumScoreRegisters() const noexcept { return _numScoreRegisters; } -IResearchViewNode::ViewValuesRegisters const& IResearchViewExecutorInfos::getOutNonMaterializedViewRegs() const noexcept { +IResearchViewNode::ViewValuesRegisters const& IResearchViewExecutorInfos::getOutNonMaterializedViewRegs() const + noexcept { return _outNonMaterializedViewRegs; } @@ -209,8 +211,8 @@ IResearchViewStoredValues const& IResearchViewExecutorInfos::storedValues() cons } bool IResearchViewExecutorInfos::isScoreReg(RegisterId reg) const noexcept { - return getNumScoreRegisters() > 0 && - getFirstScoreRegister() <= reg && reg < getFirstScoreRegister() + getNumScoreRegisters(); + return getNumScoreRegisters() > 0 && getFirstScoreRegister() <= reg && + reg < getFirstScoreRegister() + getNumScoreRegisters(); } IResearchViewStats::IResearchViewStats() noexcept : _scannedIndex(0) {} @@ -301,7 +303,7 @@ std::vector::iterator IResearchViewExecutorBase::IndexRe template template IResearchViewExecutorBase::IndexReadBuffer::IndexReadBuffer(std::size_t const numScoreRegisters) - : _numScoreRegisters(numScoreRegisters), _keyBaseIdx(0) {} + : _numScoreRegisters(numScoreRegisters), _keyBaseIdx(0) {} template template @@ -330,7 +332,8 @@ void IResearchViewExecutorBase::IndexReadBuffer::pushVa template template -void IResearchViewExecutorBase::IndexReadBuffer::pushStoredValue(std::vector&& storedValue) { +void 
IResearchViewExecutorBase::IndexReadBuffer::pushStoredValue( + std::vector&& storedValue) { _storedValueBuffer.emplace_back(std::move(storedValue)); } @@ -401,11 +404,9 @@ void IResearchViewExecutorBase::IndexReadBuffer::assert template IResearchViewExecutorBase::IResearchViewExecutorBase( - IResearchViewExecutorBase::Fetcher& fetcher, IResearchViewExecutorBase::Infos& infos) + IResearchViewExecutorBase::Fetcher&, IResearchViewExecutorBase::Infos& infos) : _infos(infos), - _fetcher(fetcher), - _inputRow(CreateInvalidInputRowHint{}), - _upstreamState(ExecutionState::HASMORE), + _inputRow(CreateInvalidInputRowHint{}), // TODO: Remove me after refactor _indexReadBuffer(_infos.getNumScoreRegisters()), _filterCtx(1), // arangodb::iresearch::ExpressionExecutionContext _ctx(&infos.getQuery(), infos.numberOfOutputRegisters(), @@ -413,8 +414,6 @@ IResearchViewExecutorBase::IResearchViewExecutorBase( _reader(infos.getReader()), _filter(irs::filter::prepared::empty()), _execCtx(*infos.getQuery().trx(), _ctx), - _inflight(0), - _hasMore(true), // has more data initially _isInitialized(false) { TRI_ASSERT(infos.getQuery().trx() != nullptr); @@ -425,87 +424,102 @@ IResearchViewExecutorBase::IResearchViewExecutorBase( template std::pair::Stats> IResearchViewExecutorBase::produceRows(OutputAqlItemRow& output) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +template +std::tuple::Stats, AqlCall> +IResearchViewExecutorBase::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) { IResearchViewStats stats{}; - bool documentWritten = false; + AqlCall upstreamCall{}; + upstreamCall.fullCount = output.getClientCall().fullCount; - while (!documentWritten) { - if (!_inputRow.isInitialized()) { - if (_upstreamState == ExecutionState::DONE) { - // There will be no more rows, stop fetching. - return {ExecutionState::DONE, stats}; - } + while (inputRange.hasDataRow() && !output.isFull()) { + bool documentWritten = false; - std::tie(_upstreamState, _inputRow) = _fetcher.fetchRow(); + while (!documentWritten) { + if (!_inputRow.isInitialized()) { + std::tie(std::ignore, _inputRow) = inputRange.peekDataRow(); - if (_upstreamState == ExecutionState::WAITING) { - return {_upstreamState, stats}; - } + if (!_inputRow.isInitialized()) { + return {ExecutorState::DONE, stats, upstreamCall}; + } - if (!_inputRow.isInitialized()) { - return {ExecutionState::DONE, stats}; + // reset must be called exactly after we've got a new and valid input row. + static_cast(*this).reset(); } - // reset must be called exactly after we've got a new and valid input row. - static_cast(*this).reset(); - } - - ReadContext ctx(infos().getOutputRegister(), _inputRow, output); - documentWritten = next(ctx); + ReadContext ctx(infos().getOutputRegister(), _inputRow, output); + documentWritten = next(ctx); - if (documentWritten) { - stats.incrScanned(); - } else { - _inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; - // no document written, repeat. + if (documentWritten) { + stats.incrScanned(); + output.advanceRow(); + } else { + _inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; + std::ignore = inputRange.nextDataRow(); + // no document written, repeat. 
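+        // The current input row is exhausted: invalidate it and consume it from
+        // the input range, so the next pass through the loop peeks a fresh row
+        // and calls reset() on it before trying to produce again.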
+ } } } - return {ExecutionState::HASMORE, stats}; + return {inputRange.upstreamState(), stats, upstreamCall}; } template std::tuple::Stats, size_t> IResearchViewExecutorBase::skipRows(size_t toSkip) { - TRI_ASSERT(_indexReadBuffer.empty()); - IResearchViewStats stats{}; - size_t skipped = 0; + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} +template +std::tuple::Stats, size_t, AqlCall> +IResearchViewExecutorBase::skipRowsRange(AqlItemBlockInputRange& inputRange, + AqlCall& call) { + TRI_ASSERT(_indexReadBuffer.empty()); auto& impl = static_cast(*this); - if (!_inputRow.isInitialized()) { - if (_upstreamState == ExecutionState::DONE) { - // There will be no more rows, stop fetching. - return std::make_tuple(ExecutionState::DONE, stats, - 0); // tupple, cannot use initializer list due to build failure - } + while (inputRange.hasDataRow() && call.shouldSkip()) { + if (!_inputRow.isInitialized()) { + auto rowState = ExecutorState::HASMORE; + std::tie(rowState, _inputRow) = inputRange.peekDataRow(); - std::tie(_upstreamState, _inputRow) = _fetcher.fetchRow(); + if (!_inputRow.isInitialized()) { + TRI_ASSERT(rowState == ExecutorState::DONE); + break; + } - if (_upstreamState == ExecutionState::WAITING) { - return std::make_tuple(_upstreamState, stats, 0); // tupple, cannot use initializer list due to build failure + // reset must be called exactly after we've got a new and valid input row. + impl.reset(); } - - if (!_inputRow.isInitialized()) { - return std::make_tuple(ExecutionState::DONE, stats, - 0); // tupple, cannot use initializer list due to build failure + TRI_ASSERT(_inputRow.isInitialized()); + if (call.getOffset() > 0) { + // OffsetPhase need to skip atMost offset + call.didSkip(impl.skip(call.getOffset())); + } else { + TRI_ASSERT(call.getLimit() == 0 && call.hasHardLimit()); + // skip all - fullCount phase + call.didSkip(impl.skipAll()); } + TRI_ASSERT(_indexReadBuffer.empty()); - // reset must be called exactly after we've got a new and valid input row. 
- impl.reset(); + if (call.shouldSkip()) { + // We still need to fetch more + // trigger refetch of new input row + std::ignore = inputRange.nextDataRow(); + _inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; + } } - TRI_ASSERT(_inputRow.isInitialized()); - - skipped = static_cast(*this).skip(toSkip); - TRI_ASSERT(_indexReadBuffer.empty()); - stats.incrScanned(skipped); - - if (skipped < toSkip) { - _inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; - } + IResearchViewStats stats{}; + stats.incrScanned(call.getSkipCount()); - return std::make_tuple(ExecutionState::HASMORE, stats, - skipped); // tupple, cannot use initializer list due to build failure + AqlCall upstreamCall{}; + upstreamCall.softLimit = call.getOffset() + std::min(call.softLimit, call.hardLimit); + return {inputRange.upstreamState(), stats, call.getSkipCount(), upstreamCall}; } template @@ -619,26 +633,25 @@ void IResearchViewExecutorBase::reset() { } } -template +template bool IResearchViewExecutorBase::writeLocalDocumentId( - ReadContext& ctx, - LocalDocumentId const& documentId, - LogicalCollection const& collection) { + ReadContext& ctx, LocalDocumentId const& documentId, LogicalCollection const& collection) { // we will need collection Id also as View could produce documents from multiple collections if (ADB_LIKELY(documentId.isSet())) { { // For sake of performance we store raw pointer to collection // It is safe as pipeline work inside one process - static_assert(sizeof(void*) <= sizeof(uint64_t), "Pointer not fits in uint64_t"); + static_assert(sizeof(void*) <= sizeof(uint64_t), + "Pointer not fits in uint64_t"); AqlValue a(AqlValueHintUInt(reinterpret_cast(&collection))); bool mustDestroy = true; - AqlValueGuard guard{ a, mustDestroy }; + AqlValueGuard guard{a, mustDestroy}; ctx.outputRow.moveValueInto(ctx.getNmColPtrOutReg(), ctx.inputRow, guard); } { AqlValue a(AqlValueHintUInt(documentId.id())); bool mustDestroy = true; - AqlValueGuard guard{ a, mustDestroy }; + AqlValueGuard guard{a, mustDestroy}; ctx.outputRow.moveValueInto(ctx.getNmDocIdOutReg(), ctx.inputRow, guard); } return true; @@ -647,9 +660,10 @@ bool IResearchViewExecutorBase::writeLocalDocumentId( } } -template -inline bool IResearchViewExecutorBase::writeStoredValue(ReadContext& ctx, std::vector const& storedValues, - size_t columnNum, std::map const& fieldsRegs) { +template +inline bool IResearchViewExecutorBase::writeStoredValue( + ReadContext& ctx, std::vector const& storedValues, + size_t columnNum, std::map const& fieldsRegs) { TRI_ASSERT(columnNum < storedValues.size()); auto const& storedValue = storedValues[columnNum]; TRI_ASSERT(!storedValue.empty()); @@ -675,7 +689,7 @@ inline bool IResearchViewExecutorBase::writeStoredValue(ReadContex return true; } -template +template bool IResearchViewExecutorBase::writeRow(ReadContext& ctx, IndexReadBufferEntry bufferEntry, LocalDocumentId const& documentId, @@ -683,29 +697,36 @@ bool IResearchViewExecutorBase::writeRow(ReadContext& ctx, TRI_ASSERT(documentId.isSet()); if constexpr (Traits::MaterializeType == MaterializeType::Materialize) { // read document from underlying storage engine, if we got an id - if (!collection.readDocumentWithCallback(infos().getQuery().trx(), documentId, ctx.callback)) { + if (!collection.readDocumentWithCallback(infos().getQuery().trx(), + documentId, ctx.callback)) { return false; } - } else if ((Traits::MaterializeType & MaterializeType::LateMaterialize) == MaterializeType::LateMaterialize) { + } else if ((Traits::MaterializeType & 
MaterializeType::LateMaterialize) == + MaterializeType::LateMaterialize) { // no need to look into collection. Somebody down the stream will do materialization. Just emit LocalDocumentIds if (!writeLocalDocumentId(ctx, documentId, collection)) { return false; } } - if constexpr ((Traits::MaterializeType & MaterializeType::UseStoredValues) == MaterializeType::UseStoredValues) { + if constexpr ((Traits::MaterializeType & MaterializeType::UseStoredValues) == + MaterializeType::UseStoredValues) { auto const& columnsFieldsRegs = infos().getOutNonMaterializedViewRegs(); TRI_ASSERT(!columnsFieldsRegs.empty()); auto columsInfo = getStoredColumnsInfo(columnsFieldsRegs); auto& columnFieldsRegs = columsInfo.second; auto const& storedValues = _indexReadBuffer.getStoredValue(bufferEntry); if (IResearchViewNode::SortColumnNumber == columnFieldsRegs->first) { - if (ADB_UNLIKELY(!writeStoredValue(ctx, storedValues, static_cast(columsInfo.first), columnFieldsRegs->second))) { + if (ADB_UNLIKELY(!writeStoredValue(ctx, storedValues, + static_cast(columsInfo.first), + columnFieldsRegs->second))) { return false; } ++columnFieldsRegs; } for (; columnFieldsRegs != columnsFieldsRegs.cend(); ++columnFieldsRegs) { - if (ADB_UNLIKELY(!writeStoredValue(ctx, storedValues, static_cast(columnFieldsRegs->first), columnFieldsRegs->second))) { + if (ADB_UNLIKELY(!writeStoredValue(ctx, storedValues, + static_cast(columnFieldsRegs->first), + columnFieldsRegs->second))) { return false; } } @@ -734,9 +755,10 @@ bool IResearchViewExecutorBase::writeRow(ReadContext& ctx, return true; } -template -void IResearchViewExecutorBase::getStoredValue(irs::document const& doc, std::vector& storedValue, size_t index, - std::vector const& storedValuesReaders) { +template +void IResearchViewExecutorBase::getStoredValue( + irs::document const& doc, std::vector& storedValue, size_t index, + std::vector const& storedValuesReaders) { irs::columnstore_reader::values_reader_f reader = storedValuesReaders[index]; TRI_ASSERT(reader); auto ok = reader(doc.value, storedValue[index]); @@ -746,9 +768,10 @@ void IResearchViewExecutorBase::getStoredValue(irs::document const } } -template -void IResearchViewExecutorBase::pushStoredValues(irs::document const& doc, - std::vector const& storedValuesReaders) { +template +void IResearchViewExecutorBase::pushStoredValues( + irs::document const& doc, + std::vector const& storedValuesReaders) { auto const& columnsFieldsRegs = this->_infos.getOutNonMaterializedViewRegs(); TRI_ASSERT(!columnsFieldsRegs.empty()); auto columsInfo = getStoredColumnsInfo(columnsFieldsRegs); @@ -760,14 +783,16 @@ void IResearchViewExecutorBase::pushStoredValues(irs::document con ++columnFieldsRegs; } for (; columnFieldsRegs != columnsFieldsRegs.cend(); ++columnFieldsRegs) { - getStoredValue(doc, storedValue, static_cast(columnFieldsRegs->first), storedValuesReaders); + getStoredValue(doc, storedValue, static_cast(columnFieldsRegs->first), + storedValuesReaders); } this->_indexReadBuffer.pushStoredValue(std::move(storedValue)); } -template -bool IResearchViewExecutorBase::getStoredValuesReaders(irs::sub_reader const& segmentReader, - std::vector& storedValuesReaders) { +template +bool IResearchViewExecutorBase::getStoredValuesReaders( + irs::sub_reader const& segmentReader, + std::vector& storedValuesReaders) { auto const& columnsFieldsRegs = this->_infos.getOutNonMaterializedViewRegs(); if (!columnsFieldsRegs.empty()) { auto columsInfo = getStoredColumnsInfo(columnsFieldsRegs); @@ -792,14 +817,16 @@ bool 
IResearchViewExecutorBase::getStoredValuesReaders(irs::sub_re auto const& columns = storedValues.columns(); auto const storedColumnNumber = static_cast(columnFieldsRegs->first); TRI_ASSERT(storedColumnNumber < columns.size()); - auto storedValuesReader = segmentReader.column_reader(columns[storedColumnNumber].name); + auto storedValuesReader = + segmentReader.column_reader(columns[storedColumnNumber].name); if (!storedValuesReader) { LOG_TOPIC("af7ec", WARN, arangodb::iresearch::TOPIC) << "encountered a sub-reader without a stored value column while " "executing a query, ignoring"; return false; } - storedValuesReaders[static_cast(columnFieldsRegs->first)] = storedValuesReader->values(); + storedValuesReaders[static_cast(columnFieldsRegs->first)] = + storedValuesReader->values(); } } } @@ -811,7 +838,8 @@ bool IResearchViewExecutorBase::getStoredValuesReaders(irs::sub_re /////////////////////////////////////////////////////////////////////////////// template -IResearchViewExecutor::IResearchViewExecutor(Fetcher& fetcher, Infos& infos) +IResearchViewExecutor::IResearchViewExecutor(Fetcher& fetcher, + Infos& infos) : Base(fetcher, infos), _pkReader(), _itr(), @@ -836,7 +864,7 @@ void IResearchViewExecutor::evaluateScores(ReadContext this->fillScores(ctx, begin, end); } -template +template bool IResearchViewExecutor::readPK(LocalDocumentId& documentId) { TRI_ASSERT(!documentId.isSet()); TRI_ASSERT(_itr); @@ -938,7 +966,8 @@ void IResearchViewExecutor::fillBuffer(IResearchViewEx evaluateScores(ctx); } - if constexpr ((materializeType & MaterializeType::UseStoredValues) == MaterializeType::UseStoredValues) { + if constexpr ((materializeType & MaterializeType::UseStoredValues) == + MaterializeType::UseStoredValues) { TRI_ASSERT(_doc); this->pushStoredValues(*_doc, _storedValuesReaders); } @@ -977,7 +1006,8 @@ bool IResearchViewExecutor::resetIterator() { return false; } - if constexpr ((materializeType & MaterializeType::UseStoredValues) == MaterializeType::UseStoredValues) { + if constexpr ((materializeType & MaterializeType::UseStoredValues) == + MaterializeType::UseStoredValues) { if (ADB_UNLIKELY(!this->getStoredValuesReaders(segmentReader, _storedValuesReaders))) { return false; } @@ -1045,6 +1075,37 @@ size_t IResearchViewExecutor::skip(size_t limit) { _doc = nullptr; } + saveCollection(); + + return toSkip - limit; +} + +template +size_t IResearchViewExecutor::skipAll() { + TRI_ASSERT(this->_indexReadBuffer.empty()); + TRI_ASSERT(this->_filter); + + size_t skipped = 0; + + for (size_t count = this->_reader->size(); _readerOffset < count;) { + if (!_itr && !resetIterator()) { + continue; + } + + while (_itr->next()) { + skipped++; + } + + ++_readerOffset; + _itr.reset(); + _doc = nullptr; + } + + return skipped; +} + +template +void IResearchViewExecutor::saveCollection() { // We're in the middle of a reader, save the collection in case produceRows() // needs it. 
if (_itr) { @@ -1071,13 +1132,12 @@ size_t IResearchViewExecutor::skip(size_t limit) { this->_indexReadBuffer.reset(); _collection = collection.get(); } - - return toSkip - limit; } template -bool IResearchViewExecutor::writeRow(IResearchViewExecutor::ReadContext& ctx, - IResearchViewExecutor::IndexReadBufferEntry bufferEntry) { +bool IResearchViewExecutor::writeRow( + IResearchViewExecutor::ReadContext& ctx, + IResearchViewExecutor::IndexReadBufferEntry bufferEntry) { TRI_ASSERT(_collection); return Base::writeRow(ctx, bufferEntry, @@ -1089,7 +1149,8 @@ bool IResearchViewExecutor::writeRow(IResearchViewExec /////////////////////////////////////////////////////////////////////////////// template -IResearchViewMergeExecutor::IResearchViewMergeExecutor(Fetcher& fetcher, Infos& infos) +IResearchViewMergeExecutor::IResearchViewMergeExecutor(Fetcher& fetcher, + Infos& infos) : Base{fetcher, infos}, _heap_it{MinHeapContext{*infos.sort().first, infos.sort().second, _segments}} { TRI_ASSERT(infos.sort().first); @@ -1101,9 +1162,8 @@ IResearchViewMergeExecutor::IResearchViewMergeExecutor template IResearchViewMergeExecutor::Segment::Segment( - irs::doc_iterator::ptr&& docs, irs::document const& doc, - irs::score const& score, LogicalCollection const& collection, - irs::columnstore_reader::values_reader_f&& pkReader, + irs::doc_iterator::ptr&& docs, irs::document const& doc, irs::score const& score, + LogicalCollection const& collection, irs::columnstore_reader::values_reader_f&& pkReader, std::vector&& storedValuesReaders) noexcept : docs(std::move(docs)), doc(&doc), @@ -1141,16 +1201,16 @@ bool IResearchViewMergeExecutor::MinHeapContext::opera } template -bool IResearchViewMergeExecutor::MinHeapContext::operator()(const size_t lhs, - const size_t rhs) const { +bool IResearchViewMergeExecutor::MinHeapContext::operator()( + const size_t lhs, const size_t rhs) const { assert(lhs < _segments->size()); assert(rhs < _segments->size()); return _less((*_segments)[rhs].sortValue, (*_segments)[lhs].sortValue); } template -void IResearchViewMergeExecutor::evaluateScores(ReadContext const& ctx, - irs::score const& score) { +void IResearchViewMergeExecutor::evaluateScores( + ReadContext const& ctx, irs::score const& score) { // This must not be called in the unordered case. 
TRI_ASSERT(ordered); @@ -1227,7 +1287,8 @@ void IResearchViewMergeExecutor::reset() { } std::vector storedValuesReaders; - if constexpr ((materializeType & MaterializeType::UseStoredValues) == MaterializeType::UseStoredValues) { + if constexpr ((materializeType & MaterializeType::UseStoredValues) == + MaterializeType::UseStoredValues) { if (ADB_UNLIKELY(!this->getStoredValuesReaders(segment, storedValuesReaders))) { continue; } @@ -1235,7 +1296,8 @@ void IResearchViewMergeExecutor::reset() { // add sortReader if it has not been added yet // sortReader is the last item auto const& columnsFieldsRegs = this->_infos.getOutNonMaterializedViewRegs(); - if (columnsFieldsRegs.empty() || columnsFieldsRegs.cbegin()->first != IResearchViewNode::SortColumnNumber) { + if (columnsFieldsRegs.empty() || + columnsFieldsRegs.cbegin()->first != IResearchViewNode::SortColumnNumber) { auto sortReader = ::sortColumn(segment); if (!sortReader) { @@ -1305,7 +1367,8 @@ void IResearchViewMergeExecutor::fillBuffer(ReadContex evaluateScores(ctx, *segment.score); } - if constexpr ((materializeType & MaterializeType::UseStoredValues) == MaterializeType::UseStoredValues) { + if constexpr ((materializeType & MaterializeType::UseStoredValues) == + MaterializeType::UseStoredValues) { TRI_ASSERT(segment.doc); this->pushStoredValues(*segment.doc, segment.storedValuesReaders); } @@ -1333,6 +1396,20 @@ size_t IResearchViewMergeExecutor::skip(size_t limit) return toSkip - limit; } +template +size_t IResearchViewMergeExecutor::skipAll() { + TRI_ASSERT(this->_indexReadBuffer.empty()); + TRI_ASSERT(this->_filter != nullptr); + + size_t skipped = 0; + + while (_heap_it.next()) { + skipped++; + } + + return skipped; +} + template bool IResearchViewMergeExecutor::writeRow( IResearchViewMergeExecutor::ReadContext& ctx, @@ -1377,23 +1454,37 @@ template class ::arangodb::aql::IResearchViewMergeExecutor>; template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewExecutor>; template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewExecutor>; - -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; - -template class 
::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; -template class ::arangodb::aql::IResearchViewExecutorBase<::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewExecutor>; + +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; + +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; +template class ::arangodb::aql::IResearchViewExecutorBase< + ::arangodb::aql::IResearchViewMergeExecutor>; diff --git a/arangod/Aql/IResearchViewExecutor.h b/arangod/Aql/IResearchViewExecutor.h index 5b535150b100..1b6aae180944 100644 --- a/arangod/Aql/IResearchViewExecutor.h +++ b/arangod/Aql/IResearchViewExecutor.h @@ -27,6 +27,7 @@ #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/IResearchViewNode.h" +#include "Aql/InputAqlItemRow.h" #include "IResearch/ExpressionFilter.h" #include "IResearch/IResearchExpressionContext.h" #include "IResearch/IResearchVPackComparer.h" @@ -52,6 +53,8 @@ struct Scorer; } namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; struct ExecutionStats; class OutputAqlItemRow; template @@ -66,18 +69,16 @@ class IResearchViewExecutorInfos : public ExecutorInfos { RegisterId firstOutputRegister, RegisterId numScoreRegisters, Query& query, std::vector const& scorers, std::pair const& sort, - iresearch::IResearchViewStoredValues const& storedValues, - ExecutionPlan const& plan, - Variable const& outVariable, - aql::AstNode const& filterCondition, - std::pair volatility, - VarInfoMap const& varInfoMap, - int depth, iresearch::IResearchViewNode::ViewValuesRegisters&& outNonMaterializedViewRegs); + iresearch::IResearchViewStoredValues const& storedValues, ExecutionPlan const& plan, + Variable const& outVariable, aql::AstNode const& filterCondition, + std::pair volatility, VarInfoMap const& varInfoMap, int depth, + iresearch::IResearchViewNode::ViewValuesRegisters&& outNonMaterializedViewRegs); RegisterId getOutputRegister() const noexcept; RegisterId getFirstScoreRegister() const noexcept; RegisterId getNumScoreRegisters() const noexcept; - iresearch::IResearchViewNode::ViewValuesRegisters const& getOutNonMaterializedViewRegs() const noexcept; + 
iresearch::IResearchViewNode::ViewValuesRegisters const& getOutNonMaterializedViewRegs() const + noexcept; std::shared_ptr getReader() const noexcept; Query& getQuery() const noexcept; std::vector const& scorers() const noexcept; @@ -157,6 +158,22 @@ class IResearchViewExecutorBase { std::tuple skipRows(size_t toSkip); std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Rows of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple produceRows( + AqlItemBlockInputRange& input, OutputAqlItemRow& output); + + /** + * @brief skip the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call); + protected: class ReadContext { private: @@ -171,13 +188,9 @@ class IResearchViewExecutorBase { OutputAqlItemRow& outputRow; IndexIterator::DocumentCallback const callback; - aql::RegisterId getNmColPtrOutReg() const noexcept { - return docOutReg; - } + aql::RegisterId getNmColPtrOutReg() const noexcept { return docOutReg; } - aql::RegisterId getNmDocIdOutReg() const noexcept { - return docOutReg + 1; - } + aql::RegisterId getNmDocIdOutReg() const noexcept { return docOutReg + 1; } }; // ReadContext template @@ -250,7 +263,8 @@ class IResearchViewExecutorBase { void pushStoredValue(std::vector&& storedValue); - std::vector const& getStoredValue(IndexReadBufferEntry bufferEntry) const noexcept; + std::vector const& getStoredValue(IndexReadBufferEntry bufferEntry) const + noexcept; private: // _keyBuffer, _scoreBuffer, _sortValueBuffer together hold all the @@ -285,30 +299,30 @@ class IResearchViewExecutorBase { bool writeRow(ReadContext& ctx, IndexReadBufferEntry bufferEntry, LocalDocumentId const& documentId, LogicalCollection const& collection); - bool writeLocalDocumentId(ReadContext& ctx, - LocalDocumentId const& documentId, - LogicalCollection const& collection); + bool writeLocalDocumentId(ReadContext& ctx, LocalDocumentId const& documentId, + LogicalCollection const& collection); void reset(); - bool writeStoredValue(ReadContext& ctx, std::vector const& storedValues, size_t columnNum, - std::map const& fieldsRegs); + bool writeStoredValue(ReadContext& ctx, std::vector const& storedValues, + size_t columnNum, std::map const& fieldsRegs); - void getStoredValue(irs::document const& doc, std::vector& storedValue, size_t index, + void getStoredValue(irs::document const& doc, + std::vector& storedValue, size_t index, std::vector const& storedValuesReaders); - void pushStoredValues(irs::document const& doc, std::vector const& storedValuesReaders); + void pushStoredValues(irs::document const& doc, + std::vector const& storedValuesReaders); - bool getStoredValuesReaders(irs::sub_reader const& segmentReader, std::vector& storedValuesReaders); + bool getStoredValuesReaders(irs::sub_reader const& segmentReader, + std::vector& storedValuesReaders); private: bool next(ReadContext& ctx); protected: Infos const& _infos; - Fetcher& _fetcher; InputAqlItemRow _inputRow; - ExecutionState _upstreamState; IndexReadBuffer _indexReadBuffer; irs::bytes_ref _pk; // temporary store for pk buffer before decoding it irs::attribute_view _filterCtx; // filter context @@ -317,13 +331,12 @@ class IResearchViewExecutorBase { irs::filter::prepared::ptr _filter; irs::order::prepared _order; iresearch::ExpressionExecutionContext _execCtx; // expression execution context - 
size_t _inflight; // The number of documents inflight if we hit a WAITING state. - bool _hasMore; bool _isInitialized; }; // IResearchViewExecutorBase template -class IResearchViewExecutor : public IResearchViewExecutorBase> { +class IResearchViewExecutor + : public IResearchViewExecutorBase> { public: using Base = IResearchViewExecutorBase>; using Fetcher = typename Base::Fetcher; @@ -339,6 +352,9 @@ class IResearchViewExecutor : public IResearchViewExecutorBase _storedValuesReaders; // current stored values readers + std::vector _storedValuesReaders; // current stored values readers // case ordered only: irs::score const* _scr; irs::bytes_ref _scrVal; }; // IResearchViewExecutor -template +template struct IResearchViewExecutorTraits> { using IndexBufferValueType = LocalDocumentId; static constexpr bool Ordered = ordered; @@ -376,7 +392,8 @@ struct IResearchViewExecutorTraits -class IResearchViewMergeExecutor : public IResearchViewExecutorBase> { +class IResearchViewMergeExecutor + : public IResearchViewExecutorBase> { public: using Base = IResearchViewExecutorBase>; using Fetcher = typename Base::Fetcher; @@ -407,10 +424,10 @@ class IResearchViewMergeExecutor : public IResearchViewExecutorBase storedValuesReaders; // current stored values readers - irs::columnstore_reader::values_reader_f& sortReader; // sort column reader + irs::bytes_ref sortValue{irs::bytes_ref::NIL}; // sort column value + irs::columnstore_reader::values_reader_f pkReader; // primary key reader + std::vector storedValuesReaders; // current stored values readers + irs::columnstore_reader::values_reader_f& sortReader; // sort column reader }; class MinHeapContext { @@ -439,13 +456,14 @@ class IResearchViewMergeExecutor : public IResearchViewExecutorBase _segments; irs::external_heap_iterator _heap_it; }; // IResearchViewMergeExecutor -template +template struct IResearchViewExecutorTraits> { using IndexBufferValueType = std::pair; static constexpr bool Ordered = ordered; From abd2740d13edce4f1e358f02e61e9e03bcfa7dc9 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Thu, 27 Feb 2020 13:52:37 +0000 Subject: [PATCH 079/122] Move SingleRemoteModificationExecutor to new interface (#11166) * Move SingleRemoteModificationExecutor to new interface * Activate SingleRemoteModificationExecutor * Inspect the result of remote modification Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlockImpl.cpp | 10 ++- .../Aql/SingleRemoteModificationExecutor.cpp | 69 ++++++++++++------- .../Aql/SingleRemoteModificationExecutor.h | 9 ++- 3 files changed, 62 insertions(+), 26 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 3b6934601fac..0f9dd19c838a 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -166,7 +166,9 @@ constexpr bool isNewStyleExecutor = is_one_of_v< IResearchViewMergeExecutor, IResearchViewMergeExecutor, SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, - ShortestPathExecutor, EnumerateListExecutor, LimitExecutor>; + ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -1147,7 +1149,11 @@ static SkipRowsRangeVariant constexpr skipRowsType() { IResearchViewMergeExecutor, IResearchViewMergeExecutor, 
TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, - SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor>), + SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>), + "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! diff --git a/arangod/Aql/SingleRemoteModificationExecutor.cpp b/arangod/Aql/SingleRemoteModificationExecutor.cpp index 6ec7b8e3f635..228742ca79a1 100644 --- a/arangod/Aql/SingleRemoteModificationExecutor.cpp +++ b/arangod/Aql/SingleRemoteModificationExecutor.cpp @@ -71,29 +71,50 @@ SingleRemoteModificationExecutor::SingleRemoteModificationExecutor(Fet template std::pair::Stats> SingleRemoteModificationExecutor::produceRows(OutputAqlItemRow& output) { - Stats stats; - InputAqlItemRow input = InputAqlItemRow(CreateInvalidInputRowHint{}); + TRI_ASSERT(false); + return {ExecutionState::DONE, Stats{}}; +} - if (_upstreamState == ExecutionState::DONE) { - return {_upstreamState, std::move(stats)}; +template +[[nodiscard]] auto SingleRemoteModificationExecutor::produceRows( + AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple::Stats, AqlCall> { + auto stats = Stats{}; + + if (input.hasDataRow()) { + auto [state, row] = input.nextDataRow(); + auto result = doSingleRemoteModificationOperation(row, stats); + if (result.ok()) { + doSingleRemoteModificationOutput(row, output, result); + } } - std::tie(_upstreamState, input) = _fetcher.fetchRow(); + return {input.upstreamState(), stats, AqlCall{}}; +} - if (input.isInitialized()) { - TRI_ASSERT(_upstreamState == ExecutionState::HASMORE || - _upstreamState == ExecutionState::DONE); - doSingleRemoteModificationOperation(input, output, stats); - } else { - TRI_ASSERT(_upstreamState == ExecutionState::WAITING || - _upstreamState == ExecutionState::DONE); +template +[[nodiscard]] auto SingleRemoteModificationExecutor::skipRowsRange( + AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple::Stats, size_t, AqlCall> { + auto stats = Stats{}; + + if (input.hasDataRow()) { + auto [state, row] = input.nextDataRow(); + auto result = doSingleRemoteModificationOperation(row, stats); + if (result.ok()) { + call.didSkip(1); + return {input.upstreamState(), stats, 1, AqlCall{}}; + } } - return {_upstreamState, std::move(stats)}; + return {input.upstreamState(), stats, 0, AqlCall{}}; } template -bool SingleRemoteModificationExecutor::doSingleRemoteModificationOperation( - InputAqlItemRow& input, OutputAqlItemRow& output, Stats& stats) { +auto SingleRemoteModificationExecutor::doSingleRemoteModificationOperation( + InputAqlItemRow& input, Stats& stats) -> OperationResult { + OperationResult result; + OperationOptions& options = _info._options; + _info._options.silent = false; _info._options.returnOld = _info._options.returnOld || _info._outputRegisterId != RegisterPlan::MaxRegisterId; @@ -106,8 +127,6 @@ bool SingleRemoteModificationExecutor::doSingleRemoteModificationOpera int possibleWrites = 0; // TODO - get real statistic values! 
- OperationOptions& options = _info._options; - if (_info._key.empty() && _info._input1RegisterId == RegisterPlan::MaxRegisterId) { THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND, "missing document reference"); @@ -127,7 +146,6 @@ bool SingleRemoteModificationExecutor::doSingleRemoteModificationOpera inSlice = mergedBuilder->slice(); } - OperationResult result; if (isIndex) { result = _info._trx->document(_info._aqlCollection->name(), inSlice, _info._options); } else if (isInsert) { @@ -163,18 +181,25 @@ bool SingleRemoteModificationExecutor::doSingleRemoteModificationOpera // document not there is not an error in this situation. // FOR ... FILTER ... REMOVE wouldn't invoke REMOVE in first place, so // don't throw an excetpion. - return false; + return result; } else if (!_info._ignoreErrors) { // TODO remove if THROW_ARANGO_EXCEPTION_MESSAGE(result.errorNumber(), result.errorMessage()); } if (isIndex) { - return false; + return result; } } stats.addWritesExecuted(possibleWrites); stats.incrScannedIndex(); + return result; +} + +template +auto SingleRemoteModificationExecutor::doSingleRemoteModificationOutput( + InputAqlItemRow& input, OutputAqlItemRow& output, OperationResult& result) -> void { + OperationOptions& options = _info._options; if (!(_info._outputRegisterId != RegisterPlan::MaxRegisterId || _info._outputOldRegisterId != RegisterPlan::MaxRegisterId || @@ -182,7 +207,7 @@ bool SingleRemoteModificationExecutor::doSingleRemoteModificationOpera if (_info._hasParent) { output.copyRow(input); } - return _info._hasParent; + return; // _info._hasParent; } // Fill itemblock @@ -239,8 +264,6 @@ bool SingleRemoteModificationExecutor::doSingleRemoteModificationOpera TRI_IF_FAILURE("SingleRemoteModificationOperationBlock::moreDocuments") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - - return true; } template struct ::arangodb::aql::SingleRemoteModificationExecutor; diff --git a/arangod/Aql/SingleRemoteModificationExecutor.h b/arangod/Aql/SingleRemoteModificationExecutor.h index bf18456d83d4..dfbc3c6c4a6d 100644 --- a/arangod/Aql/SingleRemoteModificationExecutor.h +++ b/arangod/Aql/SingleRemoteModificationExecutor.h @@ -99,8 +99,15 @@ struct SingleRemoteModificationExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; + protected: - bool doSingleRemoteModificationOperation(InputAqlItemRow&, OutputAqlItemRow&, Stats&); + auto doSingleRemoteModificationOperation(InputAqlItemRow&, Stats&) -> OperationResult; + auto doSingleRemoteModificationOutput(InputAqlItemRow&, OutputAqlItemRow&, + OperationResult&) -> void; Infos& _info; Fetcher& _fetcher; From 9bf96cd7cc0b321f9a2107df17fe9f8549cfb5c1 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Thu, 27 Feb 2020 21:31:15 +0100 Subject: [PATCH 080/122] New style DistinctCollect (#11096) * First implementation of new style executor for DistinctCollect. * Tests for distinct collect. * Remove debug output. 
Co-authored-by: Michael Hackstein --- arangod/Aql/AqlValueGroup.cpp | 9 + arangod/Aql/AqlValueGroup.h | 3 + arangod/Aql/CollectNode.cpp | 4 +- arangod/Aql/DistinctCollectExecutor.cpp | 155 +++++----- arangod/Aql/DistinctCollectExecutor.h | 21 +- arangod/Aql/ExecutionBlockImpl.cpp | 26 +- tests/Aql/DistinctCollectExecutorTest.cpp | 331 +++------------------- 7 files changed, 183 insertions(+), 366 deletions(-) diff --git a/arangod/Aql/AqlValueGroup.cpp b/arangod/Aql/AqlValueGroup.cpp index 316086712f8b..5a06dd373e15 100644 --- a/arangod/Aql/AqlValueGroup.cpp +++ b/arangod/Aql/AqlValueGroup.cpp @@ -45,6 +45,11 @@ size_t AqlValueGroupHash::operator()(const std::vector& value) const { return static_cast(hash); } +size_t AqlValueGroupHash::operator()(AqlValue const& value) const { + uint64_t hash = 0x12345678; + return value.hash(_trx, hash); +} + AqlValueGroupEqual::AqlValueGroupEqual(arangodb::transaction::Methods* trx) : _trx(trx) {} @@ -62,3 +67,7 @@ bool AqlValueGroupEqual::operator()(const std::vector& lhs, return true; } + +bool AqlValueGroupEqual::operator()(AqlValue const& lhs, AqlValue const& rhs) const { + return AqlValue::Compare(_trx, lhs, rhs, false) == 0; +} diff --git a/arangod/Aql/AqlValueGroup.h b/arangod/Aql/AqlValueGroup.h index e7bf94a763c1..f04d10eec31d 100644 --- a/arangod/Aql/AqlValueGroup.h +++ b/arangod/Aql/AqlValueGroup.h @@ -39,6 +39,7 @@ struct AqlValueGroupHash { AqlValueGroupHash(transaction::Methods* trx, size_t num); size_t operator()(std::vector const& value) const; + size_t operator()(AqlValue const& value) const; transaction::Methods* _trx; size_t const _num; @@ -49,10 +50,12 @@ struct AqlValueGroupEqual { explicit AqlValueGroupEqual(transaction::Methods* trx); bool operator()(std::vector const& lhs, std::vector const& rhs) const; + bool operator()(AqlValue const& lhs, AqlValue const& rhs) const; transaction::Methods* _trx; }; + } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/CollectNode.cpp b/arangod/Aql/CollectNode.cpp index 304962e14084..16d06066fb34 100644 --- a/arangod/Aql/CollectNode.cpp +++ b/arangod/Aql/CollectNode.cpp @@ -372,12 +372,14 @@ std::unique_ptr CollectNode::createBlock( calcGroupRegisters(groupRegisters, readableInputRegisters, writeableOutputRegisters); transaction::Methods* trxPtr = _plan->getAst()->query()->trx(); + + TRI_ASSERT(groupRegisters.size() == 1); DistinctCollectExecutorInfos infos(getRegisterPlan()->nrRegs[previousNode->getDepth()], getRegisterPlan()->nrRegs[getDepth()], getRegsToClear(), calcRegsToKeep(), std::move(readableInputRegisters), std::move(writeableOutputRegisters), - std::move(groupRegisters), trxPtr); + groupRegisters.front(), trxPtr); return std::make_unique>(&engine, this, std::move(infos)); diff --git a/arangod/Aql/DistinctCollectExecutor.cpp b/arangod/Aql/DistinctCollectExecutor.cpp index e19f1501aa5b..bb6dc923baf1 100644 --- a/arangod/Aql/DistinctCollectExecutor.cpp +++ b/arangod/Aql/DistinctCollectExecutor.cpp @@ -35,6 +35,10 @@ #include +#define LOG_DEVEL_DISTINCT_COLLECT_ENABLED false +#define LOG_DEVEL_DC \ + LOG_DEVEL_IF(LOG_DEVEL_DISTINCT_COLLECT_ENABLED) << __FUNCTION__ << " " + using namespace arangodb; using namespace arangodb::aql; @@ -44,19 +48,16 @@ DistinctCollectExecutorInfos::DistinctCollectExecutorInfos( std::unordered_set registersToKeep, std::unordered_set&& readableInputRegisters, std::unordered_set&& writeableInputRegisters, - std::vector>&& groupRegisters, - transaction::Methods* trxPtr) + std::pair groupRegister, transaction::Methods* trxPtr) : 
ExecutorInfos(std::make_shared>(readableInputRegisters), std::make_shared>(writeableInputRegisters), nrInputRegisters, nrOutputRegisters, std::move(registersToClear), std::move(registersToKeep)), - _groupRegisters(groupRegisters), - _trxPtr(trxPtr) { - TRI_ASSERT(!_groupRegisters.empty()); -} + _groupRegister(groupRegister), + _trxPtr(trxPtr) {} -std::vector> DistinctCollectExecutorInfos::getGroupRegisters() const { - return _groupRegisters; +std::pair const& DistinctCollectExecutorInfos::getGroupRegister() const { + return _groupRegister; } transaction::Methods* DistinctCollectExecutorInfos::getTransaction() const { @@ -66,9 +67,7 @@ transaction::Methods* DistinctCollectExecutorInfos::getTransaction() const { DistinctCollectExecutor::DistinctCollectExecutor(Fetcher& fetcher, Infos& infos) : _infos(infos), _fetcher(fetcher), - _seen(1024, - AqlValueGroupHash(_infos.getTransaction(), - _infos.getGroupRegisters().size()), + _seen(1024, AqlValueGroupHash(_infos.getTransaction(), 1), AqlValueGroupEqual(_infos.getTransaction())) {} DistinctCollectExecutor::~DistinctCollectExecutor() { destroyValues(); } @@ -76,82 +75,110 @@ DistinctCollectExecutor::~DistinctCollectExecutor() { destroyValues(); } void DistinctCollectExecutor::initializeCursor() { destroyValues(); } std::pair DistinctCollectExecutor::produceRows(OutputAqlItemRow& output) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); +} + +std::pair DistinctCollectExecutor::expectedNumberOfRows(size_t atMost) const { + // This block cannot know how many elements will be returned exactly. + // but it is upper bounded by the input. + return _fetcher.preFetchNumberOfRows(atMost); +} + +void DistinctCollectExecutor::destroyValues() { + // destroy all AqlValues captured + for (auto& value : _seen) { + const_cast(&value)->destroy(); + } + _seen.clear(); +} + +const DistinctCollectExecutor::Infos& DistinctCollectExecutor::infos() const noexcept { + return _infos; +} + +auto DistinctCollectExecutor::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { TRI_IF_FAILURE("DistinctCollectExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - NoStats stats{}; + InputAqlItemRow input{CreateInvalidInputRowHint{}}; - ExecutionState state; + ExecutorState state = ExecutorState::HASMORE; - std::vector groupValues; - groupValues.reserve(_infos.getGroupRegisters().size()); + LOG_DEVEL_DC << output.getClientCall(); - while (true) { - std::tie(state, input) = _fetcher.fetchRow(); + AqlValue groupValue; - if (state == ExecutionState::WAITING) { - return {state, stats}; - } + while (inputRange.hasDataRow()) { + LOG_DEVEL_DC << "output.isFull() = " << std::boolalpha << output.isFull(); - if (!input) { - TRI_ASSERT(state == ExecutionState::DONE); - return {state, stats}; + if (output.isFull()) { + LOG_DEVEL_DC << "output is full"; + break; } + + std::tie(state, input) = inputRange.nextDataRow(); + LOG_DEVEL_DC << "inputRange.nextDataRow() = " << state; TRI_ASSERT(input.isInitialized()); - groupValues.clear(); // for hashing simply re-use the aggregate registers, without cloning // their contents - for (auto& it : _infos.getGroupRegisters()) { - groupValues.emplace_back(input.getValue(it.second)); - } + groupValue = input.getValue(_infos.getGroupRegister().second); // now check if we already know this group - auto foundIt = _seen.find(groupValues); - - bool newGroup = foundIt == _seen.end(); + bool newGroup = _seen.find(groupValue) == _seen.end(); if (newGroup) { - size_t i = 0; - - for (auto& 
it : _infos.getGroupRegisters()) { - output.cloneValueInto(it.first, input, groupValues[i]); - ++i; - } - - // transfer ownership - std::vector copy; - copy.reserve(groupValues.size()); - for (auto const& it : groupValues) { - copy.emplace_back(it.clone()); - } - _seen.emplace(std::move(copy)); - } + output.cloneValueInto(_infos.getGroupRegister().first, input, groupValue); + output.advanceRow(); - // Abort if upstream is done - if (state == ExecutionState::DONE) { - return {state, stats}; + _seen.emplace(groupValue.clone()); } - - return {ExecutionState::HASMORE, stats}; } -} -std::pair DistinctCollectExecutor::expectedNumberOfRows(size_t atMost) const { - // This block cannot know how many elements will be returned exactly. - // but it is upper bounded by the input. - return _fetcher.preFetchNumberOfRows(atMost); + LOG_DEVEL_DC << "returning state " << state; + return {inputRange.upstreamState(), {}, {}}; } -void DistinctCollectExecutor::destroyValues() { - // destroy all AqlValues captured - for (auto& it : _seen) { - for (auto& it2 : it) { - const_cast(&it2)->destroy(); +auto DistinctCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + TRI_IF_FAILURE("DistinctCollectExecutor::skipRowsRange") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + InputAqlItemRow input{CreateInvalidInputRowHint{}}; + ExecutorState state = ExecutorState::HASMORE; + + AqlValue groupValue; + size_t skipped = 0; + + LOG_DEVEL_DC << call; + + while (inputRange.hasDataRow()) { + LOG_DEVEL_DC << "call.needSkipMore() = " << std::boolalpha << call.needSkipMore(); + + if (!call.needSkipMore()) { + return {ExecutorState::HASMORE, {}, skipped, {}}; + } + + std::tie(state, input) = inputRange.nextDataRow(); + LOG_DEVEL_DC << "inputRange.nextDataRow() = " << state; + TRI_ASSERT(input.isInitialized()); + + // for hashing simply re-use the aggregate registers, without cloning + // their contents + groupValue = input.getValue(_infos.getGroupRegister().second); + + // now check if we already know this group + bool newGroup = _seen.find(groupValue) == _seen.end(); + if (newGroup) { + skipped += 1; + call.didSkip(1); + + _seen.emplace(groupValue.clone()); } } - _seen.clear(); -} -const DistinctCollectExecutor::Infos& DistinctCollectExecutor::infos() const noexcept { - return _infos; + return {inputRange.upstreamState(), {}, skipped, {}}; } diff --git a/arangod/Aql/DistinctCollectExecutor.h b/arangod/Aql/DistinctCollectExecutor.h index 2bc5b7963fde..def1af36cf13 100644 --- a/arangod/Aql/DistinctCollectExecutor.h +++ b/arangod/Aql/DistinctCollectExecutor.h @@ -26,6 +26,8 @@ #ifndef ARANGOD_AQL_DISTINCT_COLLECT_EXECUTOR_H #define ARANGOD_AQL_DISTINCT_COLLECT_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlValue.h" #include "Aql/AqlValueGroup.h" #include "Aql/ExecutionState.h" @@ -55,7 +57,7 @@ class DistinctCollectExecutorInfos : public ExecutorInfos { std::unordered_set registersToKeep, std::unordered_set&& readableInputRegisters, std::unordered_set&& writeableInputRegisters, - std::vector>&& groupRegisters, + std::pair groupRegister, transaction::Methods* trxPtr); DistinctCollectExecutorInfos() = delete; @@ -64,12 +66,12 @@ class DistinctCollectExecutorInfos : public ExecutorInfos { ~DistinctCollectExecutorInfos() = default; public: - std::vector> getGroupRegisters() const; + [[nodiscard]] std::pair const& getGroupRegister() const; transaction::Methods* getTransaction() const; private: /// @brief pairs, consisting of out register 
and in register - std::vector> _groupRegisters; + std::pair _groupRegister; /// @brief the transaction for this query transaction::Methods* _trxPtr; @@ -105,6 +107,17 @@ class DistinctCollectExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + /** + * @brief produce the next Rows of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; + std::pair expectedNumberOfRows(size_t atMost) const; private: @@ -114,7 +127,7 @@ class DistinctCollectExecutor { private: Infos const& _infos; Fetcher& _fetcher; - std::unordered_set, AqlValueGroupHash, AqlValueGroupEqual> _seen; + std::unordered_set _seen; }; } // namespace aql diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 0f9dd19c838a..19904c9eabaa 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -134,13 +134,13 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); * TODO: This should be removed once all executors and fetchers are ported to the new style. */ template -constexpr bool isNewStyleExecutor = is_one_of_v< - Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, - IdExecutor>, ReturnExecutor, IndexExecutor, EnumerateCollectionExecutor, - /* - CalculationExecutor, CalculationExecutor, - CalculationExecutor,*/ - HashedCollectExecutor, +constexpr bool isNewStyleExecutor = + is_one_of_v, + IdExecutor>, ReturnExecutor, + DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, + // TODO: re-enable after new subquery end & start are implemented + // CalculationExecutor, CalculationExecutor, CalculationExecutor, + HashedCollectExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode @@ -1124,7 +1124,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { (is_one_of_v< Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, IdExecutor>, IdExecutor, - HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, + HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif @@ -1161,13 +1161,13 @@ static SkipRowsRangeVariant constexpr skipRowsType() { !std::is_same::value || useFetcher, "LimitExecutor needs to implement skipRows() to work correctly"); - if (useExecutor) { + static_assert(useExecutor || useFetcher, "no skipping variant available"); + + if constexpr (useExecutor) { return SkipRowsRangeVariant::EXECUTOR; - } else if (useFetcher) { - return SkipRowsRangeVariant::FETCHER; } else { - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + static_assert(useFetcher); + return SkipRowsRangeVariant::FETCHER; } } diff --git a/tests/Aql/DistinctCollectExecutorTest.cpp b/tests/Aql/DistinctCollectExecutorTest.cpp index 6a7f2c793fe7..060ad80677ba 100644 --- a/tests/Aql/DistinctCollectExecutorTest.cpp +++ b/tests/Aql/DistinctCollectExecutorTest.cpp @@ -32,9 +32,10 @@ #include "Aql/DistinctCollectExecutor.h" #include "Aql/ExecutionEngine.h" #include "Aql/OutputAqlItemRow.h" -#include "Aql/SingleRowFetcher.h" #include "Aql/Query.h" +#include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" +#include "ExecutorTestHelper.h" 
#include "Mocks/Servers.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" @@ -50,310 +51,72 @@ namespace arangodb { namespace tests { namespace aql { -class DistinctCollectExecutorTest : public ::testing::Test { +using DistinctCollectTestHelper = ExecutorTestHelper<1, 1>; +using DistinctCollectSplitType = DistinctCollectTestHelper::SplitType; + +class DistinctCollectExecutorTest + : public AqlExecutorTestCaseWithParam> { protected: ExecutionState state; ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - mocks::MockAqlServer server; - std::unique_ptr fakedQuery; arangodb::transaction::Methods* trx; std::unordered_set const regToClear; std::unordered_set const regToKeep; - std::vector> groupRegisters; - std::unordered_set readableInputRegisters; - std::unordered_set writeableOutputRegisters; + std::unordered_set readableInputRegisters = {0}; + std::unordered_set writeableOutputRegisters = {1}; SharedAqlItemBlockPtr block; VPackBuilder input; NoStats stats; + DistinctCollectExecutorInfos infos; + DistinctCollectExecutorTest() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - fakedQuery(server.createFakeQuery()), - trx(fakedQuery->trx()) {} + : trx(fakedQuery->trx()), + infos(1 /*nrIn*/, 2 /*nrOut*/, regToClear, regToKeep, + std::move(readableInputRegisters), std::move(writeableOutputRegisters), + std::make_pair(1, 0), trx) {} }; -TEST_F(DistinctCollectExecutorTest, if_no_rows_in_upstream_the_producer_doesnt_wait) { - groupRegisters.emplace_back(std::make_pair(1, 2)); - DistinctCollectExecutorInfos infos(2 /*nrIn*/, 2 /*nrOut*/, regToClear, - regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), trx); - block.reset(new AqlItemBlock(itemBlockManager, 1000, 2)); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - DistinctCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} - -TEST_F(DistinctCollectExecutorTest, if_no_rows_in_upstream_the_producer_waits) { - groupRegisters.emplace_back(std::make_pair(1, 2)); - DistinctCollectExecutorInfos infos(2 /*nrIn*/, 2 /*nrOut*/, regToClear, - regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), trx); - block.reset(new AqlItemBlock(itemBlockManager, 1000, 2)); - - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); - DistinctCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} - -TEST_F(DistinctCollectExecutorTest, - there_are_rows_in_the_upstream_no_distinct_values_the_producer_doesnt_wait) { - groupRegisters.emplace_back(std::make_pair(1, 0)); - readableInputRegisters.insert(0); - writeableOutputRegisters.insert(1); - RegisterId nrOutputRegister = 2; - DistinctCollectExecutorInfos infos(1 /*nrInputReg*/, - nrOutputRegister 
/*nrOutputReg*/, regToClear, - regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), trx); - block.reset(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)); - - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - DistinctCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 1); - - AqlValue z = block->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - ASSERT_EQ(z.toInt64(), 2); +TEST_P(DistinctCollectExecutorTest, split_1) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) + .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) + .setInputSplitType(split) + .setCall(AqlCall{2, AqlCall::Infinity{}, 2, true}) + .expectOutputValueList(3, 4) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(); } -TEST_F(DistinctCollectExecutorTest, - there_are_rows_in_the_upstream_no_distinct_values_the_producer_waits) { - groupRegisters.emplace_back(std::make_pair(1, 0)); - readableInputRegisters.insert(0); - writeableOutputRegisters.insert(1); - RegisterId nrOutputRegister = 2; - DistinctCollectExecutorInfos infos(1 /*nrInputReg*/, - nrOutputRegister /*nrOutputReg*/, regToClear, - regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), trx); - block.reset(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)); - - auto input = VPackParser::fromJson("[ [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - DistinctCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 1); - - AqlValue z = block->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - ASSERT_EQ(z.toInt64(), 2); +TEST_P(DistinctCollectExecutorTest, 
split_2) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) + .setInputValueList(1, 1, 1, 2, 3, 4, 4, 5) + .setInputSplitType(split) + .setCall(AqlCall{0, AqlCall::Infinity{}, 2, true}) + .expectOutputValueList(1, 2) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(); } -TEST_F(DistinctCollectExecutorTest, - there_are_rows_in_the_upstream_with_distinct_values_the_producer_doesnt_wait) { - groupRegisters.emplace_back(std::make_pair(1, 0)); - readableInputRegisters.insert(0); - writeableOutputRegisters.insert(1); - RegisterId nrOutputRegister = 2; - DistinctCollectExecutorInfos infos(1 /*nrInputReg*/, - nrOutputRegister /*nrOutputReg*/, regToClear, - regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), trx); - block.reset(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)); - - auto input = VPackParser::fromJson("[ [1], [2], [3], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - DistinctCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 1); - - AqlValue z = block->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - ASSERT_EQ(z.toInt64(), 2); - - AqlValue y = block->getValue(2, 1); - ASSERT_TRUE(y.isNumber()); - ASSERT_EQ(y.toInt64(), 3); -} - -TEST_F(DistinctCollectExecutorTest, - there_are_rows_in_the_upstream_with_distinct_values_the_producer_waits) { - groupRegisters.emplace_back(std::make_pair(1, 0)); - readableInputRegisters.insert(0); - writeableOutputRegisters.insert(1); - RegisterId nrOutputRegister = 2; - DistinctCollectExecutorInfos infos(1 /*nrInputReg*/, - nrOutputRegister /*nrOutputReg*/, regToClear, - regToKeep, std::move(readableInputRegisters), - std::move(writeableOutputRegisters), - std::move(groupRegisters), trx); - block.reset(new AqlItemBlock(itemBlockManager, 1000, nrOutputRegister)); - - auto input = VPackParser::fromJson("[ [1], [2], [3], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - DistinctCollectExecutor testee(fetcher, infos); - - OutputAqlItemRow result(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - 
ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 1); - - AqlValue z = block->getValue(1, 1); - ASSERT_TRUE(z.isNumber()); - ASSERT_EQ(z.toInt64(), 2); - - AqlValue y = block->getValue(2, 1); - ASSERT_TRUE(y.isNumber()); - ASSERT_EQ(y.toInt64(), 3); -} +template +const DistinctCollectSplitType splitIntoBlocks = + DistinctCollectSplitType{std::vector{vs...}}; +template +const DistinctCollectSplitType splitStep = DistinctCollectSplitType{step}; +INSTANTIATE_TEST_CASE_P(DistinctCollectExecutor, DistinctCollectExecutorTest, + ::testing::Values(splitIntoBlocks<2, 3>, + splitIntoBlocks<3, 4>, splitStep<2>, splitStep<1>)); } // namespace aql } // namespace tests } // namespace arangodb From c2ae62141af41f7ec6d8a5a631f0346f9f281803 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 28 Feb 2020 11:56:24 +0100 Subject: [PATCH 081/122] Feature/aql subquery execution block impl execute implementation noresults (#11180) * Use ExecutorHelper in tests * Implemented no Results fetcher --- arangod/Aql/ExecutionBlockImpl.cpp | 6 +- arangod/Aql/NoResultsExecutor.cpp | 12 ++- arangod/Aql/NoResultsExecutor.h | 20 +++- tests/Aql/NoResultsExecutorTest.cpp | 157 +++++++++++----------------- 4 files changed, 90 insertions(+), 105 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 19904c9eabaa..c1b4a286e91c 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -165,11 +165,12 @@ constexpr bool isNewStyleExecutor = IResearchViewMergeExecutor, IResearchViewMergeExecutor, IResearchViewMergeExecutor, - SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, + SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, NoResultsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, 
SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>; + template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, ExecutionNode const* node, @@ -1149,11 +1150,10 @@ static SkipRowsRangeVariant constexpr skipRowsType() { IResearchViewMergeExecutor, IResearchViewMergeExecutor, TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, - SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, + SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, NoResultsExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>), - "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! diff --git a/arangod/Aql/NoResultsExecutor.cpp b/arangod/Aql/NoResultsExecutor.cpp index cc958c00df17..1728e430e6dd 100644 --- a/arangod/Aql/NoResultsExecutor.cpp +++ b/arangod/Aql/NoResultsExecutor.cpp @@ -30,9 +30,15 @@ using namespace arangodb; using namespace arangodb::aql; -NoResultsExecutor::NoResultsExecutor(Fetcher& fetcher, ExecutorInfos& infos) {} +NoResultsExecutor::NoResultsExecutor(Fetcher&, ExecutorInfos&) {} NoResultsExecutor::~NoResultsExecutor() = default; -std::pair NoResultsExecutor::produceRows(OutputAqlItemRow& output) { - return {ExecutionState::DONE, NoStats{}}; +auto NoResultsExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) const + noexcept -> std::tuple { + return {ExecutorState::DONE, NoStats{}, AqlCall{0, false, 0, AqlCall::LimitType::HARD}}; } + +auto NoResultsExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) const + noexcept -> std::tuple { + return {ExecutorState::DONE, NoStats{}, 0, AqlCall{0, false, 0, AqlCall::LimitType::HARD}}; +}; \ No newline at end of file diff --git a/arangod/Aql/NoResultsExecutor.h b/arangod/Aql/NoResultsExecutor.h index 05fbc0aecd97..db5f7ed91059 100644 --- a/arangod/Aql/NoResultsExecutor.h +++ b/arangod/Aql/NoResultsExecutor.h @@ -39,7 +39,9 @@ template class SingleRowFetcher; class ExecutorInfos; class NoStats; +struct AqlCall; class OutputAqlItemRow; +class AqlItemBlockInputRange; class NoResultsExecutor { public: @@ -51,16 +53,24 @@ class NoResultsExecutor { using Fetcher = SingleRowFetcher; using Infos = ExecutorInfos; using Stats = NoStats; - NoResultsExecutor(Fetcher& fetcher, ExecutorInfos&); + NoResultsExecutor(Fetcher&, ExecutorInfos&); ~NoResultsExecutor(); /** - * @brief produce the next Row of Aql Values. + * @brief DO NOT PRODUCE ROWS * - * @return ExecutionState, - * if something was written output.hasValue() == true + * @return DONE, NoStats, HardLimit = 0 Call */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) const + noexcept -> std::tuple; + + /** + * @brief DO NOT SKIP ROWS + * + ** @return DONE, NoStats, 0, HardLimit = 0 Call + */ + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) const + noexcept -> std::tuple; inline std::pair expectedNumberOfRows(size_t) const { // Well nevermind the input, but we will always return 0 rows here. 
diff --git a/tests/Aql/NoResultsExecutorTest.cpp b/tests/Aql/NoResultsExecutorTest.cpp index 2557345af345..c7aaaad1b830 100644 --- a/tests/Aql/NoResultsExecutorTest.cpp +++ b/tests/Aql/NoResultsExecutorTest.cpp @@ -20,19 +20,13 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// -#include "RowFetcherHelper.h" #include "gtest/gtest.h" -#include "Aql/AqlItemBlock.h" +#include "ExecutorTestHelper.h" + #include "Aql/ExecutorInfos.h" -#include "Aql/InputAqlItemRow.h" #include "Aql/NoResultsExecutor.h" -#include "Aql/OutputAqlItemRow.h" -#include "Aql/ResourceUsage.h" -#include "Aql/Stats.h" - -#include -#include +#include "Aql/SingleRowFetcher.h" using namespace arangodb; using namespace arangodb::aql; @@ -41,94 +35,69 @@ namespace arangodb { namespace tests { namespace aql { -class NoResultsExecutorTest : public ::testing::Test { - protected: - ExecutionState state; - - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - SharedAqlItemBlockPtr block; - std::shared_ptr> outputRegisters; - std::shared_ptr> registersToClear; - std::shared_ptr> registersToKeep; - - RegisterId inputRegister; - ExecutorInfos infos; - OutputAqlItemRow result; +using NoResultsTestHelper = ExecutorTestHelper<1, 1>; +using NoResultsSplitType = NoResultsTestHelper::SplitType; +using NoResultsInputParam = std::tuple; - NoResultsExecutorTest() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - block(new AqlItemBlock(itemBlockManager, 1000, 1)), - outputRegisters(make_shared_unordered_set()), - registersToClear(make_shared_unordered_set()), - registersToKeep(make_shared_unordered_set()), - inputRegister(0), - infos(make_shared_unordered_set({0}), outputRegisters, 1 /*nr in*/, - 1 /*nr out*/, *registersToClear, *registersToKeep), - result(std::move(block), outputRegisters, registersToKeep, registersToClear) {} +class NoResultsExecutorTest : public AqlExecutorTestCaseWithParam { + protected: + auto getSplit() -> NoResultsSplitType { + auto const& [split, call, inputRows] = GetParam(); + return split; + } + auto getCall() -> AqlCall { + auto const& [split, call, inputRows] = GetParam(); + return call; + } + + auto getInput() -> size_t { + auto const& [split, call, inputRows] = GetParam(); + return inputRows; + } + + auto makeInfos() -> ExecutorInfos { + return ExecutorInfos{make_shared_unordered_set({0}), + make_shared_unordered_set(), + 1, + 1, + {}, + {}}; + } }; -TEST_F(NoResultsExecutorTest, no_rows_upstream_the_producer_doesnt_wait) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - NoResultsExecutor testee(fetcher, infos); - NoStats stats{}; - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); -} - -TEST_F(NoResultsExecutorTest, no_rows_upstream_the_producer_waits) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); - NoResultsExecutor testee(fetcher, infos); - NoStats stats{}; - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); -} - 
-TEST_F(NoResultsExecutorTest, rows_upstream_the_producer_doesnt_wait) { - auto input = VPackParser::fromJson("[ [true], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - NoResultsExecutor testee(fetcher, infos); - NoStats stats{}; - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); -} - -TEST_F(NoResultsExecutorTest, rows_upstream_the_producer_waits) { - auto input = VPackParser::fromJson("[ [true], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - NoResultsExecutor testee(fetcher, infos); - NoStats stats{}; - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(fetcher.nrCalled(), 0); +template +const NoResultsSplitType splitIntoBlocks = + NoResultsSplitType{std::vector{vs...}}; +template +const NoResultsSplitType splitStep = NoResultsSplitType{step}; + +auto NoResultsInputSplits = + ::testing::Values(splitIntoBlocks<2, 3>, splitStep<1>, splitStep<2>); +// This is just a random list of calls. +auto NoResultsCalls = + ::testing::Values(AqlCall{}, AqlCall{0, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{0, false, 2, AqlCall::LimitType::HARD}, + AqlCall{0, true, 1, AqlCall::LimitType::HARD}, + AqlCall{5, false, 1, AqlCall::LimitType::SOFT}, + AqlCall{2, true, 0, AqlCall::LimitType::HARD}); +auto NoResultsInputSizes = ::testing::Values(0, 1, 10, 2000); + +INSTANTIATE_TEST_CASE_P(NoResultsExecutorTest, NoResultsExecutorTest, + ::testing::Combine(NoResultsInputSplits, NoResultsCalls, + NoResultsInputSizes)); + +TEST_P(NoResultsExecutorTest, do_never_ever_return_results) { + ExecutionStats stats{}; + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::NORESULTS) + .setInputFromRowNum(getInput()) + .setInputSplitType(getSplit()) + .setCall(getCall()) + .expectOutput({0}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .expectedStats(stats) + .run(); } } // namespace aql From 4fe26a951379961b56e94c2bfe854c6037cc53eb Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 28 Feb 2020 15:16:13 +0100 Subject: [PATCH 082/122] Moved Constrained sort to execute API (#11173) * Moved Constrained sort to execute API * The LIMIT in ConstrainedSort can actually fetch TOO much, if there is too small input from upstream * Improved profiler tests, we need less calls now \o/ * Fixed unlimited call to limit executor --- arangod/Aql/ConstrainedSortExecutor.cpp | 195 +++++++++++------------- arangod/Aql/ConstrainedSortExecutor.h | 23 ++- arangod/Aql/ExecutionBlockImpl.cpp | 3 +- arangod/Aql/LimitExecutor.cpp | 2 +- tests/Aql/SortLimit-test.cpp | 84 ++++++---- tests/js/server/aql/aql-profiler.js | 10 +- 6 files changed, 167 insertions(+), 150 deletions(-) diff --git a/arangod/Aql/ConstrainedSortExecutor.cpp b/arangod/Aql/ConstrainedSortExecutor.cpp index 2db1771511c8..54aa1ce7248a 100644 --- a/arangod/Aql/ConstrainedSortExecutor.cpp +++ 
b/arangod/Aql/ConstrainedSortExecutor.cpp @@ -81,7 +81,7 @@ class arangodb::aql::ConstrainedLessThan { std::vector const& _sortRegisters; }; // ConstrainedLessThan -arangodb::Result ConstrainedSortExecutor::pushRow(InputAqlItemRow& input) { +arangodb::Result ConstrainedSortExecutor::pushRow(InputAqlItemRow const& input) { using arangodb::aql::AqlItemBlock; using arangodb::aql::AqlValue; using arangodb::aql::RegisterId; @@ -114,12 +114,14 @@ arangodb::Result ConstrainedSortExecutor::pushRow(InputAqlItemRow& input) { return TRI_ERROR_NO_ERROR; } -bool ConstrainedSortExecutor::compareInput(size_t const& rowPos, InputAqlItemRow& row) const { +bool ConstrainedSortExecutor::compareInput(size_t const& rowPos, + InputAqlItemRow const& row) const { for (auto const& reg : _infos.sortRegisters()) { auto const& lhs = _heapBuffer->getValueReference(rowPos, reg.reg); auto const& rhs = row.getValue(reg.reg); - int const cmp = arangodb::aql::AqlValue::Compare(_infos.vpackOptions(), lhs, rhs, true); + int const cmp = + arangodb::aql::AqlValue::Compare(_infos.vpackOptions(), lhs, rhs, true); if (cmp < 0) { return reg.asc; @@ -155,87 +157,118 @@ ConstrainedSortExecutor::~ConstrainedSortExecutor() = default; bool ConstrainedSortExecutor::doneProducing() const noexcept { // must not get strictly larger TRI_ASSERT(_returnNext <= _rows.size()); - return _state == ExecutionState::DONE && _returnNext >= _rows.size(); + return _returnNext >= _rows.size(); } bool ConstrainedSortExecutor::doneSkipping() const noexcept { // must not get strictly larger TRI_ASSERT(_returnNext + _skippedAfter <= _rowsRead); - return _state == ExecutionState::DONE && _returnNext + _skippedAfter >= _rowsRead; + return _returnNext + _skippedAfter >= _rowsRead; } -ExecutionState ConstrainedSortExecutor::consumeInput() { - while (_state != ExecutionState::DONE) { +ExecutorState ConstrainedSortExecutor::consumeInput(AqlItemBlockInputRange& inputRange) { + while (inputRange.hasDataRow()) { TRI_IF_FAILURE("SortBlock::doSorting") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - // We need to pull rows from above, and insert them into the heap - InputAqlItemRow input(CreateInvalidInputRowHint{}); - std::tie(_state, input) = _fetcher.fetchRow(); - if (_state == ExecutionState::WAITING) { - return _state; + auto const& [state, input] = inputRange.nextDataRow(); + // Otherwise we would have left the loop + TRI_ASSERT(input.isInitialized()); + ++_rowsRead; + if (_rowsPushed < _infos.limit() || !compareInput(_rows.front(), input)) { + // Push this row into the heap + pushRow(input); } - if (!input.isInitialized()) { - TRI_ASSERT(_state == ExecutionState::DONE); - } else { - ++_rowsRead; - if (_rowsPushed < _infos.limit() || !compareInput(_rows.front(), input)) { - // Push this row into the heap - pushRow(input); - } + } + if (inputRange.upstreamState() == ExecutorState::DONE) { + if (_returnNext == 0) { + // Only once sort the rows again, s.t. the + // contained list of elements is in the right ordering. + std::sort(_rows.begin(), _rows.end(), *_cmpHeap); } } + return inputRange.upstreamState(); +} - TRI_ASSERT(_state == ExecutionState::DONE); - - return _state; +auto ConstrainedSortExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + if (consumeInput(input) == ExecutorState::HASMORE) { + // Input could not be fully consumed, executor is more hungry! + // Get more. + AqlCall upstreamCall{}; + // We need to fetch everything from upstream. + // Unlimited, no offset call. 
+ return {ExecutorState::HASMORE, NoStats{}, upstreamCall}; + }; + + while (!output.isFull() && !doneProducing()) { + // Now our heap is full and sorted, we just need to return it line by line + TRI_ASSERT(_returnNext < _rows.size()); + auto const heapRowPosition = _rows[_returnNext]; + ++_returnNext; + InputAqlItemRow heapRow(_heapBuffer, heapRowPosition); + TRI_ASSERT(heapRow.isInitialized()); + TRI_ASSERT(heapRowPosition < _rowsPushed); + output.copyRow(heapRow); + output.advanceRow(); + } + if (doneProducing()) { + return {ExecutorState::DONE, NoStats{}, AqlCall{}}; + } + return {ExecutorState::HASMORE, NoStats{}, AqlCall{}}; } -std::pair ConstrainedSortExecutor::produceRows(OutputAqlItemRow& output) { - { - ExecutionState state = consumeInput(); - TRI_ASSERT(state == _state); - if (state == ExecutionState::WAITING) { - return {ExecutionState::WAITING, NoStats{}}; +auto ConstrainedSortExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + if (consumeInput(inputRange) == ExecutorState::HASMORE) { + // Input could not be fully consumed, executor is more hungry! + // Get more. + AqlCall upstreamCall{}; + // We need to fetch everything from upstream. + // Unlimited, no offset call. + return {ExecutorState::HASMORE, NoStats{}, 0, upstreamCall}; + }; + + while (!doneProducing()) { + if (call.getOffset() > 0) { + size_t available = _rows.size() - _returnNext; + size_t toSkip = std::min(available, call.getOffset()); + _returnNext += toSkip; + call.didSkip(toSkip); + } else if (call.needSkipMore()) { + // We are in fullcount case, simply skip all! + // I think this is actually invalid, as it would cause LIMIT + // to underfetch. + // However, it will work like this. + size_t available = _rows.size() - _returnNext; + call.didSkip(available); + _returnNext = _rows.size(); + } else { + // We still have something, but cannot continue to skip. + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), AqlCall{}}; } - TRI_ASSERT(state == ExecutionState::DONE); } - if (doneProducing()) { - if (doneSkipping()) { - // No we're really done - return {ExecutionState::DONE, NoStats{}}; + while (call.needSkipMore() && !doneSkipping()) { + // unlikely, but for backwards compatibility. + if (call.getOffset() > 0) { + auto const rowsLeftToSkip = _rowsRead - (_rows.size() + _skippedAfter); + auto const skipNum = (std::min)(call.getOffset(), rowsLeftToSkip); + call.didSkip(skipNum); + _skippedAfter += skipNum; + } else { + // Fullcount + auto const rowsLeftToSkip = _rowsRead - (_rows.size() + _skippedAfter); + call.didSkip(rowsLeftToSkip); + _skippedAfter += rowsLeftToSkip; + TRI_ASSERT(doneSkipping()); } - // We should never get here, as the following LIMIT block should never fetch - // more than our limit. It may only skip after that. - // But note that this means that this block breaks with usual AQL behaviour! - // From this point on (i.e. doneProducing()), this block may only skip, not produce. - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION_MESSAGE( - TRI_ERROR_INTERNAL_AQL, - "Overfetch during constrained heap sort. Please report this error! Try " - "turning off the sort-limit optimizer rule to get your query working."); } - if (_returnNext == 0) { - // Only once sort the rows again, s.t. the - // contained list of elements is in the right ordering. - std::sort(_rows.begin(), _rows.end(), *_cmpHeap); - } + auto const state = doneSkipping() ? 
ExecutorState::DONE : ExecutorState::HASMORE; - // Now our heap is full and sorted, we just need to return it line by line - TRI_ASSERT(_returnNext < _rows.size()); - auto const heapRowPosition = _rows[_returnNext]; - ++_returnNext; - InputAqlItemRow heapRow(_heapBuffer, heapRowPosition); - TRI_ASSERT(heapRow.isInitialized()); - TRI_ASSERT(heapRowPosition < _rowsPushed); - output.copyRow(heapRow); - - // Lie, we may have a possible LIMIT block with fullCount to work. - // We emitted at least one row at this point, so this is fine. - return {ExecutionState::HASMORE, NoStats{}}; + return {state, NoStats{}, call.getSkipCount(), AqlCall{}}; } std::pair ConstrainedSortExecutor::expectedNumberOfRows(size_t) const { @@ -268,45 +301,3 @@ std::pair ConstrainedSortExecutor::expectedNumberOfRows( return {ExecutionState::HASMORE, rowsLeft}; } - -std::tuple ConstrainedSortExecutor::skipRows(size_t toSkipRequested) { - { - ExecutionState state = consumeInput(); - TRI_ASSERT(state == _state); - if (state == ExecutionState::WAITING) { - return {ExecutionState::WAITING, NoStats{}, 0}; - } - TRI_ASSERT(state == ExecutionState::DONE); - } - - if (_returnNext == 0) { - // Only once sort the rows again, s.t. the - // contained list of elements is in the right ordering. - std::sort(_rows.begin(), _rows.end(), *_cmpHeap); - } - - size_t skipped = 0; - - // Skip rows in the heap - if (!doneProducing()) { - TRI_ASSERT(_rows.size() >= _returnNext); - auto const rowsLeftInHeap = _rows.size() - _returnNext; - auto const skipNum = (std::min)(toSkipRequested, rowsLeftInHeap); - _returnNext += skipNum; - skipped += skipNum; - } - - // Skip rows we've dropped - if (skipped < toSkipRequested && !doneSkipping()) { - TRI_ASSERT(doneProducing()); - auto const rowsLeftToSkip = _rowsRead - (_rows.size() + _skippedAfter); - auto const skipNum = (std::min)(toSkipRequested, rowsLeftToSkip); - _skippedAfter += skipNum; - skipped += skipNum; - } - - TRI_ASSERT(skipped <= toSkipRequested); - auto const state = doneSkipping() ? ExecutionState::DONE : ExecutionState::HASMORE; - - return {state, NoStats{}, skipped}; -} diff --git a/arangod/Aql/ConstrainedSortExecutor.h b/arangod/Aql/ConstrainedSortExecutor.h index 7fdf9db53972..9541dd0cbde4 100644 --- a/arangod/Aql/ConstrainedSortExecutor.h +++ b/arangod/Aql/ConstrainedSortExecutor.h @@ -47,6 +47,7 @@ class AqlItemMatrix; class ConstrainedLessThan; class ExecutorInfos; class InputAqlItemRow; +class AqlItemBlockInputRange; class NoStats; class OutputAqlItemRow; class SortExecutorInfos; @@ -70,14 +71,20 @@ class ConstrainedSortExecutor { ~ConstrainedSortExecutor(); /** - * @brief produce the next Row of Aql Values. + * @brief produce the next Rows of Aql Values. * - * @return ExecutionState, - * if something was written output.hasValue() == true + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; - std::tuple skipRows(size_t toSkipRequested); + /** + * @brief skip the next Rows of Aql Values. 
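+   *        Heap rows are skipped first; once the heap is exhausted, the rows
+   *        that were dropped by the constrained sort are accounted for
+   *        (offset and fullCount handling).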
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; /** * @brief This Executor knows how many rows it will produce and most by itself @@ -86,8 +93,8 @@ class ConstrainedSortExecutor { std::pair expectedNumberOfRows(size_t atMost) const; private: - bool compareInput(size_t const& rosPos, InputAqlItemRow& row) const; - arangodb::Result pushRow(InputAqlItemRow& row); + bool compareInput(size_t const& rosPos, InputAqlItemRow const& row) const; + arangodb::Result pushRow(InputAqlItemRow const& row); // We're done producing when we've emitted all rows from our heap. bool doneProducing() const noexcept; @@ -97,7 +104,7 @@ class ConstrainedSortExecutor { // sort as well. This is for fullCount queries only. bool doneSkipping() const noexcept; - ExecutionState consumeInput(); + ExecutorState consumeInput(AqlItemBlockInputRange& inputRange); private: Infos& _infos; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index c1b4a286e91c..d4f9ee43a800 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -140,7 +140,7 @@ constexpr bool isNewStyleExecutor = DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, // TODO: re-enable after new subquery end & start are implemented // CalculationExecutor, CalculationExecutor, CalculationExecutor, - HashedCollectExecutor, + HashedCollectExecutor, ConstrainedSortExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode @@ -1126,6 +1126,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, IdExecutor>, IdExecutor, HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, + ConstrainedSortExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif diff --git a/arangod/Aql/LimitExecutor.cpp b/arangod/Aql/LimitExecutor.cpp index 7d3d3ea6f089..0f13bd062a52 100644 --- a/arangod/Aql/LimitExecutor.cpp +++ b/arangod/Aql/LimitExecutor.cpp @@ -75,7 +75,7 @@ auto LimitExecutor::calculateUpstreamCall(AqlCall const& clientCall) const -> Aq // Generally, we create a hard limit. However, if we get a soft limit from // downstream that is lower than our hard limit, we use that instead. - bool const useSoftLimit = clientCall.hasSoftLimit() && + bool const useSoftLimit = !clientCall.hasHardLimit() && clientCall.getLimit() < localLimitMinusDownstreamOffset; if (useSoftLimit) { diff --git a/tests/Aql/SortLimit-test.cpp b/tests/Aql/SortLimit-test.cpp index 2bf1f312bab6..72f4d17f7ff1 100644 --- a/tests/Aql/SortLimit-test.cpp +++ b/tests/Aql/SortLimit-test.cpp @@ -64,8 +64,10 @@ extern const char* ARGV0; // defined in main.cpp /// @brief setup //////////////////////////////////////////////////////////////////////////////// +// The Paramater is a Flag if we activate fullcount or not. 
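+// When the flag is set, buildOptions() additionally passes "fullCount": true
+// to the query, and verifyExpectedResults() compares the reported
+// stats.fullCount against the expected value.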
+ class SortLimitTest - : public ::testing::Test, + : public ::testing::TestWithParam, public arangodb::tests::LogSuppressor { protected: arangodb::tests::mocks::MockAqlServer server; @@ -87,10 +89,20 @@ class SortLimitTest ~SortLimitTest() { vocbase.reset(); } + auto doFullCount() -> bool { return GetParam(); } + + auto buildOptions(std::string rules) -> std::shared_ptr { + if (doFullCount()) { + return arangodb::velocypack::Parser::fromJson( + "{\"optimizer\": {\"rules\": [" + rules + "]}, \"fullCount\": true}"); + } + return arangodb::velocypack::Parser::fromJson( + "{\"optimizer\": {\"rules\": [" + rules + "]}}"); + } + std::string sorterType(TRI_vocbase_t& vocbase, std::string const& queryString, std::string rules = "") { - auto options = arangodb::velocypack::Parser::fromJson( - "{\"optimizer\": {\"rules\": [" + rules + "]}}"); + auto options = buildOptions(rules); arangodb::aql::Query query(false, vocbase, arangodb::aql::QueryString(queryString), nullptr, options, arangodb::aql::PART_MAIN); @@ -114,9 +126,8 @@ class SortLimitTest void verifyExpectedResults(TRI_vocbase_t& vocbase, std::string const& queryString, std::vector const& expected, - std::string rules = "") { - auto options = arangodb::velocypack::Parser::fromJson( - "{\"optimizer\": {\"rules\": [" + rules + "]}}"); + size_t fullCount, std::string rules = "") { + auto options = buildOptions(rules); arangodb::aql::Query query(false, vocbase, arangodb::aql::QueryString(queryString), nullptr, options, arangodb::aql::PART_MAIN); std::shared_ptr ss = query.sharedState(); @@ -146,6 +157,13 @@ class SortLimitTest << resolved.toJson(); i++; } + auto actualFullCount = arangodb::basics::VelocyPackHelper::getNumericValue( + result.extra->slice(), std::vector{"stats", "fullCount"}, 0); + if (doFullCount()) { + EXPECT_EQ(actualFullCount, fullCount); + } else { + EXPECT_EQ(actualFullCount, 0); + } } // create collection0, insertedDocs[0, 999] @@ -182,102 +200,104 @@ class SortLimitTest } }; -TEST_F(SortLimitTest, CheckSimpleLimitSortedAscInInsertionOrder) { +INSTANTIATE_TEST_CASE_P(SortLimitTest, SortLimitTest, ::testing::Bool()); + +TEST_P(SortLimitTest, CheckSimpleLimitSortedAscInInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valAsc LIMIT 0, 10 RETURN d"; std::vector expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckLimitWithOffsetSortedAscInInsertionOrder) { +TEST_P(SortLimitTest, CheckLimitWithOffsetSortedAscInInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valAsc LIMIT 10, 10 RETURN d"; std::vector expected = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckSimpleLimitSortedAscInReverseInsertionOrder) { +TEST_P(SortLimitTest, CheckSimpleLimitSortedAscInReverseInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valDsc LIMIT 0, 10 RETURN d"; std::vector expected = {999, 998, 997, 996, 995, 994, 993, 992, 991, 990}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckLimitWithOffsetSortedAscInReverseInsertionOrder) { +TEST_P(SortLimitTest, 
CheckLimitWithOffsetSortedAscInReverseInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valDsc LIMIT 10, 10 RETURN d"; std::vector expected = {989, 988, 987, 986, 985, 984, 983, 982, 981, 980}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckSimpleLimitSortedDscInInsertionOrder) { +TEST_P(SortLimitTest, CheckSimpleLimitSortedDscInInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valAsc DESC LIMIT 0, 10 RETURN d"; std::vector expected = {999, 998, 997, 996, 995, 994, 993, 992, 991, 990}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckLimitWithOffsetSortedDscInInsertionOrder) { +TEST_P(SortLimitTest, CheckLimitWithOffsetSortedDscInInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valAsc DESC LIMIT 10, 10 RETURN d"; std::vector expected = {989, 988, 987, 986, 985, 984, 983, 982, 981, 980}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckSimpleLimitSortedDscInReverseInsertionOrder) { +TEST_P(SortLimitTest, CheckSimpleLimitSortedDscInReverseInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valDsc DESC LIMIT 0, 10 RETURN d"; std::vector expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckLimitWithOffsetSortedDscInReverseInsertionOrder) { +TEST_P(SortLimitTest, CheckLimitWithOffsetSortedDscInReverseInsertionOrder) { std::string query = "FOR d IN testCollection0 SORT d.valDsc DESC LIMIT 10, 10 RETURN d"; std::vector expected = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckLimitWithOffsetCompoundSort) { +TEST_P(SortLimitTest, CheckLimitWithOffsetCompoundSort) { std::string query = "FOR d IN testCollection0 SORT d.mod, d.valAsc LIMIT 2, 5 RETURN d"; std::vector expected = {200, 300, 400, 500, 600}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckLimitWithOffsetCompoundSortAgain) { +TEST_P(SortLimitTest, CheckLimitWithOffsetCompoundSortAgain) { std::string query = "FOR d IN testCollection0 SORT d.mod, d.valAsc LIMIT 10, 10 RETURN d"; std::vector expected = {1, 101, 201, 301, 401, 501, 601, 701, 801, 901}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } -TEST_F(SortLimitTest, CheckInterloperFilterMovedUp) { +TEST_P(SortLimitTest, CheckInterloperFilterMovedUp) { std::string query = "FOR d IN testCollection0 SORT d.valAsc FILTER d.mod == 0 LIMIT 0, 10 " "RETURN d"; std::vector expected = {0, 100, 200, 300, 400, 500, 600, 700, 800, 900}; EXPECT_EQ(sorterType(*vocbase, query), "constrained-heap"); - 
verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 10); } -TEST_F(SortLimitTest, CheckInterloperFilterNotMoved) { +TEST_P(SortLimitTest, CheckInterloperFilterNotMoved) { std::string query = "FOR d IN testCollection0 SORT d.valAsc FILTER d.mod == 0 LIMIT 0, 10 " "RETURN d"; @@ -285,14 +305,14 @@ TEST_F(SortLimitTest, CheckInterloperFilterNotMoved) { std::vector expected = {0, 100, 200, 300, 400, 500, 600, 700, 800, 900}; EXPECT_EQ(sorterType(*vocbase, query, rules), "standard"); - verifyExpectedResults(*vocbase, query, expected, rules); + verifyExpectedResults(*vocbase, query, expected, 10, rules); } -TEST_F(SortLimitTest, CheckInterloperEnumerateList) { +TEST_P(SortLimitTest, CheckInterloperEnumerateList) { std::string query = "FOR d IN testCollection0 SORT d.valAsc FOR e IN 1..10 FILTER e == 1 " "LIMIT 0, 10 RETURN d"; std::vector expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; EXPECT_EQ(sorterType(*vocbase, query), "standard"); - verifyExpectedResults(*vocbase, query, expected); + verifyExpectedResults(*vocbase, query, expected, 1000); } diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js index 421d7254a7f7..50eca3fee3a3 100644 --- a/tests/js/server/aql/aql-profiler.js +++ b/tests/js/server/aql/aql-profiler.js @@ -677,7 +677,7 @@ function ahuacatlProfilerTestSuite () { { type : SingletonBlock, calls : 1, items : 1 }, { type : CalculationBlock, calls : 1, items : 1 }, { type : EnumerateListBlock, calls : batches, items : rows }, - { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows), items : limit(rows) }, + { type : ConstrainedSortBlock, calls : limitMinusSkipBatches(rows), items : limit(rows) }, { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }, { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) } ]; @@ -698,15 +698,13 @@ function ahuacatlProfilerTestSuite () { testSortLimitBlock2 : function () { const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i'; - const remainder = rows => rows - limit(rows); - const remainderBatches = rows => remainder(rows) === 0 ? 
0 : 1; const genNodeList = (rows, batches) => [ { type : SingletonBlock, calls : 1, items : 1 }, { type : CalculationBlock, calls : 1, items : 1 }, { type : EnumerateListBlock, calls : batches, items : rows }, - { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows) + remainderBatches(rows), items : rows }, - { type : LimitBlock, calls : limitMinusSkipBatches(rows) + /* this is only during ::execute work, should remove later again */ remainderBatches(rows), items : limitMinusSkip(rows) }, - { type : ReturnBlock, calls : limitMinusSkipBatches(rows) + /* this is only during ::execute work, should remove later again */ remainderBatches(rows), items : limitMinusSkip(rows) } + { type : ConstrainedSortBlock, calls : limitMinusSkipBatches(rows), items : rows }, + { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }, + { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) } ]; const bind = rows => ({ rows, From a0d6ec193f02203e44e2270be0146d8aa5147b86 Mon Sep 17 00:00:00 2001 From: Heiko Date: Fri, 28 Feb 2020 16:55:27 +0100 Subject: [PATCH 083/122] Feature/aql subquery operations stack aql item block input matrix (#11167) * added new file to cmakelist * added new method skipAllRemainingDataRows * added first implementation of execute - AllRowsFetcher * impl is now using skipAllRemainingDataRows method, also execute methods now using Fetcher::DataRange instead of inputRange * impl is now using skipAllRemainingDataRows method, also execute methods now using Fetcher::DataRange instead of inputRange - header file * AllRowsFetcher now using AqlItemBlockInputMatrix as DataRange, first implementation of execute * first implementation of AqlItemBlockInputMatrix * more cleanup * optimized execut elogic * optimized logic in inputmatrix, still a few todos left * added SortExecutor as newStyle * added produceRows and skipRows * logs (to be removed), skipAllRemainingRows * logs * inputMatrix * changed return type of skipAllRemainingDataRows * impl logs * logs, added clear method * sort test * typo, log * latest changes * cleanup, assert * rm logs * added more todos * todo * new method names, compile again * Reset ExecutorReturnedDone for SubqueryStart and SubqueryEnd. * InputMatrix needs to return state DONE on every input request. It guarantees that we have all input * Removed invalid Assertion, and removed early return if output is full. * Removed DebugLogging from CalculationTest * Fixed skipping in SortExecutor * Fixed upstreamState return of AqlItemMatrix. It is only finalState iff there is no data * Let the InputMatrix drop all rows after FullCount * Allow bypassing with hardLimit 0 in InputMatrix * Disabled the old-code assertion. 
This way the SortExecutor tests we have will be red, but not crash, allowing the jenkins to produce meaningfull results * Fix assert in WAITING case * Add rudimentary sort executor GTests * Fixed assertion on InputMatrix that starts right away with a shadowRow * Removed declared but not implemented function * changed from: peekDataRowAndState -> to: peekDataRow Co-authored-by: Michael Hackstein --- arangod/Aql/AllRowsFetcher.cpp | 47 ++++ arangod/Aql/AllRowsFetcher.h | 25 ++- arangod/Aql/AqlItemBlockInputMatrix.cpp | 142 ++++++++++++ arangod/Aql/AqlItemBlockInputMatrix.h | 67 ++++++ arangod/Aql/AqlItemBlockInputRange.cpp | 22 +- arangod/Aql/AqlItemBlockInputRange.h | 5 +- arangod/Aql/AqlItemMatrix.cpp | 8 +- arangod/Aql/AqlItemMatrix.h | 2 + arangod/Aql/EnumerateListExecutor.cpp | 1 - arangod/Aql/ExecutionBlockImpl.cpp | 174 ++++++++------- arangod/Aql/ExecutionBlockImpl.h | 10 +- arangod/Aql/SortExecutor.cpp | 109 +++++++--- arangod/Aql/SortExecutor.h | 21 ++ arangod/CMakeLists.txt | 1 + tests/Aql/AqlItemBlockInputRangeTest.cpp | 8 +- tests/Aql/CalculationExecutorTest.cpp | 9 +- tests/Aql/SortExecutorTest.cpp | 263 ++++++++++++----------- 17 files changed, 653 insertions(+), 261 deletions(-) create mode 100644 arangod/Aql/AqlItemBlockInputMatrix.cpp create mode 100644 arangod/Aql/AqlItemBlockInputMatrix.h diff --git a/arangod/Aql/AllRowsFetcher.cpp b/arangod/Aql/AllRowsFetcher.cpp index 117deb8ae799..27702b4f5995 100644 --- a/arangod/Aql/AllRowsFetcher.cpp +++ b/arangod/Aql/AllRowsFetcher.cpp @@ -21,6 +21,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "AllRowsFetcher.h" +#include #include "Aql/AqlItemBlock.h" #include "Aql/AqlItemMatrix.h" @@ -59,6 +60,52 @@ std::pair AllRowsFetcher::fetchAllRows() { return {ExecutionState::DONE, nullptr}; } +std::tuple AllRowsFetcher::execute(AqlCallStack& stack) { + if (!stack.isRelevant()) { + auto [state, skipped, block] = _dependencyProxy->execute(stack); + return {state, skipped, AqlItemBlockInputMatrix{block}}; + } + TRI_ASSERT(stack.peek().getOffset() == 0); + TRI_ASSERT(!stack.peek().needsFullCount()); + // We allow a 0 hardLimit for bypassing + // bot otherwise we do not allow any limit + TRI_ASSERT(!stack.peek().hasHardLimit() || stack.peek().getLimit() == 0); + TRI_ASSERT(!stack.peek().hasSoftLimit()); + + if (_aqlItemMatrix == nullptr) { + _aqlItemMatrix = std::make_unique(getNrInputRegisters()); + } + // We can only execute More if we are not Stopped yet. + TRI_ASSERT(!_aqlItemMatrix->stoppedOnShadowRow()); + while (true) { + auto [state, skipped, block] = _dependencyProxy->execute(stack); + TRI_ASSERT(skipped == 0); + + // we will either build a complete fetched AqlItemBlockInputMatrix or return an empty one + if (state == ExecutionState::WAITING) { + TRI_ASSERT(block == nullptr); + // On waiting we have nothing to return + return {state, 0, AqlItemBlockInputMatrix{ExecutorState::HASMORE}}; + } + TRI_ASSERT(block != nullptr || state == ExecutionState::DONE); + + if (block != nullptr) { + // we need to store the block for later creation of AqlItemBlockInputMatrix + _aqlItemMatrix->addBlock(std::move(block)); + } + + // If we find a ShadowRow or ExecutionState == Done, we're done fetching. 
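+    // A shadow row marks the end of the current subquery's input, so at this
+    // point the matrix holds all rows the executor needs in one piece.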
+ if (_aqlItemMatrix->stoppedOnShadowRow() || state == ExecutionState::DONE) { + if (state == ExecutionState::HASMORE) { + return {state, 0, + AqlItemBlockInputMatrix{ExecutorState::HASMORE, _aqlItemMatrix.get()}}; + } + return {state, 0, + AqlItemBlockInputMatrix{ExecutorState::DONE, _aqlItemMatrix.get()}}; + } + } +} + std::pair AllRowsFetcher::fetchRow(size_t atMost) { switch (_dataFetchedState) { case ALL_DATA_FETCHED: diff --git a/arangod/Aql/AllRowsFetcher.h b/arangod/Aql/AllRowsFetcher.h index adfe5a402817..f285715c7e11 100644 --- a/arangod/Aql/AllRowsFetcher.h +++ b/arangod/Aql/AllRowsFetcher.h @@ -33,8 +33,7 @@ #include #include -// TODO REMOVE ME TEMPORARY -#include "Aql/AqlItemBlockInputRange.h" +#include "Aql/AqlItemBlockInputMatrix.h" namespace arangodb { namespace aql { @@ -90,22 +89,28 @@ class AllRowsFetcher { public: explicit AllRowsFetcher(DependencyProxy& executionBlock); - TEST_VIRTUAL ~AllRowsFetcher() = default; - // TODO FIXME, this Range does not work here. - using DataRange = AqlItemBlockInputRange; + using DataRange = AqlItemBlockInputMatrix; protected: // only for testing! Does not initialize _dependencyProxy! AllRowsFetcher() = default; public: - // TODO implement and document - std::tuple execute(/* TODO: add"justDoIt"-style parameter */) { - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); - } + /** + * @brief Execute the given call stack + * + * @param stack Call stack, on top of stack there is current subquery, bottom is the main query. + * @return std::tuple + * ExecutionState => DONE, all queries are done, there will be no more + * ExecutionState => HASMORE, there are more results for queries, might be on other subqueries + * ExecutionState => WAITING, we need to do I/O to solve the request, save local state and return WAITING to caller immediately + * + * size_t => Amount of documents skipped + * DataRange => Resulting data + */ + std::tuple execute(AqlCallStack& stack); /** * @brief Fetch one new AqlItemRow from upstream. diff --git a/arangod/Aql/AqlItemBlockInputMatrix.cpp b/arangod/Aql/AqlItemBlockInputMatrix.cpp new file mode 100644 index 000000000000..d2a7d16c3cd3 --- /dev/null +++ b/arangod/Aql/AqlItemBlockInputMatrix.cpp @@ -0,0 +1,142 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "AqlItemBlockInputMatrix.h" +#include "Aql/ShadowAqlItemRow.h" + +#include +#include +#include +#include + +using namespace arangodb; +using namespace arangodb::aql; + +AqlItemBlockInputMatrix::AqlItemBlockInputMatrix(ExecutorState state) + : _finalState{state}, _aqlItemMatrix{nullptr} { + TRI_ASSERT(_aqlItemMatrix == nullptr); + TRI_ASSERT(!hasDataRow()); +} + +// only used for block passthrough +AqlItemBlockInputMatrix::AqlItemBlockInputMatrix(arangodb::aql::SharedAqlItemBlockPtr const& block) + : _block{block}, _aqlItemMatrix{nullptr} { + TRI_ASSERT(_aqlItemMatrix == nullptr); + TRI_ASSERT(!hasDataRow()); +} + +AqlItemBlockInputMatrix::AqlItemBlockInputMatrix(ExecutorState state, AqlItemMatrix* aqlItemMatrix) + : _finalState{state}, _aqlItemMatrix{aqlItemMatrix} { + TRI_ASSERT(_block == nullptr); + TRI_ASSERT(_aqlItemMatrix != nullptr); + if (_aqlItemMatrix->size() == 0 && _aqlItemMatrix->stoppedOnShadowRow()) { + // Fast forward to initialize the _shadowRow + skipAllRemainingDataRows(); + } +} + +SharedAqlItemBlockPtr AqlItemBlockInputMatrix::getBlock() const noexcept { + TRI_ASSERT(_aqlItemMatrix == nullptr); + return _block; +} + +std::pair AqlItemBlockInputMatrix::getMatrix() noexcept { + TRI_ASSERT(_aqlItemMatrix != nullptr); + TRI_ASSERT(_block == nullptr); + TRI_ASSERT(!_shadowRow.isInitialized()); + + // We are always done. This InputMatrix + // guarantees that we have all data in our hand at once. + return {ExecutorState::DONE, _aqlItemMatrix}; +} + +ExecutorState AqlItemBlockInputMatrix::upstreamState() const noexcept { + if (hasDataRow() || hasShadowRow()) { + return ExecutorState::DONE; + } + return _finalState; +} + +bool AqlItemBlockInputMatrix::upstreamHasMore() const noexcept { + return upstreamState() == ExecutorState::HASMORE; +} + +bool AqlItemBlockInputMatrix::hasDataRow() const noexcept { + if (_aqlItemMatrix == nullptr) { + return false; + } + return (!_shadowRow.isInitialized() && _aqlItemMatrix->size() != 0); +} + +std::pair AqlItemBlockInputMatrix::nextShadowRow() { + auto tmpShadowRow = _shadowRow; + + if (_aqlItemMatrix->size() == 0 && _aqlItemMatrix->stoppedOnShadowRow() && + !_aqlItemMatrix->peekShadowRow().isRelevant()) { + // next row will be a shadow row + _shadowRow = _aqlItemMatrix->popShadowRow(); + } else { + _shadowRow = ShadowAqlItemRow{CreateInvalidShadowRowHint()}; + } + + auto state = ExecutorState::HASMORE; + if (_shadowRow.isInitialized()) { + TRI_ASSERT(!_shadowRow.isRelevant()); + state = ExecutorState::HASMORE; + } else if (_aqlItemMatrix->stoppedOnShadowRow() && + _aqlItemMatrix->peekShadowRow().isRelevant()) { + state = ExecutorState::DONE; + } else { + state = _finalState; + } + + return {state, tmpShadowRow}; +} + +ShadowAqlItemRow AqlItemBlockInputMatrix::peekShadowRow() const { + return _shadowRow; +} + +bool AqlItemBlockInputMatrix::hasShadowRow() const noexcept { + return _shadowRow.isInitialized(); +} + +void AqlItemBlockInputMatrix::skipAllRemainingDataRows() { + if (_aqlItemMatrix == nullptr) { + // Have not been initialized. + // We need to be called before. 
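+    // Without a matrix there is nothing to skip and no shadow row to position on.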
+ TRI_ASSERT(!hasShadowRow()); + TRI_ASSERT(!hasDataRow()); + return; + } + if (!hasShadowRow()) { + if (_aqlItemMatrix->stoppedOnShadowRow()) { + _shadowRow = _aqlItemMatrix->popShadowRow(); + TRI_ASSERT(_shadowRow.isRelevant()); + } else { + TRI_ASSERT(_finalState == ExecutorState::DONE); + _aqlItemMatrix->clear(); + } + } + // Else we did already skip once. + // nothing to do +} diff --git a/arangod/Aql/AqlItemBlockInputMatrix.h b/arangod/Aql/AqlItemBlockInputMatrix.h new file mode 100644 index 000000000000..9ce5c3073dac --- /dev/null +++ b/arangod/Aql/AqlItemBlockInputMatrix.h @@ -0,0 +1,67 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AQLITEMBLOCKMATRIXITERATOR_H +#define ARANGOD_AQL_AQLITEMBLOCKMATRIXITERATOR_H + +#include "Aql/AqlItemMatrix.h" +#include "Aql/ExecutionState.h" +#include "Aql/InputAqlItemRow.h" +#include "Aql/SharedAqlItemBlockPtr.h" + +namespace arangodb::aql { + +class ShadowAqlItemRow; + +class AqlItemBlockInputMatrix { + public: + explicit AqlItemBlockInputMatrix(ExecutorState state); + + AqlItemBlockInputMatrix(arangodb::aql::SharedAqlItemBlockPtr const&); + + AqlItemBlockInputMatrix(ExecutorState state, AqlItemMatrix* aqlItemMatrix); + + std::pair nextShadowRow(); + ShadowAqlItemRow peekShadowRow() const; + bool hasShadowRow() const noexcept; + bool hasDataRow() const noexcept; + + arangodb::aql::SharedAqlItemBlockPtr getBlock() const noexcept; + std::pair getMatrix() noexcept; + + ExecutorState upstreamState() const noexcept; + bool upstreamHasMore() const noexcept; + void skipAllRemainingDataRows(); + + private: + arangodb::aql::SharedAqlItemBlockPtr _block{nullptr}; + ExecutorState _finalState{ExecutorState::HASMORE}; + + // Only if _aqlItemMatrix is set (and NOT a nullptr), we have a valid and usable + // DataRange object available to work with. + AqlItemMatrix* _aqlItemMatrix; + ShadowAqlItemRow _shadowRow{CreateInvalidShadowRowHint{}}; +}; + +} // namespace arangodb::aql + +#endif // ARANGOD_AQL_AQLITEMBLOCKINPUTITERATOR_H diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 4a0156247621..11bd5710e6be 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -57,6 +57,7 @@ bool AqlItemBlockInputRange::hasDataRow() const noexcept { return isIndexValid(_rowIndex) && !isShadowRowAtIndex(_rowIndex); } +// TODO: Implement peekDataRow (without state). e.g. IResearchViewExecutor does not need the state! 
std::pair AqlItemBlockInputRange::peekDataRow() const { if (hasDataRow()) { return std::make_pair(nextState(), @@ -95,7 +96,14 @@ bool AqlItemBlockInputRange::isShadowRowAtIndex(std::size_t index) const noexcep return _block->isShadowRow(index); } -std::pair AqlItemBlockInputRange::peekShadowRow() const { +arangodb::aql::ShadowAqlItemRow AqlItemBlockInputRange::peekShadowRow() const { + if (hasShadowRow()) { + return ShadowAqlItemRow{_block, _rowIndex}; + } + return ShadowAqlItemRow{CreateInvalidShadowRowHint{}}; +} + +std::pair AqlItemBlockInputRange::peekShadowRowAndState() const { if (hasShadowRow()) { return std::make_pair(nextState(), ShadowAqlItemRow{_block, _rowIndex}); @@ -105,7 +113,7 @@ std::pair AqlItemBlockInputRange::peekShadowRow } std::pair AqlItemBlockInputRange::nextShadowRow() { - auto res = peekShadowRow(); + auto res = peekShadowRowAndState(); if (res.second.isInitialized()) { // Advance the current row. _rowIndex++; @@ -113,6 +121,16 @@ std::pair AqlItemBlockInputRange::nextShadowRow return res; } +void AqlItemBlockInputRange::skipAllRemainingDataRows() { + ExecutorState state; + InputAqlItemRow row{CreateInvalidInputRowHint{}}; + + while (hasDataRow()) { + std::tie(state, row) = nextDataRow(); + TRI_ASSERT(row.isInitialized()); + } +} + template ExecutorState AqlItemBlockInputRange::nextState() const noexcept { size_t testRowIndex = _rowIndex; diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 79461c8b66f4..03e442a3a730 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -57,10 +57,13 @@ class AqlItemBlockInputRange { bool hasShadowRow() const noexcept; - std::pair peekShadowRow() const; + arangodb::aql::ShadowAqlItemRow peekShadowRow() const; + std::pair peekShadowRowAndState() const; std::pair nextShadowRow(); + void skipAllRemainingDataRows(); + // Subtract up to this many rows from the local `_skipped` state; return // the number actually skipped. Does not skip data rows. 
[[nodiscard]] auto skip(std::size_t) noexcept -> std::size_t; diff --git a/arangod/Aql/AqlItemMatrix.cpp b/arangod/Aql/AqlItemMatrix.cpp index 2fe8bb15903f..9f3361936b4d 100644 --- a/arangod/Aql/AqlItemMatrix.cpp +++ b/arangod/Aql/AqlItemMatrix.cpp @@ -125,6 +125,11 @@ std::vector AqlItemMatrix::produceRowIndexes() const { bool AqlItemMatrix::empty() const noexcept { return _blocks.empty(); } +void AqlItemMatrix::clear() { + _blocks.clear(); + _size = 0; +} + RegisterId AqlItemMatrix::getNrRegisters() const noexcept { return _nrRegs; } uint64_t AqlItemMatrix::size() const noexcept { return _size; } @@ -207,4 +212,5 @@ ShadowAqlItemRow AqlItemMatrix::peekShadowRow() const { } AqlItemMatrix::AqlItemMatrix(RegisterId nrRegs) - : _size(0), _nrRegs(nrRegs), _lastShadowRow(InvalidRowIndex) {} + : _size(0), _nrRegs(nrRegs), _lastShadowRow(InvalidRowIndex) { +} diff --git a/arangod/Aql/AqlItemMatrix.h b/arangod/Aql/AqlItemMatrix.h index 84130aa6237c..c14a0abd4677 100644 --- a/arangod/Aql/AqlItemMatrix.h +++ b/arangod/Aql/AqlItemMatrix.h @@ -74,6 +74,8 @@ class AqlItemMatrix { */ bool empty() const noexcept; + void clear(); + std::vector produceRowIndexes() const; /** diff --git a/arangod/Aql/EnumerateListExecutor.cpp b/arangod/Aql/EnumerateListExecutor.cpp index 2840dbf3339c..84d2ab2d080b 100644 --- a/arangod/Aql/EnumerateListExecutor.cpp +++ b/arangod/Aql/EnumerateListExecutor.cpp @@ -182,7 +182,6 @@ std::tuple EnumerateListExecutor::skipR initializeNewRow(inputRange); continue; } - // auto const& [state, input] = inputRange.peekDataRow(); TRI_ASSERT(_inputArrayPosition < _inputArrayLength); // if offset is > 0, we're in offset skip phase diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index d4f9ee43a800..5ffd2ad3edc1 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -134,17 +134,18 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); * TODO: This should be removed once all executors and fetchers are ported to the new style. 
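 * Executors on this list already implement the new execute-based interface
 * (produceRows on an input range and, where needed, skipRowsRange); all
 * remaining executors still run through the old fetcher-based code path.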
*/ template -constexpr bool isNewStyleExecutor = - is_one_of_v, - IdExecutor>, ReturnExecutor, - DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, - // TODO: re-enable after new subquery end & start are implemented - // CalculationExecutor, CalculationExecutor, CalculationExecutor, - HashedCollectExecutor, ConstrainedSortExecutor, +constexpr bool isNewStyleExecutor = is_one_of_v< + Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, + IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, + // TODO: re-enable after new subquery end & start are implemented + // CalculationExecutor, CalculationExecutor, CalculationExecutor, + HashedCollectExecutor, ConstrainedSortExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif + SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, + ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -165,12 +166,12 @@ constexpr bool isNewStyleExecutor = IResearchViewMergeExecutor, IResearchViewMergeExecutor, IResearchViewMergeExecutor, - SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, NoResultsExecutor, - ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, + NoResultsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>; - template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, ExecutionNode const* node, @@ -610,19 +611,7 @@ std::pair ExecutionBlockImpl::initializeCursor TRI_ASSERT(_state == InternalState::DONE || _state == InternalState::FETCH_DATA); _state = InternalState::FETCH_DATA; - constexpr bool customInit = hasInitializeCursor::value; - // IndexExecutor and EnumerateCollectionExecutor have initializeCursor - // implemented, so assert this implementation is used. - static_assert(!std::is_same::value || customInit, - "EnumerateCollectionExecutor is expected to implement a custom " - "initializeCursor method!"); - static_assert(!std::is_same::value || customInit, - "IndexExecutor is expected to implement a custom " - "initializeCursor method!"); - static_assert(!std::is_same::value || customInit, - "DistinctCollectExecutor is expected to implement a custom " - "initializeCursor method!"); - InitializeCursor::init(_executor, _rowFetcher, _infos); + resetExecutor(); // // use this with c++17 instead of specialization below // if constexpr (std::is_same_v) { @@ -731,9 +720,7 @@ auto ExecutionBlockImpl>::injectConstantBlock::value; - InitializeCursor::init(_executor, _rowFetcher, _infos); + resetExecutor(); } // TODO -- remove this specialization when cpp 17 becomes available @@ -1058,6 +1045,9 @@ void ExecutionBlockImpl::ensureOutputBlock(AqlCall&& call) { // This cannot return upstream call or shadowrows. 
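 // Once the executor has reported DONE for the current input, the block no
 // longer asks it to produce; it only fast-forwards (or fullCounts) the rest.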
template auto ExecutionBlockImpl::nextState(AqlCall const& call) const -> ExecState { + if (_executorReturnedDone) { + return ExecState::FASTFORWARD; + } if (call.getOffset() > 0) { // First skip return ExecState::SKIP; @@ -1124,13 +1114,13 @@ static SkipRowsRangeVariant constexpr skipRowsType() { useExecutor == (is_one_of_v< Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, - IdExecutor>, IdExecutor, - HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, - ConstrainedSortExecutor, + IdExecutor>, IdExecutor, HashedCollectExecutor, + IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, ConstrainedSortExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif - IResearchViewExecutor, + TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, + LimitExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1150,9 +1140,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { IResearchViewMergeExecutor, IResearchViewMergeExecutor, IResearchViewMergeExecutor, - TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, - SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, NoResultsExecutor, - SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, + LimitExecutor, NoResultsExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); @@ -1176,8 +1165,34 @@ static SkipRowsRangeVariant constexpr skipRowsType() { template struct dependent_false : std::false_type {}; +/** + * @brief Define the variant of FastForward behaviour + * + * FULLCOUNT => Call executeSkipRowsRange and report what has been skipped. + * EXECUTOR => Call executeSkipRowsRange, but do not report what has been skipped. + * (This instance is used to make sure Modifications are performed, or stats are correct) + * FETCHER => Do not bother the Executor, drop all from input, without further reporting + */ +enum class FastForwardVariant { FULLCOUNT, EXECUTOR, FETCHER }; + +template +static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwardVariant { + if (call.needsFullCount() && call.getOffset() == 0 && call.getLimit() == 0) { + // Only start fullCount after the original call is fulfilled. Otherwise + // do fast-forward variant + TRI_ASSERT(call.hasHardLimit()); + return FastForwardVariant::FULLCOUNT; + } + // TODO: We only need to do this is the executor actually require to call. + // e.g. Modifications will always need to be called. Limit only if it needs to report fullCount + if constexpr (is_one_of_v) { + return FastForwardVariant::EXECUTOR; + } + return FastForwardVariant::FETCHER; +} + template -auto ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& inputRange, +auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRange& inputRange, AqlCall& call) -> std::tuple { if constexpr (isNewStyleExecutor) { @@ -1185,7 +1200,9 @@ auto ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { // If the executor has a method skipRowsRange, to skip outputs. 
// Every non-passthrough executor needs to implement this. - return _executor.skipRowsRange(inputRange, call); + auto res = _executor.skipRowsRange(inputRange, call); + _executorReturnedDone = std::get(res) == ExecutorState::DONE; + return res; } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { // If we know that every input row produces exactly one output row (this // is a property of the executor), then we can just let the fetcher skip @@ -1195,6 +1212,7 @@ auto ExecutionBlockImpl::executeSkipRowsRange(AqlItemBlockInputRange& static_assert( std::is_same_v, "Executors with custom statistics must implement skipRowsRange."); + // TODO Set _executorReturnedDone? return {inputRange.upstreamState(), NoStats{}, 0, call}; } else { static_assert(dependent_false::value, @@ -1219,6 +1237,9 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecSta // If we get woken up by a dataRow during forwarding of ShadowRows // This will return false, and if so we need to call produce instead. auto didWrite = _executor.produceShadowRow(_lastRange, *_outputItemRow); + // The Subquery Start returns DONE after every row. + // This needs to be resetted as soon as a shadowRow has been produced + _executorReturnedDone = false; if (didWrite) { if (_lastRange.hasShadowRow()) { // Forward the ShadowRows @@ -1241,6 +1262,7 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecSta _outputItemRow->increaseShadowRowDepth(shadowRow); TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); + if (_lastRange.hasShadowRow()) { return ExecState::SHADOWROWS; } @@ -1267,6 +1289,9 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState // We need to consume the row, and write the Aggregate to it. _executor.consumeShadowRow(shadowRow, *_outputItemRow); didConsume = true; + // we need to reset the ExecutorHasReturnedDone, it will + // return done after every subquery is fully collected. + _executorReturnedDone = false; } else { _outputItemRow->decreaseShadowRowDepth(shadowRow); } @@ -1322,9 +1347,7 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { LOG_QUERY("6d337", DEBUG) << printTypeInfo() << " init executor."; // We found a relevant shadow Row. // We need to reset the Executor - // cppcheck-suppress unreadVariable - constexpr bool customInit = hasInitializeCursor::value; - InitializeCursor::init(_executor, _rowFetcher, _infos); + resetExecutor(); } TRI_ASSERT(_outputItemRow->produced()); @@ -1353,34 +1376,8 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { } } -/** - * @brief Define the variant of FastForward behaviour - * - * FULLCOUNT => Call executeSkipRowsRange and report what has been skipped. - * EXECUTOR => Call executeSkipRowsRange, but do not report what has been skipped. - * (This instance is used to make sure Modifications are performed, or stats are correct) - * FETCHER => Do not bother the Executor, drop all from input, without further reporting - */ -enum class FastForwardVariant { FULLCOUNT, EXECUTOR, FETCHER }; - template -static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwardVariant { - if (call.needsFullCount() && call.getOffset() == 0 && call.getLimit() == 0) { - // Only start fullCount after the original call is fulfilled. Otherwise - // do fast-forward variant - TRI_ASSERT(call.hasHardLimit()); - return FastForwardVariant::FULLCOUNT; - } - // TODO: We only need to do this is the executor actually require to call. - // e.g. Modifications will always need to be called. 
Limit only if it needs to report fullCount - if constexpr (is_one_of_v) { - return FastForwardVariant::EXECUTOR; - } - return FastForwardVariant::FETCHER; -} - -template -auto ExecutionBlockImpl::executeFastForward(AqlItemBlockInputRange& inputRange, +auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRange& inputRange, AqlCall& clientCall) -> std::tuple { TRI_ASSERT(isNewStyleExecutor); @@ -1404,14 +1401,18 @@ auto ExecutionBlockImpl::executeFastForward(AqlItemBlockInputRange& in // We do not report the skip skippedLocal = 0; } + if constexpr (std::is_same_v) { + // The executor will have used all Rows. + // However we need to drop them from the input + // here. + inputRange.skipAllRemainingDataRows(); + } + return {state, stats, skippedLocal, call}; } case FastForwardVariant::FETCHER: { LOG_QUERY("fa327", DEBUG) << printTypeInfo() << " bypass unused rows."; - while (inputRange.hasDataRow()) { - auto [state, row] = inputRange.nextDataRow(); - TRI_ASSERT(row.isInitialized()); - } + inputRange.skipAllRemainingDataRows(); AqlCall call{}; call.hardLimit = 0; return {inputRange.upstreamState(), typename Executor::Stats{}, 0, call}; @@ -1603,10 +1604,12 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { ensureOutputBlock(std::move(clientCall)); } TRI_ASSERT(_outputItemRow); + TRI_ASSERT(!_executorReturnedDone); // Execute getSome auto const [state, stats, call] = _executor.produceRows(_lastRange, *_outputItemRow); + _executorReturnedDone = state == ExecutorState::DONE; _engine->_stats += stats; localExecutorState = state; @@ -1647,11 +1650,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { localExecutorState = state; if (state == ExecutorState::DONE) { - if (_outputItemRow && _outputItemRow->isInitialized() && - _outputItemRow->allRowsUsed()) { - // We have a block with data, but no more place for a shadow row. - _execState = ExecState::DONE; - } else if (!_lastRange.hasShadowRow() && !_lastRange.hasDataRow()) { + if (!_lastRange.hasShadowRow() && !_lastRange.hasDataRow()) { _execState = ExecState::DONE; } else { _execState = ExecState::SHADOWROWS; @@ -1718,7 +1717,11 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // We skipped through passthrough, so count that a skip was solved. clientCall.didSkip(skippedLocal); } - _execState = ExecState::CHECKCALL; + if (_lastRange.hasShadowRow() && !_lastRange.peekShadowRow().isRelevant()) { + _execState = ExecState::SHADOWROWS; + } else { + _execState = ExecState::CHECKCALL; + } break; } case ExecState::SHADOWROWS: { @@ -1728,7 +1731,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { << printTypeInfo() << " (sub-)query completed. Move ShadowRows."; // TODO: Check if we can have the situation that we are between two shadow rows here. - // E.g. LastRow is releveant shadowRow. NextRow is non-relevant shadowRow. + // E.g. LastRow is relevant shadowRow. NextRow is non-relevant shadowRow. // NOTE: I do not think this is an issue, as the Executor will always say that it cannot do anything with // an empty input. Only exception might be COLLECT COUNT. @@ -1798,12 +1801,23 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } } -/// @brief reset all internal states after processing a shadow row. 
template -void ExecutionBlockImpl::resetAfterShadowRow() { +void ExecutionBlockImpl::resetExecutor() { // cppcheck-suppress unreadVariable - constexpr bool customInit = hasInitializeCursor::value; + constexpr bool customInit = hasInitializeCursor::value; + // IndexExecutor and EnumerateCollectionExecutor have initializeCursor + // implemented, so assert this implementation is used. + static_assert(!std::is_same::value || customInit, + "EnumerateCollectionExecutor is expected to implement a custom " + "initializeCursor method!"); + static_assert(!std::is_same::value || customInit, + "IndexExecutor is expected to implement a custom " + "initializeCursor method!"); + static_assert(!std::is_same::value || customInit, + "DistinctCollectExecutor is expected to implement a custom " + "initializeCursor method!"); InitializeCursor::init(_executor, _rowFetcher, _infos); + _executorReturnedDone = false; } template @@ -1829,7 +1843,7 @@ ExecutionState ExecutionBlockImpl::fetchShadowRowInternal() { } else { if (_state != InternalState::DONE) { _state = InternalState::FETCH_DATA; - resetAfterShadowRow(); + resetExecutor(); } } return state; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 43d7f4e28d47..6e4e383a22ce 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -235,9 +235,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { // execute a skipRowsRange call std::tuple executeSkipRowsRange( - AqlItemBlockInputRange& input, AqlCall& call); + typename Fetcher::DataRange& inputRange, AqlCall& call); - auto executeFastForward(AqlItemBlockInputRange& inputRange, AqlCall& clientCall) + auto executeFastForward(typename Fetcher::DataRange& inputRange, AqlCall& clientCall) -> std::tuple; /** @@ -276,8 +276,6 @@ class ExecutionBlockImpl final : public ExecutionBlock { /// @brief request an AqlItemBlock from the memory manager [[nodiscard]] SharedAqlItemBlockPtr requestBlock(size_t nrItems, RegisterCount nrRegs); - void resetAfterShadowRow(); - [[nodiscard]] ExecutionState fetchShadowRowInternal(); // Allocate an output block and install a call in it @@ -299,6 +297,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] auto outputIsFull() const noexcept -> bool; + void resetExecutor(); + private: /** * @brief Used to allow the row Fetcher to access selected methods of this @@ -342,6 +342,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { // into an output block. // If so we are not allowed to reuse it. 
bool _hasUsedDataRangeBlock; + + bool _executorReturnedDone = false; }; } // namespace arangodb::aql diff --git a/arangod/Aql/SortExecutor.cpp b/arangod/Aql/SortExecutor.cpp index fed978c2f256..6cf17cffc829 100644 --- a/arangod/Aql/SortExecutor.cpp +++ b/arangod/Aql/SortExecutor.cpp @@ -29,6 +29,7 @@ #include "Aql/SortRegister.h" #include "Aql/Stats.h" +#include #include using namespace arangodb; @@ -113,45 +114,74 @@ AqlItemBlockManager& SortExecutorInfos::itemBlockManager() noexcept { } SortExecutor::SortExecutor(Fetcher& fetcher, SortExecutorInfos& infos) - : _infos(infos), _fetcher(fetcher), _input(nullptr), _returnNext(0) {} + : _infos(infos), + _fetcher(fetcher), + _input(nullptr), + _currentRow(CreateInvalidInputRowHint{}), + _returnNext(0) {} SortExecutor::~SortExecutor() = default; std::pair SortExecutor::produceRows(OutputAqlItemRow& output) { - ExecutionState state; - if (_input == nullptr) { - // We need to get data - std::tie(state, _input) = _fetcher.fetchAllRows(); - if (state == ExecutionState::WAITING) { - return {state, NoStats{}}; - } - // If the execution state was not waiting it is guaranteed that we get a - // matrix. Maybe empty still - TRI_ASSERT(_input != nullptr); - if (_input == nullptr) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); - } - // After allRows the dependency has to be done - TRI_ASSERT(state == ExecutionState::DONE); + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +void SortExecutor::initializeInputMatrix(AqlItemBlockInputMatrix& inputMatrix) { + TRI_ASSERT(_input == nullptr); + ExecutorState state; - // Execute the sort - doSorting(); + // We need to get data + std::tie(state, _input) = inputMatrix.getMatrix(); + + // If the execution state was not waiting it is guaranteed that we get a + // matrix. Maybe empty still + TRI_ASSERT(_input != nullptr); + if (_input == nullptr) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); } + // After allRows the dependency has to be done + TRI_ASSERT(state == ExecutorState::DONE); + + // Execute the sort + doSorting(); + // If we get here we have an input matrix // And we have a list of sorted indexes. 
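  // The sorted index vector is computed exactly once here; produceRows and
  // skipRowsRange then both walk it via _returnNext.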
TRI_ASSERT(_input != nullptr); TRI_ASSERT(_sortedIndexes.size() == _input->size()); +}; + +std::tuple SortExecutor::produceRows( + AqlItemBlockInputMatrix& inputMatrix, OutputAqlItemRow& output) { + AqlCall upstreamCall{}; + + // if (inputMatrix.upstreamState() == ExecutorState::HASMORE) { + if (!inputMatrix.hasDataRow()) { + // If our inputMatrix does not contain all upstream rows + return {inputMatrix.upstreamState(), NoStats{}, upstreamCall}; + } + + if (_input == nullptr) { + initializeInputMatrix(inputMatrix); + } + if (_returnNext >= _sortedIndexes.size()) { // Bail out if called too often, // Bail out on no elements - return {ExecutionState::DONE, NoStats{}}; + return {ExecutorState::DONE, NoStats{}, upstreamCall}; + } + + while (_returnNext < _sortedIndexes.size() && !output.isFull()) { + InputAqlItemRow inRow = _input->getRow(_sortedIndexes[_returnNext]); + output.copyRow(inRow); + output.advanceRow(); + _returnNext++; } - InputAqlItemRow inRow = _input->getRow(_sortedIndexes[_returnNext]); - output.copyRow(inRow); - _returnNext++; + if (_returnNext >= _sortedIndexes.size()) { - return {ExecutionState::DONE, NoStats{}}; + return {ExecutorState::DONE, NoStats{}, upstreamCall}; } - return {ExecutionState::HASMORE, NoStats{}}; + return {ExecutorState::HASMORE, NoStats{}, upstreamCall}; } void SortExecutor::doSorting() { @@ -169,6 +199,37 @@ void SortExecutor::doSorting() { } } +std::tuple SortExecutor::skipRowsRange( + AqlItemBlockInputMatrix& inputMatrix, AqlCall& call) { + AqlCall upstreamCall{}; + + if (inputMatrix.upstreamState() == ExecutorState::HASMORE) { + // If our inputMatrix does not contain all upstream rows + return {ExecutorState::HASMORE, NoStats{}, 0, upstreamCall}; + } + + if (_input == nullptr) { + initializeInputMatrix(inputMatrix); + } + + if (_returnNext >= _sortedIndexes.size()) { + // Bail out if called too often, + // Bail out on no elements + return {ExecutorState::DONE, NoStats{}, 0, upstreamCall}; + } + + while (_returnNext < _sortedIndexes.size() && call.shouldSkip()) { + InputAqlItemRow inRow = _input->getRow(_sortedIndexes[_returnNext]); + _returnNext++; + call.didSkip(1); + } + + if (_returnNext >= _sortedIndexes.size()) { + return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), upstreamCall}; + } + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), upstreamCall}; +} + std::pair SortExecutor::expectedNumberOfRows(size_t atMost) const { if (_input == nullptr) { // This executor does not know anything yet. diff --git a/arangod/Aql/SortExecutor.h b/arangod/Aql/SortExecutor.h index aea597d8b204..973cd20393a6 100644 --- a/arangod/Aql/SortExecutor.h +++ b/arangod/Aql/SortExecutor.h @@ -30,6 +30,7 @@ #include "Aql/AqlItemMatrix.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" +#include "Aql/InputAqlItemRow.h" #include #include @@ -41,6 +42,8 @@ class Methods; namespace aql { +struct AqlCall; +class AqlItemBlockInputMatrix; class AllRowsFetcher; class ExecutorInfos; class NoStats; @@ -105,6 +108,23 @@ class SortExecutor { std::pair produceRows(OutputAqlItemRow& output); std::pair expectedNumberOfRows(size_t) const; + void initializeInputMatrix(AqlItemBlockInputMatrix& inputMatrix); + + /** + * @brief produce the next Rows of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple produceRows( + AqlItemBlockInputMatrix& inputMatrix, OutputAqlItemRow& output); + + /** + * @brief skip the next Row of Aql Values. 
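+   *        Like produceRows, this first needs the complete, sorted input
+   *        matrix; rows are then skipped along the sorted index order.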
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple skipRowsRange( + AqlItemBlockInputMatrix& inputMatrix, AqlCall& call); private: void doSorting(); @@ -115,6 +135,7 @@ class SortExecutor { Fetcher& _fetcher; AqlItemMatrix const* _input; + InputAqlItemRow _currentRow; std::vector _sortedIndexes; diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 36fb43c4675b..ca56d6ebf1b6 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -222,6 +222,7 @@ set(LIB_ARANGO_AQL_SOURCES Aql/AqlFunctionFeature.cpp Aql/AqlItemBlock.cpp Aql/AqlItemBlockInputRange.cpp + Aql/AqlItemBlockInputMatrix.cpp Aql/AqlItemBlockManager.cpp Aql/AqlItemBlockUtils.cpp Aql/AqlItemMatrix.cpp diff --git a/tests/Aql/AqlItemBlockInputRangeTest.cpp b/tests/Aql/AqlItemBlockInputRangeTest.cpp index ac80439ff7a8..a09bf2a91435 100644 --- a/tests/Aql/AqlItemBlockInputRangeTest.cpp +++ b/tests/Aql/AqlItemBlockInputRangeTest.cpp @@ -72,7 +72,7 @@ class InputRangeTest : public ::testing::TestWithParam { // Test Shadow Rows EXPECT_FALSE(testee.hasShadowRow()); { - auto const [state, row] = testee.peekShadowRow(); + auto const [state, row] = testee.peekShadowRowAndState(); EXPECT_EQ(GetParam(), state); EXPECT_FALSE(row.isInitialized()); } @@ -92,7 +92,7 @@ class InputRangeTest : public ::testing::TestWithParam { auto rowIndexBefore = testee.getRowIndex(); // Validate that shadowRowAPI does not move on { - auto [state, row] = testee.peekShadowRow(); + auto [state, row] = testee.peekShadowRowAndState(); EXPECT_EQ(state, ExecutorState::DONE); EXPECT_FALSE(row.isInitialized()); ASSERT_EQ(rowIndexBefore, testee.getRowIndex()) @@ -154,7 +154,7 @@ class InputRangeTest : public ::testing::TestWithParam { } // Validate ShadowRow API { - auto [state, row] = testee.peekShadowRow(); + auto [state, row] = testee.peekShadowRowAndState(); EXPECT_EQ(state, expectedState); EXPECT_TRUE(row.isInitialized()); auto val = row.getValue(0); @@ -209,7 +209,7 @@ TEST_P(InputRangeTest, empty_does_not_have_more_shadow_rows) { TEST_P(InputRangeTest, empty_peek_shadow_is_empty) { auto testee = createEmpty(); - auto const [state, row] = testee.peekShadowRow(); + auto const [state, row] = testee.peekShadowRowAndState(); EXPECT_EQ(GetParam(), state); EXPECT_FALSE(row.isInitialized()); } diff --git a/tests/Aql/CalculationExecutorTest.cpp b/tests/Aql/CalculationExecutorTest.cpp index ab64ffbf4d42..37db9313c92d 100644 --- a/tests/Aql/CalculationExecutorTest.cpp +++ b/tests/Aql/CalculationExecutorTest.cpp @@ -69,7 +69,7 @@ namespace aql { // CalculationExecutor and // CalculationExecutor! 
-class CalculationExecutorTest : public AqlExecutorTestCase { +class CalculationExecutorTest : public AqlExecutorTestCase { protected: ExecutionState state; AqlItemBlockManager itemBlockManager; @@ -285,9 +285,8 @@ TEST_F(CalculationExecutorTest, test_produce_datarange_need_more) { AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0}; OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), - infos.registersToClear(), - AqlCall{0, 3, AqlCall::Infinity{}, false}); + infos.registersToKeep(), infos.registersToClear(), + AqlCall{0, 3, AqlCall::Infinity{}, false}); auto myCall = output.getClientCall(); EXPECT_EQ(myCall.getLimit(), 3); @@ -316,7 +315,7 @@ TEST_F(CalculationExecutorTest, test_produce_datarange_need_more) { EXPECT_FALSE(outputCall.fullCount); } -TEST_F(CalculationExecutorTest, DISABLED_test_produce_datarange_has_more) { // TODO: fix and re-enable after this executor newStyle is active +TEST_F(CalculationExecutorTest, DISABLED_test_produce_datarange_has_more) { // TODO: fix and re-enable after this executor newStyle is active // This fetcher will not be called! // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); diff --git a/tests/Aql/SortExecutorTest.cpp b/tests/Aql/SortExecutorTest.cpp index b7a204e0ec5a..c2da61092d31 100644 --- a/tests/Aql/SortExecutorTest.cpp +++ b/tests/Aql/SortExecutorTest.cpp @@ -25,23 +25,26 @@ #include "gtest/gtest.h" +#include "ExecutorTestHelper.h" + #include "fakeit.hpp" #include "RowFetcherHelper.h" #include "Aql/AqlItemBlock.h" +#include "Aql/ConstrainedSortExecutor.h" #include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutionNode.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/ResourceUsage.h" #include "Aql/SortExecutor.h" -#include "Aql/ConstrainedSortExecutor.h" #include "Aql/SortRegister.h" #include "Aql/Stats.h" #include "Aql/Variable.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" +#include "AqlItemBlockHelper.h" #include "search/sort.hpp" #include @@ -52,148 +55,150 @@ using namespace arangodb::aql; namespace arangodb::tests::aql { -class SortExecutorTest : public ::testing::Test { - protected: - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - SharedAqlItemBlockPtr block; +using SortTestHelper = ExecutorTestHelper<1, 1>; +using SortSplitType = SortTestHelper::SplitType; +using SortInputParam = std::tuple; - velocypack::Options const* vpackOptions{&velocypack::Options::Defaults}; +class SortExecutorTest : public AqlExecutorTestCaseWithParam { + protected: + auto getSplit() -> SortSplitType { + auto const& [split] = GetParam(); + return split; + } - Variable sortVar; - SortElement sl; - SortRegister sortReg; - std::vector sortRegisters; - - SortExecutorTest() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - block(new AqlItemBlock(itemBlockManager, 1000, 1)), - sortVar("mySortVar", 0), - sl(&sortVar, true), - sortReg(0, sl) { + auto makeInfos() -> SortExecutorInfos { + SortElement sl{&sortVar, true}; + SortRegister sortReg{0, sl}; + std::vector sortRegisters; sortRegisters.emplace_back(std::move(sortReg)); + return SortExecutorInfos(std::move(sortRegisters), + /*limit (ignored for default sort)*/ 0, manager(), + 1, 1, {}, {0}, vpackOptions, false); } + + private: + velocypack::Options const* vpackOptions{&velocypack::Options::Defaults}; + Variable sortVar{"mySortVar", 0}; }; -TEST_F(SortExecutorTest, 
no_rows_upstream_producer_doesnt_wait) { - SortExecutorInfos infos(std::move(sortRegisters), - /*limit (ignored for default sort)*/ 0, - itemBlockManager, 1, 1, {}, {0}, vpackOptions, false); - VPackBuilder input; - AllRowsFetcherHelper fetcher(input.steal(), false); - SortExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +template +const SortSplitType splitIntoBlocks = SortSplitType{std::vector{vs...}}; +template +const SortSplitType splitStep = SortSplitType{step}; + +INSTANTIATE_TEST_CASE_P(SortExecutorTest, SortExecutorTest, + ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>, + splitStep<1>, splitStep<2>)); + +TEST_P(SortExecutorTest, does_sort_all) { + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {{1}, {2}, {3}, {4}, {5}}) + .setCall(call) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); } -TEST_F(SortExecutorTest, no_rows_upstream_producer_waits) { - SortExecutorInfos infos(std::move(sortRegisters), - /*limit (ignored for default sort)*/ 0, - itemBlockManager, 1, 1, {}, {0}, vpackOptions, false); - VPackBuilder input; - AllRowsFetcherHelper fetcher(input.steal(), true); - SortExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). 
- NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +TEST_P(SortExecutorTest, no_input) { + AqlCall call{}; // unlimited produce + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({}) + .expectOutput({0}, {}) + .setCall(call) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); } +TEST_P(SortExecutorTest, skip) { + AqlCall call{2}; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {{3}, {4}, {5}}) + .setCall(call) + .expectSkipped(2) + .expectedState(ExecutionState::DONE) + .run(); +} -TEST_F(SortExecutorTest, rows_upstream_we_are_waiting_for_list_of_numbers) { - SortExecutorInfos infos(std::move(sortRegisters), - /*limit (ignored for default sort)*/ 0, - itemBlockManager, 1, 1, {}, {0}, vpackOptions, false); - std::shared_ptr input = - VPackParser::fromJson("[[5],[3],[1],[2],[4]]"); - AllRowsFetcherHelper fetcher(input->steal(), true); - SortExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - // Wait, 5, Wait, 3, Wait, 1, Wait, 2, Wait, 4, HASMORE - for (size_t i = 0; i < 5; ++i) { - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - } - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - - block = result.stealBlock(); - AqlValue v = block->getValue(0, 0); - ASSERT_TRUE(v.isNumber()); - int64_t number = v.toInt64(); - ASSERT_EQ(number, 1); - - v = block->getValue(1, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 2); +TEST_P(SortExecutorTest, hard_limit) { + AqlCall call{0, false, 2, AqlCall::LimitType::HARD}; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {{1}, {2}}) + .setCall(call) + .expectSkipped(0) + 
.expectedState(ExecutionState::DONE) + .run(); +} - v = block->getValue(2, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 3); +TEST_P(SortExecutorTest, soft_limit) { + AqlCall call{0, false, 2, AqlCall::LimitType::SOFT}; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {{1}, {2}}) + .setCall(call) + .expectSkipped(0) + .expectedState(ExecutionState::HASMORE) + .run(); +} - v = block->getValue(3, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 4); +TEST_P(SortExecutorTest, fullcount) { + AqlCall call{0, true, 2, AqlCall::LimitType::HARD}; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {{1}, {2}}) + .setCall(call) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(); +} - v = block->getValue(4, 0); - ASSERT_TRUE(v.isNumber()); - number = v.toInt64(); - ASSERT_EQ(number, 5); +TEST_P(SortExecutorTest, skip_produce_fullcount) { + AqlCall call{2, true, 2, AqlCall::LimitType::HARD}; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {{3}, {4}}) + .setCall(call) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(); } +TEST_P(SortExecutorTest, skip_too_much) { + AqlCall call{10, false}; + ExecutionStats stats{}; // No stats here + ExecutorTestHelper(*fakedQuery) + .setExecBlock(makeInfos(), ExecutionNode::SORT) + .setInputSplitType(getSplit()) + .setInputValue({{5}, {3}, {1}, {2}, {4}}) + .expectOutput({0}, {}) + .setCall(call) + .expectSkipped(5) + .expectedState(ExecutionState::DONE) + .run(); +} } // namespace arangodb::tests::aql From 28b9c044a2815df4eb82cd266a1b1d948db980d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Fri, 28 Feb 2020 21:46:47 +0100 Subject: [PATCH 084/122] Feature/aql subquery execute in restaqlhandler (#11175) * Updated comment * Added two static strings * Avoid unintentional streaming operator of string_view as a container * Added parser for execute rest call * Fixed ambiguous overload in ResultT * Added execute and executeForClient to ExecutionEngine * Implemented executeForClient in RestAqlHandler * Add default value * Updated comment * Changed error attribute * Fixed duplicate log id * Try to make MSVC work * Moved AqlExecuteResult to a separate file * Tried to fix MSVC compile error * Revert "Tried to fix MSVC compile error" This reverts commit f6f43b2c7e4bfe17dafe65f48bcd5c0b6e5c69a9. * Next try to fix MSVC * Revert "Next try to fix MSVC" This reverts commit 528c4c795d10ee0aa6686d143be1f6faa6e9b553. * Revert "Try to make MSVC work" This reverts commit ba7d9c072fccb969da1cdf045eadf49c297da8e9. 
* Work around MSVC shortcoming --- arangod/Aql/AqlCall.cpp | 225 ++++++++++++++++++ arangod/Aql/AqlCall.h | 9 +- arangod/Aql/AqlCallStack.cpp | 47 +++- arangod/Aql/AqlCallStack.h | 16 +- arangod/Aql/AqlExecuteResult.cpp | 70 ++++++ arangod/Aql/AqlExecuteResult.h | 57 +++++ arangod/Aql/ExecutionEngine.cpp | 46 +++- arangod/Aql/ExecutionEngine.h | 7 + arangod/Aql/RestAqlHandler.cpp | 178 ++++++++++---- arangod/Aql/RestAqlHandler.h | 50 +++- arangod/CMakeLists.txt | 2 + arangod/Cluster/ResultT.h | 10 +- arangod/Cluster/SynchronizeShard.cpp | 2 +- arangod/RestHandler/RestRepairHandler.cpp | 2 +- .../RocksDBRestReplicationHandler.cpp | 2 +- lib/Basics/StaticStrings.cpp | 16 ++ lib/Basics/StaticStrings.h | 16 ++ lib/Basics/debugging.h | 27 ++- 18 files changed, 703 insertions(+), 79 deletions(-) create mode 100644 arangod/Aql/AqlCall.cpp create mode 100644 arangod/Aql/AqlExecuteResult.cpp create mode 100644 arangod/Aql/AqlExecuteResult.h diff --git a/arangod/Aql/AqlCall.cpp b/arangod/Aql/AqlCall.cpp new file mode 100644 index 000000000000..cc8f947f13c1 --- /dev/null +++ b/arangod/Aql/AqlCall.cpp @@ -0,0 +1,225 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "AqlCall.h" + +#include "Basics/StaticStrings.h" +#include "Basics/voc-errors.h" +#include "Logger/LogMacros.h" +#include "Logger/Logger.h" + +#include +#include + +#include +#include + +using namespace arangodb; +using namespace arangodb::aql; + +namespace { +// hack for MSVC +auto getStringView(velocypack::Slice slice) -> std::string_view { + velocypack::StringRef ref = slice.stringRef(); + return std::string_view(ref.data(), ref.size()); +} +} + +auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { + if (ADB_UNLIKELY(!slice.isObject())) { + using namespace std::string_literals; + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializating AqlCall: Expected object, got "s + slice.typeName()); + } + + auto expectedPropertiesFound = std::map{}; + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteLimit, false); + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteLimitType, false); + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteFullCount, false); + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteOffset, false); + + auto limit = AqlCall::Limit{}; + auto limitType = std::optional{}; + auto offset = decltype(AqlCall::offset){0}; + auto fullCount = false; + + auto const readLimit = [](velocypack::Slice slice) -> ResultT { + if (slice.isEqualString(StaticStrings::AqlRemoteInfinity)) { + return AqlCall::Limit{AqlCall::Infinity{}}; + } else if (slice.isInteger()) { + try { + return AqlCall::Limit{slice.getNumber()}; + } catch (velocypack::Exception const& ex) { + auto message = std::string{"When deserializating AqlCall: "}; + message += "When reading limit: "; + message += ex.what(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + } else { + auto message = std::string{"When deserializating AqlCall: "}; + message += "When reading limit: "; + if (slice.isString()) { + message += "Unexpected value '"; + message += getStringView(slice); + message += "'"; + } else { + message += "Unexpected type "; + message += slice.typeName(); + } + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + }; + + auto const readLimitType = [](velocypack::Slice slice) -> ResultT { + if (ADB_UNLIKELY(!slice.isString())) { + auto message = std::string{ + "When deserializating AqlCall: When reading limitType: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + auto value = getStringView(slice); + if (value == StaticStrings::AqlRemoteLimitTypeSoft) { + return AqlCall::LimitType::SOFT; + } + else if (value == StaticStrings::AqlRemoteLimitTypeHard) { + return AqlCall::LimitType::HARD; + } + else { + auto message = std::string{ + "When deserializating AqlCall: When reading limitType: " + "Unexpected value '"}; + message += value; + message += "'"; + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + }; + + auto const readFullCount = [](velocypack::Slice slice) -> ResultT { + if (ADB_UNLIKELY(!slice.isBool())) { + auto message = std::string{ + "When deserializating AqlCall: When reading fullCount: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + return slice.getBool(); + }; + + auto const readOffset = [](velocypack::Slice slice) -> ResultT { + if (!slice.isInteger()) { + auto message = std::string{ + "When deserializating 
AqlCall: When reading offset: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + try { + return slice.getNumber(); + } catch (velocypack::Exception const& ex) { + auto message = + std::string{"When deserializating AqlCall: When reading offset: "}; + message += ex.what(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + }; + + for (auto const it : velocypack::ObjectIterator(slice)) { + auto const keySlice = it.key; + if (ADB_UNLIKELY(!keySlice.isString())) { + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializating AqlCall: Key is not a string"); + } + auto const key = getStringView(keySlice); + + if (auto propIt = expectedPropertiesFound.find(key); + ADB_LIKELY(propIt != expectedPropertiesFound.end())) { + if (ADB_UNLIKELY(propIt->second)) { + return Result( + TRI_ERROR_TYPE_ERROR, + "When deserializating AqlCall: Encountered duplicate key"); + } + propIt->second = true; + } + + if (key == StaticStrings::AqlRemoteLimit) { + auto maybeLimit = readLimit(it.value); + if (maybeLimit.fail()) { + return std::move(maybeLimit).result(); + } + limit = maybeLimit.get(); + } else if (key == StaticStrings::AqlRemoteLimitType) { + auto maybeLimitType = readLimitType(it.value); + if (maybeLimitType.fail()) { + return std::move(maybeLimitType).result(); + } + limitType = maybeLimitType.get(); + } else if (key == StaticStrings::AqlRemoteFullCount) { + auto maybeFullCount = readFullCount(it.value); + if (maybeFullCount.fail()) { + return std::move(maybeFullCount).result(); + } + fullCount = maybeFullCount.get(); + } else if (key == StaticStrings::AqlRemoteOffset) { + auto maybeOffset = readOffset(it.value); + if (maybeOffset.fail()) { + return std::move(maybeOffset).result(); + } + offset = maybeOffset.get(); + } else { + LOG_TOPIC("404b0", WARN, Logger::AQL) + << "When deserializating AqlCall: Encountered unexpected key " << key; + // If you run into this assertion during rolling upgrades after adding a + // new attribute, remove it in the older version. + TRI_ASSERT(false); + } + } + + for (auto const& it : expectedPropertiesFound) { + if (ADB_UNLIKELY(!it.second)) { + auto message = std::string{"When deserializating AqlCall: missing key "}; + message += it.first; + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + } + + auto call = AqlCall{}; + + if (limitType.has_value()) { + switch (limitType.value()) { + case LimitType::SOFT: + call.softLimit = limit; + break; + case LimitType::HARD: + call.hardLimit = limit; + break; + } + } else if (ADB_UNLIKELY(!std::holds_alternative(limit))) { + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializating AqlCall: limit set, but limitType is missing."); + } + + call.offset = offset; + call.fullCount = fullCount; + + return call; +} diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 1abf4868b6c4..07214859d631 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -25,10 +25,15 @@ #include "Aql/ExecutionBlock.h" #include "Basics/overload.h" +#include "Cluster/ResultT.h" #include #include +namespace arangodb::velocypack { +class Slice; +} + namespace arangodb::aql { struct AqlCall { @@ -41,7 +46,7 @@ struct AqlCall { // On a less important case, softLimit = 0 and offset = 0 do not occur together, // but it's probably not worth implementing that in terms of data structures. 
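  // For illustration, a sketch of how these fields combine (the concrete
  // numbers are made up; the semantics are inferred from the constructor and
  // the SortExecutorTest cases elsewhere in this series): a call like
  //   AqlCall{2, true, 2, AqlCall::LimitType::HARD}
  // asks an executor to skip an offset of 2 rows, produce at most 2 rows,
  // and then count but not return the remainder because fullCount is set,
  // while
  //   AqlCall{0, false, 2, AqlCall::LimitType::SOFT}
  // only asks for up to 2 rows now and leaves the rest to later calls.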
class Infinity {}; - using Limit = std::variant; + using Limit = std::variant; AqlCall() = default; // Replacements for struct initialization @@ -58,6 +63,8 @@ struct AqlCall { hardLimit{limitType == LimitType::HARD ? Limit{limit} : Limit{Infinity{}}}, fullCount{fullCount} {} + static auto fromVelocyPack(velocypack::Slice) -> ResultT; + // TODO Remove me, this will not be necessary later static AqlCall SimulateSkipSome(std::size_t toSkip) { AqlCall call; diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 0b826fb6886b..92dab45176a5 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -22,6 +22,10 @@ #include "AqlCallStack.h" +#include +#include +#include + // TODO: This class is not yet memory efficient or optimized in any way. // it might be reimplement soon to have the above features, Focus now is on // the API we want to use. @@ -31,11 +35,10 @@ using namespace arangodb::aql; AqlCallStack::AqlCallStack(AqlCall call, bool compatibilityMode3_6) : _operations{{std::move(call)}}, - _depth(0), _compatibilityMode3_6(compatibilityMode3_6) {} AqlCallStack::AqlCallStack(AqlCallStack const& other, AqlCall call) - : _operations{other._operations}, _depth(0) { + : _operations{other._operations} { // We can only use this constructor on relevant levels // Alothers need to use passThrough constructor TRI_ASSERT(other._depth == 0); @@ -48,6 +51,9 @@ AqlCallStack::AqlCallStack(AqlCallStack const& other) _depth(other._depth), _compatibilityMode3_6(other._compatibilityMode3_6) {} +AqlCallStack::AqlCallStack(std::stack&& operations) + : _operations(std::move(operations)) {} + bool AqlCallStack::isRelevant() const { return _depth == 0; } AqlCall AqlCallStack::popCall() { @@ -107,4 +113,39 @@ auto AqlCallStack::increaseSubqueryDepth() -> void { TRI_ASSERT(_depth < std::numeric_limits::max() - 2); _depth++; TRI_ASSERT(!isRelevant()); -} \ No newline at end of file +} + +auto AqlCallStack::fromVelocyPack(velocypack::Slice const slice) -> ResultT { + if (ADB_UNLIKELY(!slice.isArray())) { + using namespace std::string_literals; + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializing AqlCallStack: expected array, got "s + + slice.typeName()); + } + if (ADB_UNLIKELY(slice.isEmptyArray())) { + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializing AqlCallStack: stack is empty"); + } + + auto stack = std::stack{}; + auto i = std::size_t{0}; + for (auto const entry : VPackArrayIterator(slice)) { + auto maybeAqlCall = AqlCall::fromVelocyPack(entry); + + if (ADB_UNLIKELY(maybeAqlCall.fail())) { + auto message = std::string{"When deserializing AqlCallStack: entry "}; + message += std::to_string(i); + message += ": "; + message += std::move(maybeAqlCall).errorMessage(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + + stack.emplace(maybeAqlCall.get()); + + ++i; + } + + TRI_ASSERT(i > 0); + + return AqlCallStack{std::move(stack)}; +} diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 16027bfcfd94..9d8e98df7d0b 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -24,10 +24,14 @@ #define ARANGOD_AQL_AQL_CALLSTACK_H 1 #include "Aql/AqlCall.h" +#include "Cluster/ResultT.h" #include namespace arangodb { +namespace velocypack { +class Slice; +} namespace aql { class AqlCallStack { @@ -41,6 +45,8 @@ class AqlCallStack { AqlCallStack& operator=(AqlCallStack const& other) = default; + static auto fromVelocyPack(velocypack::Slice) -> ResultT; + // Quick test is this CallStack is of local relevance, or it is 
sufficient to pass it through bool isRelevant() const; @@ -74,8 +80,7 @@ class AqlCallStack { void increaseSubqueryDepth(); // TODO: Remove me again, only used to fake DONE - // @deprecated - auto empty() const noexcept -> bool { + [[deprecated]] auto empty() const noexcept -> bool { return _operations.empty() && _depth == 0; } @@ -83,6 +88,9 @@ class AqlCallStack { return _operations.size() + _depth; } + private: + explicit AqlCallStack(std::stack&& operations); + private: // The list of operations, stacked by depth (e.g. bottom element is from main query) std::stack _operations; @@ -91,7 +99,7 @@ class AqlCallStack { // as they have been skipped. // In most cases this will be zero. // However if we skip a subquery that has a nested subquery this depth will be 1 in the nested subquery. - size_t _depth; + size_t _depth{0}; // This flag will be set if and only if // we are called with the 3.6 and earlier API @@ -99,7 +107,7 @@ class AqlCallStack { // and not 3.6.* -> 3.8.* we can savely remove // this flag and all it's side effects on the // version after 3.7. - bool _compatibilityMode3_6; + bool _compatibilityMode3_6{false}; }; } // namespace aql diff --git a/arangod/Aql/AqlExecuteResult.cpp b/arangod/Aql/AqlExecuteResult.cpp new file mode 100644 index 000000000000..3b56bebec8bf --- /dev/null +++ b/arangod/Aql/AqlExecuteResult.cpp @@ -0,0 +1,70 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "AqlExecuteResult.h" + +#include "Basics/StaticStrings.h" + +#include +#include + +using namespace arangodb; +using namespace arangodb::aql; + +auto AqlExecuteResult::state() const noexcept -> ExecutionState { + return _state; +} + +auto AqlExecuteResult::skipped() const noexcept -> std::size_t { + return _skipped; +} + +auto AqlExecuteResult::block() const noexcept -> SharedAqlItemBlockPtr const& { + return _block; +} + +void AqlExecuteResult::toVelocyPack(velocypack::Builder& builder, + velocypack::Options const* const options) { + using namespace arangodb::velocypack; + auto const stateToValue = [](ExecutionState state) -> Value { + switch (state) { + case ExecutionState::DONE: + return Value(StaticStrings::AqlRemoteStateDone); + case ExecutionState::HASMORE: + return Value(StaticStrings::AqlRemoteStateHasmore); + case ExecutionState::WAITING: + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_INTERNAL_AQL, + "Unexpected state WAITING, must not be serialized."); + } + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, "Unhandled state"); + }; + + builder.add(StaticStrings::AqlRemoteState, stateToValue(state())); + builder.add(StaticStrings::AqlRemoteSkipped, Value(skipped())); + if (block() != nullptr) { + ObjectBuilder guard(&builder, StaticStrings::AqlRemoteBlock); + block()->toVelocyPack(options, builder); + } else { + builder.add(StaticStrings::AqlRemoteBlock, Value(ValueType::Null)); + } +} diff --git a/arangod/Aql/AqlExecuteResult.h b/arangod/Aql/AqlExecuteResult.h new file mode 100644 index 000000000000..4d23804bd019 --- /dev/null +++ b/arangod/Aql/AqlExecuteResult.h @@ -0,0 +1,57 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AQLEXECUTERESULT_H +#define ARANGOD_AQL_AQLEXECUTERESULT_H + +#include "Aql/ExecutionState.h" +#include "Aql/SharedAqlItemBlockPtr.h" + +#include + +namespace arangodb::velocypack { +class Builder; +struct Options; +} // namespace arangodb::velocypack + +namespace arangodb::aql { + +class AqlExecuteResult { + public: + AqlExecuteResult(ExecutionState state, std::size_t skipped, SharedAqlItemBlockPtr&& block) + : _state(state), _skipped(skipped), _block(std::move(block)) {} + + void toVelocyPack(velocypack::Builder&, velocypack::Options const*); + + [[nodiscard]] auto state() const noexcept -> ExecutionState; + [[nodiscard]] auto skipped() const noexcept -> std::size_t; + [[nodiscard]] auto block() const noexcept -> SharedAqlItemBlockPtr const&; + + private: + ExecutionState _state = ExecutionState::HASMORE; + std::size_t _skipped = 0; + SharedAqlItemBlockPtr _block = nullptr; +}; + +} // namespace arangodb::aql + +#endif // ARANGOD_AQL_AQLEXECUTERESULT_H diff --git a/arangod/Aql/ExecutionEngine.cpp b/arangod/Aql/ExecutionEngine.cpp index 28adf34b8de9..2fa51ea000df 100644 --- a/arangod/Aql/ExecutionEngine.cpp +++ b/arangod/Aql/ExecutionEngine.cpp @@ -25,6 +25,7 @@ #include "ApplicationFeatures/ApplicationServer.h" #include "Aql/AqlResult.h" +#include "Aql/BlocksWithClients.h" #include "Aql/Collection.h" #include "Aql/EngineInfoContainerCoordinator.h" #include "Aql/EngineInfoContainerDBServerServerBased.h" @@ -42,8 +43,6 @@ #include "Basics/ScopeGuard.h" #include "Cluster/ServerState.h" #include "Futures/Utilities.h" -#include "Logger/LogMacros.h" -#include "Logger/Logger.h" #include "Network/Methods.h" #include "Network/NetworkFeature.h" #include "Network/Utils.h" @@ -564,6 +563,49 @@ std::pair ExecutionEngine::initializeCursor(SharedAqlIte return res; } +auto ExecutionEngine::execute(AqlCallStack const& stack) + -> std::tuple { + if (_query.killed()) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); + } + auto const res = _root->execute(stack); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + if (std::get(res) == ExecutionState::WAITING) { + auto const skipped = std::get(res); + auto const block = std::get(res); + TRI_ASSERT(skipped == 0); + TRI_ASSERT(block == nullptr); + } +#endif + return res; +} + +auto ExecutionEngine::executeForClient(AqlCallStack const& stack, std::string const& clientId) + -> std::tuple { + if (_query.killed()) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); + } + + auto rootBlock = dynamic_cast(root()); + if (rootBlock == nullptr) { + using namespace std::string_literals; + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, + "unexpected node type "s + + root()->getPlanNode()->getTypeString()); + } + + auto const res = rootBlock->executeForClient(stack, clientId); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + if (std::get(res) == ExecutionState::WAITING) { + auto const skipped = std::get(res); + auto const& block = std::get(res); + TRI_ASSERT(skipped == 0); + TRI_ASSERT(block == nullptr); + } +#endif + return res; +} + std::pair ExecutionEngine::getSome(size_t atMost) { if (_query.killed()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); diff --git a/arangod/Aql/ExecutionEngine.h b/arangod/Aql/ExecutionEngine.h index d56feed72c41..f9af9ff3d2c2 100644 --- a/arangod/Aql/ExecutionEngine.h +++ b/arangod/Aql/ExecutionEngine.h @@ -40,6 +40,7 @@ namespace arangodb { class 
Result; namespace aql { +class AqlCallStack; class AqlItemBlock; class ExecutionBlock; class ExecutionNode; @@ -96,6 +97,12 @@ class ExecutionEngine { /// return waiting std::pair shutdown(int errorCode); + auto execute(AqlCallStack const& stack) + -> std::tuple; + + auto executeForClient(AqlCallStack const& stack, std::string const& clientId) + -> std::tuple; + /// @brief getSome std::pair getSome(size_t atMost); diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index facdf4d8d802..366f65e4555f 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -23,9 +23,8 @@ #include "RestAqlHandler.h" -#include -#include - +#include "Aql/AqlCallStack.h" +#include "Aql/AqlExecuteResult.h" #include "Aql/AqlItemBlock.h" #include "Aql/AqlItemBlockSerializationFormat.h" #include "Aql/BlocksWithClients.h" @@ -46,7 +45,9 @@ #include "Logger/Logger.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" -#include "VocBase/ticks.h" + +#include +#include using namespace arangodb; using namespace arangodb::rest; @@ -400,41 +401,7 @@ bool RestAqlHandler::killQuery(std::string const& idString) { } // PUT method for /_api/aql//, (internal) -// this is using the part of the cursor API with side effects. -// : can be "lock" or "getSome" or "skip" or "initializeCursor" or -// "shutdown". -// The body must be a Json with the following attributes: -// For the "getSome" operation one has to give: -// "atMost": must be a positive integer, the cursor returns never -// more than "atMost" items. The result is the JSON representation -// of an AqlItemBlock. -// If "atMost" is not given it defaults to -// ExecutionBlock::DefaultBatchSize. -// For the "skipSome" operation one has to give: -// "atMost": must be a positive integer, the cursor skips never -// more than "atMost" items. The result is a JSON object with a -// single attribute "skipped" containing the number of -// skipped items. -// If "atMost" is not given it defaults to -// ExecutionBlock::DefaultBatchSize. -// For the "skip" operation one should give: -// "number": must be a positive integer, the cursor skips as many items, -// possibly exhausting the cursor. -// The result is a JSON with the attributes "error" (boolean), -// "errorMessage" (if applicable) and "done" (boolean) -// to indicate whether or not the cursor is exhausted. -// If "number" is not given it defaults to 1. -// For the "initializeCursor" operation, one has to bind the following -// attributes: -// "items": This is a serialized AqlItemBlock with usually only one row -// and the correct number of columns. -// "pos": The number of the row in "items" to take, usually 0. -// For the "shutdown" and "lock" operations no additional arguments are -// required and an empty JSON object in the body is OK. -// All operations allow to set the HTTP header "x-shard-id:". If this is -// set, then the root block of the stored query must be a ScatterBlock -// and the shard ID is given as an additional argument to the ScatterBlock's -// special API. 
+// see comment in header for details RestStatus RestAqlHandler::useQuery(std::string const& operation, std::string const& idString) { bool success = false; VPackSlice querySlice = this->parseVPackBody(success); @@ -635,6 +602,92 @@ Query* RestAqlHandler::findQuery(std::string const& idString) { return q; } +class AqlExecuteCall { + public: + // Deserializing factory + static auto fromVelocyPack(VPackSlice slice) -> ResultT; + + auto callStack() const noexcept -> AqlCallStack const& { return _callStack; } + + private: + AqlExecuteCall(AqlCallStack&& callStack) : _callStack(std::move(callStack)) {} + + AqlCallStack _callStack; +}; + +namespace { +// hack for MSVC +auto getStringView(velocypack::Slice slice) -> std::string_view { + velocypack::StringRef ref = slice.stringRef(); + return std::string_view(ref.data(), ref.size()); +} +} + +// TODO Use the deserializer when available +auto AqlExecuteCall::fromVelocyPack(VPackSlice const slice) -> ResultT { + if (ADB_UNLIKELY(!slice.isObject())) { + using namespace std::string_literals; + return Result(TRI_ERROR_CLUSTER_AQL_COMMUNICATION, + "When deserializating AqlExecuteCall: Expected object, got "s + + slice.typeName()); + } + + auto expectedPropertiesFound = std::map{}; + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteCallStack, false); + + auto callStack = std::optional{}; + + for (auto const it : VPackObjectIterator(slice)) { + auto const keySlice = it.key; + if (ADB_UNLIKELY(!keySlice.isString())) { + return Result(TRI_ERROR_CLUSTER_AQL_COMMUNICATION, + "When deserializating AqlExecuteCall: Key is not a string"); + } + auto const key = getStringView(keySlice); + + if (auto propIt = expectedPropertiesFound.find(key); + ADB_LIKELY(propIt != expectedPropertiesFound.end())) { + if (ADB_UNLIKELY(propIt->second)) { + return Result( + TRI_ERROR_CLUSTER_AQL_COMMUNICATION, + "When deserializating AqlExecuteCall: Encountered duplicate key"); + } + propIt->second = true; + } + + if (key == StaticStrings::AqlRemoteCallStack) { + auto maybeCallStack = AqlCallStack::fromVelocyPack(it.value); + if (ADB_UNLIKELY(maybeCallStack.fail())) { + auto message = std::string{"When deserializating AqlExecuteCall: failed to deserialize "}; + message += StaticStrings::AqlRemoteCallStack; + message += ": "; + message += maybeCallStack.errorMessage(); + return Result(TRI_ERROR_CLUSTER_AQL_COMMUNICATION, std::move(message)); + } + + callStack = maybeCallStack.get(); + } else { + LOG_TOPIC("0dd42", WARN, Logger::AQL) + << "When deserializating AqlExecuteCall: Encountered unexpected key " << key; + // If you run into this assertion during rolling upgrades after adding a + // new attribute, remove it in the older version. 
+ TRI_ASSERT(false); + } + } + + for (auto const& it : expectedPropertiesFound) { + if (ADB_UNLIKELY(!it.second)) { + auto message = std::string{"When deserializating AqlExecuteCall: missing key "}; + message += it.first; + return Result(TRI_ERROR_CLUSTER_AQL_COMMUNICATION, std::move(message)); + } + } + + TRI_ASSERT(callStack.has_value()); + + return {AqlExecuteCall{std::move(callStack).value()}}; +} + // handle for useQuery RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, VPackSlice const querySlice) { @@ -667,11 +720,46 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, auto transactionContext = _query->trx()->transactionContext(); + auto const rootNodeType = _query->engine()->root()->getPlanNode()->getType(); + VPackBuffer answerBuffer; VPackBuilder answerBuilder(answerBuffer); answerBuilder.openObject(/*unindexed*/ true); - if (operation == "getSome") { + if (operation == StaticStrings::AqlRemoteExecute) { + auto maybeExecuteCall = AqlExecuteCall::fromVelocyPack(querySlice); + if (maybeExecuteCall.fail()) { + generateError(std::move(maybeExecuteCall).result()); + return RestStatus::DONE; + } + auto& executeCall = maybeExecuteCall.get(); + + auto items = SharedAqlItemBlockPtr{}; + auto skipped = size_t{}; + auto state = ExecutionState::HASMORE; + + // shardId is set IFF the root node is scatter or distribute + TRI_ASSERT(shardId.empty() != (rootNodeType == ExecutionNode::SCATTER || + rootNodeType == ExecutionNode::DISTRIBUTE)); + if (shardId.empty()) { + std::tie(state, skipped, items) = + _query->engine()->execute(executeCall.callStack()); + if (state == ExecutionState::WAITING) { + return RestStatus::WAITING; + } + } else { + std::tie(state, skipped, items) = + _query->engine()->executeForClient(executeCall.callStack(), shardId); + if (state == ExecutionState::WAITING) { + return RestStatus::WAITING; + } + } + + auto result = AqlExecuteResult{state, skipped, std::move(items)}; + result.toVelocyPack(answerBuilder, + _query->trx()->transactionContextPtr()->getVPackOptions()); + answerBuilder.add(StaticStrings::Code, VPackValue(TRI_ERROR_NO_ERROR)); + } else if (operation == "getSome") { TRI_IF_FAILURE("RestAqlHandler::getSome") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -679,14 +767,16 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, ExecutionBlock::DefaultBatchSize); SharedAqlItemBlockPtr items; ExecutionState state; + + // shardId is set IFF the root node is scatter or distribute + TRI_ASSERT(shardId.empty() != (rootNodeType == ExecutionNode::SCATTER || + rootNodeType == ExecutionNode::DISTRIBUTE)); if (shardId.empty()) { std::tie(state, items) = _query->engine()->getSome(atMost); if (state == ExecutionState::WAITING) { return RestStatus::WAITING; } } else { - TRI_ASSERT(_query->engine()->root()->getPlanNode()->getType() == ExecutionNode::SCATTER || - _query->engine()->root()->getPlanNode()->getType() == ExecutionNode::DISTRIBUTE); auto block = dynamic_cast(_query->engine()->root()); if (block == nullptr) { THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, @@ -718,8 +808,8 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, } skipped = tmpRes.second; } else { - TRI_ASSERT(_query->engine()->root()->getPlanNode()->getType() == ExecutionNode::SCATTER || - _query->engine()->root()->getPlanNode()->getType() == ExecutionNode::DISTRIBUTE); + TRI_ASSERT(rootNodeType == ExecutionNode::SCATTER || + rootNodeType == ExecutionNode::DISTRIBUTE); auto block = dynamic_cast(_query->engine()->root()); 
if (block == nullptr) { diff --git a/arangod/Aql/RestAqlHandler.h b/arangod/Aql/RestAqlHandler.h index d0b05a9d14e3..c917c6a2f1ca 100644 --- a/arangod/Aql/RestAqlHandler.h +++ b/arangod/Aql/RestAqlHandler.h @@ -61,20 +61,46 @@ class RestAqlHandler : public RestVocbaseBaseHandler { // PUT method for /_api/aql//, this is using // the part of the cursor API with side effects. - // : can be "getSome" or "skip". + // : can be "execute", "getSome", "skipSome" "initializeCursor" or + // "shutdown". + // "getSome" and "skipSome" are only used pre-3.7 and can be + // removed in 3.8. // The body must be a Json with the following attributes: + // For the "execute" operation one has to give: + // "callStack": an array of objects, each with the following attributes: + // "offset": a non-negative integer + // "limit": either a non-negative integer, or the string "infinity" + // "limitType: string or null, either "soft" or "hard"; set iff limit is not infinity + // "fullCount": a boolean + // The result is an object with the attributes + // "code": integer, error code. + // If there was no error: + // "state": string, either "hasMore" or "done" + // "skipped": non-negative integer + // "result": serialized AqlItemBlock, or null when no rows are returned. // For the "getSome" operation one has to give: - // "atMost": must be a positiv integers, the cursor returns never - // more than "atMost" items. - // The result is the JSON representation of an - // AqlItemBlock. - // For the "skip" operation one has to give: - // "number": must be a positive integer, the cursor skips as many items, - // possibly exhausting the cursor. - // The result is a JSON with the attributes "error" (boolean), - // "errorMessage" (if applicable) and - // "done" (boolean) [3.4.0 and later] to indicate - // whether or not the cursor is exhausted. + // "atMost": must be a positive integer, the cursor returns never + // more than "atMost" items. Defaults to + // ExecutionBlock::DefaultBatchSize. + // The result is the JSON representation of an AqlItemBlock. + // For the "skipSome" operation one has to give: + // "atMost": must be a positive integer, the cursor skips never + // more than "atMost" items. The result is a JSON object with a + // single attribute "skipped" containing the number of + // skipped items. + // If "atMost" is not given it defaults to + // ExecutionBlock::DefaultBatchSize. + // For the "initializeCursor" operation, one has to bind the following + // attributes: + // "items": This is a serialized AqlItemBlock with usually only one row + // and the correct number of columns. + // "pos": The number of the row in "items" to take, usually 0. + // For the "shutdown" operation no additional arguments are + // required and an empty JSON object in the body is OK. + // All operations allow to set the HTTP header "x-shard-id:". If this is + // set, then the root block of the stored query must be a ScatterBlock + // and the shard ID is given as an additional argument to the ScatterBlock's + // special API. 
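  // A minimal sketch of one "execute" round trip, using the attribute names
  // documented above (the concrete values are illustrative only):
  //   request body:
  //     { "callStack": [ { "offset": 0, "limit": 100,
  //                        "limitType": "soft", "fullCount": false } ] }
  //   response body on success:
  //     { "code": 0, "state": "done", "skipped": 0,
  //       "result": <serialized AqlItemBlock, or null> }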
RestStatus useQuery(std::string const& operation, std::string const& idString); private: diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index ca56d6ebf1b6..ba2250d221ec 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -218,7 +218,9 @@ set(LIB_ARANGO_PREGEL_SOURCES set(LIB_ARANGO_AQL_SOURCES Aql/Aggregator.cpp Aql/AllRowsFetcher.cpp + Aql/AqlCall.cpp Aql/AqlCallStack.cpp + Aql/AqlExecuteResult.cpp Aql/AqlFunctionFeature.cpp Aql/AqlItemBlock.cpp Aql/AqlItemBlockInputRange.cpp diff --git a/arangod/Cluster/ResultT.h b/arangod/Cluster/ResultT.h index 4b455dead5d6..3ebd6fcde34f 100644 --- a/arangod/Cluster/ResultT.h +++ b/arangod/Cluster/ResultT.h @@ -230,14 +230,20 @@ class ResultT { ResultT(std::optional&& val_, int errorNumber, std::string const& errorMessage) : _result(errorNumber, errorMessage), _val(val_) {} + ResultT(std::optional&& val_, int errorNumber, std::string&& errorMessage) + : _result(errorNumber, std::move(errorMessage)), _val(val_) {} + ResultT(std::optional const& val_, int errorNumber) : _result(errorNumber), _val(std::move(val_)) {} ResultT(std::optional const& val_, int errorNumber, std::string const& errorMessage) : _result(errorNumber, errorMessage), _val(val_) {} - ResultT(std::optional&& val_, Result result) - : _result(std::move(result)), _val(std::move(val_)) {} + ResultT(std::optional const& val_, int errorNumber, std::string&& errorMessage) + : _result(errorNumber, std::move(errorMessage)), _val(val_) {} + + ResultT(std::optional&& val_, Result const& result) + : _result(result), _val(std::move(val_)) {} ResultT(std::optional&& val_, Result&& result) : _result(std::move(result)), _val(std::move(val_)) {} diff --git a/arangod/Cluster/SynchronizeShard.cpp b/arangod/Cluster/SynchronizeShard.cpp index f935461411b5..a50dc970fbb0 100644 --- a/arangod/Cluster/SynchronizeShard.cpp +++ b/arangod/Cluster/SynchronizeShard.cpp @@ -935,7 +935,7 @@ bool SynchronizeShard::first() { leader, lastTick, builder); if (!tickResult.ok()) { LOG_TOPIC("0a4d4", INFO, Logger::MAINTENANCE) << syncRes.errorMessage(); - _result.reset(tickResult.result()); + _result.reset(std::move(tickResult).result()); return false; } lastTick = tickResult.get(); diff --git a/arangod/RestHandler/RestRepairHandler.cpp b/arangod/RestHandler/RestRepairHandler.cpp index 31141dc189cc..1ccddc4adbb1 100644 --- a/arangod/RestHandler/RestRepairHandler.cpp +++ b/arangod/RestHandler/RestRepairHandler.cpp @@ -323,7 +323,7 @@ ResultT RestRepairHandler::jobFinished(std::string const& jobId) { << "Failed to get job status: " << "[" << jobStatus.errorNumber() << "] " << jobStatus.errorMessage(); - return ResultT::error(std::move(jobStatus.result())); + return ResultT::error(std::move(jobStatus).result()); } return ResultT::success(false); diff --git a/arangod/RocksDBEngine/RocksDBRestReplicationHandler.cpp b/arangod/RocksDBEngine/RocksDBRestReplicationHandler.cpp index e70f3221d589..032ad4ceabfc 100644 --- a/arangod/RocksDBEngine/RocksDBRestReplicationHandler.cpp +++ b/arangod/RocksDBEngine/RocksDBRestReplicationHandler.cpp @@ -128,7 +128,7 @@ void RocksDBRestReplicationHandler::handleCommandBatch() { auto res = _manager->extendLifetime(id, ttl); if (res.fail()) { - generateError(res.result()); + generateError(std::move(res).result()); return; } diff --git a/lib/Basics/StaticStrings.cpp b/lib/Basics/StaticStrings.cpp index 4962b624889b..dc44f9502536 100644 --- a/lib/Basics/StaticStrings.cpp +++ b/lib/Basics/StaticStrings.cpp @@ -294,8 +294,24 @@ std::string const 
StaticStrings::UpgradeEnvName( std::string const StaticStrings::BackupToDeleteName("DIRECTORY_TO_DELETE"); std::string const StaticStrings::BackupSearchToDeleteName( "DIRECTORY_TO_DELETE_SEARCH"); + +// aql api strings std::string const StaticStrings::SerializationFormat("serializationFormat"); std::string const StaticStrings::AqlRemoteApi("api"); +std::string const StaticStrings::AqlRemoteExecute("execute"); +std::string const StaticStrings::AqlRemoteCallStack("callStack"); +std::string const StaticStrings::AqlRemoteLimit("limit"); +std::string const StaticStrings::AqlRemoteLimitType("limitType"); +std::string const StaticStrings::AqlRemoteLimitTypeSoft("soft"); +std::string const StaticStrings::AqlRemoteLimitTypeHard("hard"); +std::string const StaticStrings::AqlRemoteFullCount("fullCount"); +std::string const StaticStrings::AqlRemoteOffset("offset"); +std::string const StaticStrings::AqlRemoteInfinity("infinity"); +std::string const StaticStrings::AqlRemoteBlock("block"); +std::string const StaticStrings::AqlRemoteSkipped("skipped"); +std::string const StaticStrings::AqlRemoteState("state"); +std::string const StaticStrings::AqlRemoteStateDone("done"); +std::string const StaticStrings::AqlRemoteStateHasmore("hasmore"); // validation std::string const StaticStrings::ValidatorLevelNone("none"); diff --git a/lib/Basics/StaticStrings.h b/lib/Basics/StaticStrings.h index 4591679174a3..d51660ec654d 100644 --- a/lib/Basics/StaticStrings.h +++ b/lib/Basics/StaticStrings.h @@ -270,8 +270,24 @@ class StaticStrings { static std::string const UpgradeEnvName; static std::string const BackupToDeleteName; static std::string const BackupSearchToDeleteName; + + // aql api strings static std::string const SerializationFormat; static std::string const AqlRemoteApi; + static std::string const AqlRemoteExecute; + static std::string const AqlRemoteCallStack; + static std::string const AqlRemoteLimit; + static std::string const AqlRemoteLimitType; + static std::string const AqlRemoteLimitTypeSoft; + static std::string const AqlRemoteLimitTypeHard; + static std::string const AqlRemoteFullCount; + static std::string const AqlRemoteOffset; + static std::string const AqlRemoteInfinity; + static std::string const AqlRemoteBlock; + static std::string const AqlRemoteSkipped; + static std::string const AqlRemoteState; + static std::string const AqlRemoteStateDone; + static std::string const AqlRemoteStateHasmore; // validation static std::string const ValidatorLevelNone; diff --git a/lib/Basics/debugging.h b/lib/Basics/debugging.h index 871d94722989..560e38a29078 100644 --- a/lib/Basics/debugging.h +++ b/lib/Basics/debugging.h @@ -119,6 +119,9 @@ template struct is_container { static constexpr bool value = sizeof(test(std::declval())) == 1; }; +template< class T > +inline constexpr bool is_container_v = is_container::value; + template < typename T > struct is_associative { static tc& test(...) 
; @@ -127,16 +130,24 @@ template < typename T > struct is_associative { static constexpr bool value = sizeof( test( std::declval() ) ) == 1 ; }; -} +} // namespace container_traits + +template +struct remove_cvref { + typedef std::remove_cv_t> type; +}; +template +using remove_cvref_t = typename remove_cvref::type; template < typename T > struct is_container : - std::conditional<(container_traits::is_container::value || std::is_array::value) - && !std::is_same::type>::value - && !std::is_same::type>::value - && !std::is_same::type>::value - && !std::is_same::type>::value - && !std::is_same::value - && !std::is_same::value, std::true_type, std::false_type >::type {}; + std::conditional_t<(container_traits::is_container_v || std::is_array_v) + && !std::is_same_v> + && !std::is_same_v> + && !std::is_same_v> + && !std::is_same_v> + && !std::is_same_v> + && !std::is_same_v>, + std::true_type, std::false_type > {}; template < typename T > struct is_associative : std::conditional< container_traits::is_container::value && container_traits::is_associative::value, From 08ceffe450adda8993869c9d8dbbd22df60bd52f Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Sat, 29 Feb 2020 07:13:19 +0000 Subject: [PATCH 085/122] Support executors with multiple dependencies (#11181) * Use DataRange in some places * Add MultiAqlItemBlockInputRange * First implementation of life, universe, and everything * implement skip for UnsortedGather * Some more wiring * Attempt at hacking up execute for DependencyProxy * Compilation fixes! * Fixed AqlCall compilation and unified execute and executeForDependency * Fixed WAITIGN assertion * Sonderlocken. * Foo * Make compile * Make tests pass. Co-authored-by: Michael Hackstein --- arangod/Aql/AqlItemBlockInputMatrix.cpp | 5 +- arangod/Aql/AqlItemBlockInputMatrix.h | 6 +- arangod/Aql/AqlItemBlockInputRange.cpp | 3 +- arangod/Aql/AqlItemBlockInputRange.h | 2 +- arangod/Aql/DependencyProxy.cpp | 70 +++++---- arangod/Aql/DependencyProxy.h | 3 + arangod/Aql/ExecutionBlockImpl.cpp | 145 +++++++++++++----- arangod/Aql/ExecutionBlockImpl.h | 16 +- arangod/Aql/MultiAqlItemBlockInputRange.cpp | 141 +++++++++++++++++ arangod/Aql/MultiAqlItemBlockInputRange.h | 78 ++++++++++ .../Aql/MultiDependencySingleRowFetcher.cpp | 41 ++++- arangod/Aql/MultiDependencySingleRowFetcher.h | 8 +- arangod/Aql/UnsortedGatherExecutor.cpp | 71 +++++++++ arangod/Aql/UnsortedGatherExecutor.h | 25 +-- arangod/CMakeLists.txt | 1 + 15 files changed, 526 insertions(+), 89 deletions(-) create mode 100644 arangod/Aql/MultiAqlItemBlockInputRange.cpp create mode 100644 arangod/Aql/MultiAqlItemBlockInputRange.h diff --git a/arangod/Aql/AqlItemBlockInputMatrix.cpp b/arangod/Aql/AqlItemBlockInputMatrix.cpp index d2a7d16c3cd3..956b98467c51 100644 --- a/arangod/Aql/AqlItemBlockInputMatrix.cpp +++ b/arangod/Aql/AqlItemBlockInputMatrix.cpp @@ -120,13 +120,13 @@ bool AqlItemBlockInputMatrix::hasShadowRow() const noexcept { return _shadowRow.isInitialized(); } -void AqlItemBlockInputMatrix::skipAllRemainingDataRows() { +size_t AqlItemBlockInputMatrix::skipAllRemainingDataRows() { if (_aqlItemMatrix == nullptr) { // Have not been initialized. // We need to be called before. TRI_ASSERT(!hasShadowRow()); TRI_ASSERT(!hasDataRow()); - return; + return 0; } if (!hasShadowRow()) { if (_aqlItemMatrix->stoppedOnShadowRow()) { @@ -139,4 +139,5 @@ void AqlItemBlockInputMatrix::skipAllRemainingDataRows() { } // Else we did already skip once. 
// nothing to do + return 0; } diff --git a/arangod/Aql/AqlItemBlockInputMatrix.h b/arangod/Aql/AqlItemBlockInputMatrix.h index 9ce5c3073dac..80e37e40e1f9 100644 --- a/arangod/Aql/AqlItemBlockInputMatrix.h +++ b/arangod/Aql/AqlItemBlockInputMatrix.h @@ -50,14 +50,14 @@ class AqlItemBlockInputMatrix { ExecutorState upstreamState() const noexcept; bool upstreamHasMore() const noexcept; - void skipAllRemainingDataRows(); + size_t skipAllRemainingDataRows(); private: arangodb::aql::SharedAqlItemBlockPtr _block{nullptr}; ExecutorState _finalState{ExecutorState::HASMORE}; - // Only if _aqlItemMatrix is set (and NOT a nullptr), we have a valid and usable - // DataRange object available to work with. + // Only if _aqlItemMatrix is set (and NOT a nullptr), we have a valid and + // usable DataRange object available to work with. AqlItemMatrix* _aqlItemMatrix; ShadowAqlItemRow _shadowRow{CreateInvalidShadowRowHint{}}; }; diff --git a/arangod/Aql/AqlItemBlockInputRange.cpp b/arangod/Aql/AqlItemBlockInputRange.cpp index 11bd5710e6be..b0329dfd94d7 100644 --- a/arangod/Aql/AqlItemBlockInputRange.cpp +++ b/arangod/Aql/AqlItemBlockInputRange.cpp @@ -121,7 +121,7 @@ std::pair AqlItemBlockInputRange::nextShadowRow return res; } -void AqlItemBlockInputRange::skipAllRemainingDataRows() { +size_t AqlItemBlockInputRange::skipAllRemainingDataRows() { ExecutorState state; InputAqlItemRow row{CreateInvalidInputRowHint{}}; @@ -129,6 +129,7 @@ void AqlItemBlockInputRange::skipAllRemainingDataRows() { std::tie(state, row) = nextDataRow(); TRI_ASSERT(row.isInitialized()); } + return 0; } template diff --git a/arangod/Aql/AqlItemBlockInputRange.h b/arangod/Aql/AqlItemBlockInputRange.h index 03e442a3a730..ef2bfd6b70e6 100644 --- a/arangod/Aql/AqlItemBlockInputRange.h +++ b/arangod/Aql/AqlItemBlockInputRange.h @@ -62,7 +62,7 @@ class AqlItemBlockInputRange { std::pair nextShadowRow(); - void skipAllRemainingDataRows(); + size_t skipAllRemainingDataRows(); // Subtract up to this many rows from the local `_skipped` state; return // the number actually skipped. Does not skip data rows. diff --git a/arangod/Aql/DependencyProxy.cpp b/arangod/Aql/DependencyProxy.cpp index b7828fea8ec4..cdbc955fac6c 100644 --- a/arangod/Aql/DependencyProxy.cpp +++ b/arangod/Aql/DependencyProxy.cpp @@ -37,44 +37,54 @@ DependencyProxy::execute(AqlCallStack& stack) { ExecutionState state = ExecutionState::HASMORE; size_t skipped = 0; SharedAqlItemBlockPtr block = nullptr; + // Note: upstreamBlock will return next dependency + // if we need to loop here do { - // Note: upstreamBlock will return next dependency - // if we need to loop here - if (!_distributeId.empty()) { - // We are in the cluster case. 
- // we have to ask executeForShard - auto upstreamWithClient = dynamic_cast(&upstreamBlock()); - TRI_ASSERT(upstreamWithClient != nullptr); - if (upstreamWithClient == nullptr) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, - "Invalid state reached, we try to " - "request sharded data from a block " - "that is not able to provide it."); - } - std::tie(state, skipped, block) = - upstreamWithClient->executeForClient(stack, _distributeId); - } else { - std::tie(state, skipped, block) = upstreamBlock().execute(stack); - } - TRI_IF_FAILURE("ExecutionBlock::getBlock") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - if (state == ExecutionState::WAITING) { - TRI_ASSERT(block == nullptr); - TRI_ASSERT(skipped == 0); - break; - } + std::tie(state, skipped, block) = executeForDependency(_currentDependency, stack); - if (skipped == 0 && block == nullptr) { - // We're not waiting and didn't get any input, so we have to be done. - TRI_ASSERT(state == ExecutionState::DONE); + if (state == ExecutionState::DONE) { if (!advanceDependency()) { break; } } + } while (state != ExecutionState::WAITING && skipped == 0 && block == nullptr); + return {state, skipped, block}; +} - } while (skipped == 0 && block == nullptr); +template +std::tuple DependencyProxy::executeForDependency( + size_t dependency, AqlCallStack& stack) { + // TODO: assert dependency in range + ExecutionState state = ExecutionState::HASMORE; + size_t skipped = 0; + SharedAqlItemBlockPtr block = nullptr; + if (!_distributeId.empty()) { + // We are in the cluster case. + // we have to ask executeForShard + auto upstreamWithClient = + dynamic_cast(&upstreamBlockForDependency(dependency)); + TRI_ASSERT(upstreamWithClient != nullptr); + if (upstreamWithClient == nullptr) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, + "Invalid state reached, we try to " + "request sharded data from a block " + "that is not able to provide it."); + } + std::tie(state, skipped, block) = + upstreamWithClient->executeForClient(stack, _distributeId); + } else { + std::tie(state, skipped, block) = + upstreamBlockForDependency(dependency).execute(stack); + } + TRI_IF_FAILURE("ExecutionBlock::getBlock") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + if (skipped == 0 && block == nullptr) { + // We're either waiting or Done + TRI_ASSERT(state == ExecutionState::DONE || state == ExecutionState::WAITING); + } return {state, skipped, block}; } diff --git a/arangod/Aql/DependencyProxy.h b/arangod/Aql/DependencyProxy.h index c66011845512..108e6bad1ac9 100644 --- a/arangod/Aql/DependencyProxy.h +++ b/arangod/Aql/DependencyProxy.h @@ -76,6 +76,9 @@ class DependencyProxy { // TODO Implement and document properly! 
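A rough, self-contained model of the control flow this refactoring gives DependencyProxy::execute(): ask the current dependency, advance once it reports DONE, and stop as soon as data, a skip count, or WAITING comes back. All names below (FakeUpstream, Block, drainDependencies) are stand-ins invented for the sketch, not the real AQL classes.

#include <cstddef>
#include <optional>
#include <tuple>
#include <vector>

enum class ExecutionState { DONE, HASMORE, WAITING };
struct Block { int rows = 0; };
using UpstreamResult = std::tuple<ExecutionState, std::size_t, std::optional<Block>>;

// Stand-in for one upstream dependency; hands out a fixed number of blocks.
struct FakeUpstream {
  int remaining;
  UpstreamResult execute() {
    if (remaining == 0) return {ExecutionState::DONE, 0, std::nullopt};
    --remaining;
    return {remaining == 0 ? ExecutionState::DONE : ExecutionState::HASMORE, 0, Block{1}};
  }
};

// Mirrors the shape of the loop above: call the current dependency, move to
// the next one when it is DONE, and leave the loop on WAITING or on output.
UpstreamResult drainDependencies(std::vector<FakeUpstream>& deps, std::size_t& current) {
  ExecutionState state = ExecutionState::HASMORE;
  std::size_t skipped = 0;
  std::optional<Block> block;
  do {
    std::tie(state, skipped, block) = deps[current].execute();
    if (state == ExecutionState::DONE) {
      if (current + 1 < deps.size()) {
        ++current;
      } else {
        break;  // no further dependency to advance to
      }
    }
  } while (state != ExecutionState::WAITING && skipped == 0 && !block);
  return {state, skipped, block};
}

int main() {
  std::vector<FakeUpstream> deps{{2}, {1}};
  std::size_t current = 0;
  auto [state, skipped, block] = drainDependencies(deps, current);
  (void)state; (void)skipped;
  return block.has_value() ? 0 : 1;
}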
TEST_VIRTUAL std::tuple execute(AqlCallStack& stack); + TEST_VIRTUAL std::tuple executeForDependency( + size_t dependency, AqlCallStack& stack); + // This is only TEST_VIRTUAL, so we ignore this lint warning: // NOLINTNEXTLINE google-default-arguments [[nodiscard]] TEST_VIRTUAL std::pair fetchBlock( diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 5ffd2ad3edc1..54bc29742555 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -144,8 +144,8 @@ constexpr bool isNewStyleExecutor = is_one_of_v< TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, - ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SortExecutor, + UnsortedGatherExecutor, SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, + KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -190,6 +190,7 @@ ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, _execState{ExecState::CHECKCALL}, _upstreamRequest{}, _clientRequest{}, + _requestedDependency{}, _hasUsedDataRangeBlock{false} { // already insert ourselves into the statistics results if (_profile >= PROFILE_LEVEL_BLOCKS) { @@ -1119,8 +1120,9 @@ static SkipRowsRangeVariant constexpr skipRowsType() { #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif - TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, - LimitExecutor, SortExecutor, IResearchViewExecutor, + UnsortedGatherExecutor, TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, + SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, SortExecutor, + IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1191,18 +1193,66 @@ static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwa return FastForwardVariant::FETCHER; } +template +auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, size_t const dependency) + -> std::tuple { + // Silence compiler about unused dependency + (void)dependency; + if constexpr (isNewStyleExecutor) { + if constexpr (is_one_of_v) { + // TODO: This is a hack to guarantee we have enough space in our range + // to fit all inputs, in particular the one executed below + TRI_ASSERT(dependency < _dependencies.size()); + _lastRange.resizeIfNecessary(ExecutorState::HASMORE, 0, _dependencies.size()); + + auto [state, skipped, range] = _rowFetcher.executeForDependency(dependency, stack); + + _lastRange.setDependency(dependency, range); + + return {state, skipped, _lastRange}; + } else { + return _rowFetcher.execute(stack); + } + } else { + TRI_ASSERT(false); + } +} + +template +auto ExecutionBlockImpl::executeProduceRows(typename Fetcher::DataRange& input, + OutputAqlItemRow& output) + -> std::tuple { + if constexpr (isNewStyleExecutor) { + if constexpr (is_one_of_v) { + return _executor.produceRows(input, output); + } else { + auto [state, stats, call] = _executor.produceRows(input, output); + return {state, stats, call, 0}; + } + } else { + TRI_ASSERT(false); + } +} + template auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRange& inputRange, AqlCall& call) - -> std::tuple { + -> std::tuple { if constexpr (isNewStyleExecutor) { call.skippedRows = 0; if constexpr 
(skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { - // If the executor has a method skipRowsRange, to skip outputs. - // Every non-passthrough executor needs to implement this. - auto res = _executor.skipRowsRange(inputRange, call); - _executorReturnedDone = std::get(res) == ExecutorState::DONE; - return res; + if constexpr (is_one_of_v) { + // If the executor has a method skipRowsRange, to skip outputs. + // Every non-passthrough executor needs to implement this. + auto res = _executor.skipRowsRange(inputRange, call); + _executorReturnedDone = std::get(res) == ExecutorState::DONE; + return res; + } else { + auto [state, stats, skipped, localCall] = + _executor.skipRowsRange(inputRange, call); + _executorReturnedDone = state == ExecutorState::DONE; + return {state, stats, skipped, localCall, 0}; + } } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { // If we know that every input row produces exactly one output row (this // is a property of the executor), then we can just let the fetcher skip @@ -1212,19 +1262,18 @@ auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRa static_assert( std::is_same_v, "Executors with custom statistics must implement skipRowsRange."); - // TODO Set _executorReturnedDone? - return {inputRange.upstreamState(), NoStats{}, 0, call}; + return {inputRange.upstreamState(), NoStats{}, 0, call, 0}; } else { static_assert(dependent_false::value, "This value of SkipRowsRangeVariant is not supported"); - return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); + return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call, 0); } } else { TRI_ASSERT(false); - return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); + return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call, 0); } // Compiler is unhappy without this. - return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call); + return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call, 0); } template <> @@ -1379,7 +1428,7 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { template auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRange& inputRange, AqlCall& clientCall) - -> std::tuple { + -> std::tuple { TRI_ASSERT(isNewStyleExecutor); if constexpr (std::is_same_v) { if (clientCall.needsFullCount() && clientCall.getOffset() == 0 && @@ -1389,33 +1438,45 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang return executeSkipRowsRange(_lastRange, clientCall); } // Do not fastForward anything, the Subquery start will handle it by itself - return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}}; + return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}, 0}; } auto type = fastForwardType(clientCall, _executor); switch (type) { case FastForwardVariant::FULLCOUNT: case FastForwardVariant::EXECUTOR: { LOG_QUERY("cb135", DEBUG) << printTypeInfo() << " apply full count."; - auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); + auto [state, stats, skippedLocal, call, dependency] = + executeSkipRowsRange(_lastRange, clientCall); + _requestedDependency = dependency; + if (type == FastForwardVariant::EXECUTOR) { // We do not report the skip skippedLocal = 0; } - if constexpr (std::is_same_v) { + + if constexpr (is_one_of_v) { // The executor will have used all Rows. // However we need to drop them from the input // here. 
inputRange.skipAllRemainingDataRows(); } - return {state, stats, skippedLocal, call}; + return {state, stats, skippedLocal, call, dependency}; } case FastForwardVariant::FETCHER: { LOG_QUERY("fa327", DEBUG) << printTypeInfo() << " bypass unused rows."; - inputRange.skipAllRemainingDataRows(); + _requestedDependency = inputRange.skipAllRemainingDataRows(); AqlCall call{}; call.hardLimit = 0; - return {inputRange.upstreamState(), typename Executor::Stats{}, 0, call}; + + // TODO We have to ask all dependencies to go forward to the next shadow row + if constexpr (std::is_same_v) { + return {inputRange.upstreamState(_requestedDependency), + typename Executor::Stats{}, 0, call, _requestedDependency}; + } else { + return {inputRange.upstreamState(), typename Executor::Stats{}, 0, call, + _requestedDependency}; + } } } // Unreachable @@ -1476,7 +1537,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // However we need to maintain the upstream state. size_t skippedLocal = 0; typename Fetcher::DataRange bypassedRange{ExecutorState::HASMORE}; - std::tie(_upstreamState, skippedLocal, bypassedRange) = _rowFetcher.execute(stack); + std::tie(_upstreamState, skippedLocal, bypassedRange) = + executeFetcher(stack, _requestedDependency); return {_upstreamState, skippedLocal, bypassedRange.getBlock()}; } @@ -1543,15 +1605,15 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { clientCall.getLimit() == 0 && clientCall.needsFullCount(); #endif LOG_QUERY("1f786", DEBUG) << printTypeInfo() << " call skipRows " << clientCall; - auto [state, stats, skippedLocal, call] = + auto [state, stats, skippedLocal, call, dependency] = executeSkipRowsRange(_lastRange, clientCall); + _requestedDependency = dependency; #ifdef ARANGODB_ENABLE_MAINTAINER_MODE // Assertion: We did skip 'skippedLocal' documents here. - // This means that they have to be removed from clientCall.getOffset() - // This has to be done by the Executor calling call.didSkip() - // accordingly. - // The LIMIT executor with a LIMIT of 0 can also bypass fullCount - // here, even if callLimit > 0 + // This means that they have to be removed from + // clientCall.getOffset() This has to be done by the Executor + // calling call.didSkip() accordingly. The LIMIT executor with a + // LIMIT of 0 can also bypass fullCount here, even if callLimit > 0 if (canPassFullcount || std::is_same_v) { // In this case we can first skip. But straight after continue with fullCount, so we might skip more TRI_ASSERT(clientCall.getOffset() + skippedLocal >= offsetBefore); @@ -1607,14 +1669,15 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(!_executorReturnedDone); // Execute getSome - auto const [state, stats, call] = - _executor.produceRows(_lastRange, *_outputItemRow); + auto const [state, stats, call, dependency] = + executeProduceRows(_lastRange, *_outputItemRow); + // TODO: Check + _requestedDependency = dependency; _executorReturnedDone = state == ExecutorState::DONE; _engine->_stats += stats; localExecutorState = state; if constexpr (!std::is_same_v) { - // Produce might have modified the clientCall // But only do this if we are not subquery. clientCall = _outputItemRow->getClientCall(); } @@ -1628,7 +1691,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // In all other branches only if the client Still needs more data. 
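For orientation, executeWithoutTrace() drives a small state machine over states such as CHECKCALL, PRODUCE, UPSTREAM and DONE, a few of which are visible in the cases above. The following is a deliberately tiny, self-contained model of that kind of loop; the Demo type, its counters and the constant batch of three rows are invented for the sketch and carry none of the real skip, shadow-row or statistics handling.

#include <cstddef>

enum class ExecState { CHECKCALL, PRODUCE, UPSTREAM, DONE };

struct Demo {
  std::size_t rowsBuffered = 0;  // rows currently available locally
  std::size_t rowsWanted = 0;    // what the (hypothetical) client asked for
  std::size_t produced = 0;

  std::size_t run() {
    ExecState state = ExecState::CHECKCALL;
    while (state != ExecState::DONE) {
      switch (state) {
        case ExecState::CHECKCALL:
          // Decide whether any more output is needed at all.
          state = (produced < rowsWanted) ? ExecState::PRODUCE : ExecState::DONE;
          break;
        case ExecState::PRODUCE:
          if (rowsBuffered > 0) {
            --rowsBuffered;
            ++produced;
            state = ExecState::CHECKCALL;
          } else {
            state = ExecState::UPSTREAM;  // local input exhausted, ask upstream
          }
          break;
        case ExecState::UPSTREAM:
          rowsBuffered += 3;  // pretend upstream delivered another block
          state = ExecState::CHECKCALL;
          break;
        case ExecState::DONE:
          break;
      }
    }
    return produced;
  }
};

int main() {
  Demo d;
  d.rowsWanted = 5;
  return d.run() == 5 ? 0 : 1;
}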
_execState = ExecState::DONE; break; - } else if (clientCall.getLimit() > 0 && !_lastRange.hasDataRow()) { + } else if (clientCall.getLimit() > 0 && !lastRangeHasDataRow()) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more _upstreamRequest = call; @@ -1642,9 +1705,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { case ExecState::FASTFORWARD: { LOG_QUERY("96e2c", DEBUG) << printTypeInfo() << " all produced, fast forward to end up (sub-)query."; - auto [state, stats, skippedLocal, call] = + auto [state, stats, skippedLocal, call, dependency] = executeFastForward(_lastRange, clientCall); + _requestedDependency = dependency; _skipped += skippedLocal; _engine->_stats += stats; localExecutorState = state; @@ -1669,7 +1733,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // HASMORE even if it knew that upstream has no further rows. TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to make sure _lastRange is all used - TRI_ASSERT(!_lastRange.hasDataRow()); + TRI_ASSERT(!lastRangeHasDataRow()); TRI_ASSERT(!_lastRange.hasShadowRow()); size_t skippedLocal = 0; @@ -1685,7 +1749,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { stack.pushCall(std::move(callCopy)); } - std::tie(_upstreamState, skippedLocal, _lastRange) = _rowFetcher.execute(stack); + std::tie(_upstreamState, skippedLocal, _lastRange) = + executeFetcher(stack, _requestedDependency); if constexpr (std::is_same_v) { // Do not pop the call, we did not put it on. @@ -1789,7 +1854,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; } - // We must return skipped and/or data when reporting HASMORE + // We must return skipped and/or data when reportingHASMORE TRI_ASSERT(_upstreamState != ExecutionState::HASMORE || (skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0))); return {_upstreamState, skipped, std::move(outputBlock)}; @@ -1855,6 +1920,12 @@ auto ExecutionBlockImpl::outputIsFull() const noexcept -> bool { _outputItemRow->allRowsUsed(); } +// TODO: remove again +template +auto ExecutionBlockImpl::lastRangeHasDataRow() const -> bool { + return _lastRange.hasDataRow(); +} + template <> template <> RegisterId ExecutionBlockImpl>>::getOutputRegisterId() const diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 6e4e383a22ce..0d2d3da66d1e 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -233,12 +233,18 @@ class ExecutionBlockImpl final : public ExecutionBlock { */ std::tuple executeWithoutTrace(AqlCallStack stack); + std::tuple executeFetcher( + AqlCallStack& stack, size_t const dependency); + + std::tuple executeProduceRows( + typename Fetcher::DataRange& input, OutputAqlItemRow& output); + // execute a skipRowsRange call - std::tuple executeSkipRowsRange( - typename Fetcher::DataRange& inputRange, AqlCall& call); + auto executeSkipRowsRange(typename Fetcher::DataRange& inputRange, AqlCall& call) + -> std::tuple; auto executeFastForward(typename Fetcher::DataRange& inputRange, AqlCall& clientCall) - -> std::tuple; + -> std::tuple; /** * @brief Inner getSome() part, without the tracing calls. 
@@ -297,6 +303,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] auto outputIsFull() const noexcept -> bool; + [[nodiscard]] auto lastRangeHasDataRow() const -> bool; + void resetExecutor(); private: @@ -337,6 +345,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { AqlCall _clientRequest; + size_t _requestedDependency; + // Only used in passthrough variant. // We track if we have reference the range's block // into an output block. diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.cpp b/arangod/Aql/MultiAqlItemBlockInputRange.cpp new file mode 100644 index 000000000000..8b5aad8595d7 --- /dev/null +++ b/arangod/Aql/MultiAqlItemBlockInputRange.cpp @@ -0,0 +1,141 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Markus Pfeiffer +//////////////////////////////////////////////////////////////////////////////// + +#include "MultiAqlItemBlockInputRange.h" +#include "Aql/ShadowAqlItemRow.h" + +#include +#include +#include + +#include "Logger/LogMacros.h" + +using namespace arangodb; +using namespace arangodb::aql; + +MultiAqlItemBlockInputRange::MultiAqlItemBlockInputRange(ExecutorState state, + std::size_t skipped, + std::size_t nrInputRanges) { + _inputs.resize(nrInputRanges, AqlItemBlockInputRange{state, skipped}); +} + +auto MultiAqlItemBlockInputRange::resizeIfNecessary(ExecutorState state, size_t skipped, + size_t nrInputRanges) -> void { + // We never want to reduce the number of dependencies. 
+ TRI_ASSERT(_inputs.size() <= nrInputRanges); + if (_inputs.size() < nrInputRanges) { + _inputs.resize(nrInputRanges, AqlItemBlockInputRange{state, skipped}); + } +} + +auto MultiAqlItemBlockInputRange::upstreamState(size_t const dependency) const + noexcept -> ExecutorState { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).upstreamState(); +} + +auto MultiAqlItemBlockInputRange::hasDataRow(size_t const dependency) const noexcept -> bool { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).hasDataRow(); +} + +auto MultiAqlItemBlockInputRange::hasDataRow() const noexcept -> bool { + return std::any_of(std::begin(_inputs), std::end(_inputs), + [](AqlItemBlockInputRange const& i) -> bool { + return i.hasDataRow(); + }); +} + +auto MultiAqlItemBlockInputRange::peekDataRow(size_t const dependency) const + -> std::pair { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).peekDataRow(); +} + +auto MultiAqlItemBlockInputRange::nextDataRow(size_t const dependency) + -> std::pair { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).nextDataRow(); +} + +// We have a shadow row, iff all our inputs have o +auto MultiAqlItemBlockInputRange::hasShadowRow() const noexcept -> bool { + return std::all_of(std::begin(_inputs), std::end(_inputs), + [](AqlItemBlockInputRange const& i) -> bool { + return i.hasShadowRow(); + }); +} + +// TODO: * It doesn't matter which shadow row we peek, they should all be the same +// * assert that all dependencies are on a shadow row? +auto MultiAqlItemBlockInputRange::peekShadowRow() const -> arangodb::aql::ShadowAqlItemRow { + TRI_ASSERT(!hasDataRow()); + + // TODO: Correct? + return _inputs.at(0).peekShadowRow(); +} + +auto MultiAqlItemBlockInputRange::nextShadowRow() + -> std::pair { + TRI_ASSERT(!hasDataRow()); + + // Need to consume all shadow rows simultaneously. 
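The aggregation rules used here are worth calling out: a MultiAqlItemBlockInputRange has a data row if any dependency does, but counts as done (or as sitting on a shadow row) only when all dependencies agree. A minimal standalone illustration of that any_of/all_of split, with Range as a made-up stand-in for the per-dependency state:

#include <algorithm>
#include <vector>

enum class ExecutorState { DONE, HASMORE };

// Stand-in for one per-dependency input range.
struct Range {
  bool dataRow;
  ExecutorState state;
};

// Any dependency with a row means data is available ...
bool anyHasDataRow(std::vector<Range> const& deps) {
  return std::any_of(deps.begin(), deps.end(),
                     [](Range const& r) { return r.dataRow; });
}

// ... but the whole multi-range is done only when every dependency is drained.
bool allDone(std::vector<Range> const& deps) {
  return std::all_of(deps.begin(), deps.end(), [](Range const& r) {
    return !r.dataRow && r.state == ExecutorState::DONE;
  });
}

int main() {
  std::vector<Range> deps{{false, ExecutorState::DONE}, {true, ExecutorState::HASMORE}};
  return (anyHasDataRow(deps) && !allDone(deps)) ? 0 : 1;
}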
+ // TODO: Assert we're on the correct shadow row for all upstreams + auto state = ExecutorState::HASMORE; + auto shadowRow = ShadowAqlItemRow{CreateInvalidShadowRowHint()}; + + for (auto& i : _inputs) { + std::tie(state, shadowRow) = i.nextShadowRow(); + } + return {state, shadowRow}; +} + +auto MultiAqlItemBlockInputRange::getBlock(size_t const dependency) const + noexcept -> SharedAqlItemBlockPtr { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).getBlock(); +} + +auto MultiAqlItemBlockInputRange::setDependency(size_t const dependency, + AqlItemBlockInputRange& range) -> void { + TRI_ASSERT(dependency < _inputs.size()); + _inputs.at(dependency) = range; +} + +auto MultiAqlItemBlockInputRange::isDone() const -> bool { + auto res = std::all_of(std::begin(_inputs), std::end(_inputs), + [](AqlItemBlockInputRange const& i) -> bool { + return !i.hasDataRow() && + i.upstreamState() == ExecutorState::DONE; + }); + return res; +} + +size_t MultiAqlItemBlockInputRange::skipAllRemainingDataRows() { + for (size_t i = 0; i < _inputs.size(); i++) { + _inputs.at(i).skipAllRemainingDataRows(); + if (_inputs.at(i).upstreamState() == ExecutorState::HASMORE) { + return i; + } + } + return 0; +} diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.h b/arangod/Aql/MultiAqlItemBlockInputRange.h new file mode 100644 index 000000000000..e23a65f5ad7a --- /dev/null +++ b/arangod/Aql/MultiAqlItemBlockInputRange.h @@ -0,0 +1,78 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Markus Pfeiffer +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_MULTIAQLITEMBLOCKINPUTRANGE_H +#define ARANGOD_AQL_MULTIAQLITEMBLOCKINPUTRANGE_H + +#include "Aql/AqlItemBlockInputRange.h" +#include "Aql/ExecutionState.h" +#include "Aql/InputAqlItemRow.h" +#include "Aql/SharedAqlItemBlockPtr.h" + +namespace arangodb::aql { + +class MultiAqlItemBlockInputRange { + public: + explicit MultiAqlItemBlockInputRange(ExecutorState state, std::size_t skipped = 0, + std::size_t nrInputRanges = 1); + + MultiAqlItemBlockInputRange(ExecutorState, std::size_t skipped, + arangodb::aql::SharedAqlItemBlockPtr const&, + std::size_t startIndex); + + MultiAqlItemBlockInputRange(ExecutorState, std::size_t skipped, + arangodb::aql::SharedAqlItemBlockPtr&&, + std::size_t startIndex) noexcept; + + ExecutorState upstreamState(size_t const dependency) const noexcept; + bool upstreamHasMore(size_t const dependency) const noexcept; + + bool hasDataRow() const noexcept; + bool hasDataRow(size_t const dependency) const noexcept; + + std::pair peekDataRow(size_t const dependency) const; + std::pair nextDataRow(size_t const dependency); + + bool hasShadowRow() const noexcept; + + arangodb::aql::ShadowAqlItemRow peekShadowRow() const; + std::pair nextShadowRow(); + + auto isDone() const -> bool; + + auto resizeIfNecessary(ExecutorState state, size_t skipped, size_t nrInputRanges) -> void; + + auto getBlock(size_t const dependency = 0) const noexcept -> SharedAqlItemBlockPtr; + + auto setDependency(size_t const dependency, AqlItemBlockInputRange& range) -> void; + + size_t skipAllRemainingDataRows(); + + private: + ExecutorState _finalState{ExecutorState::HASMORE}; + + std::vector _inputs; +}; + +} // namespace arangodb::aql + +#endif diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.cpp b/arangod/Aql/MultiDependencySingleRowFetcher.cpp index de7f043db13b..517542e953e1 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.cpp +++ b/arangod/Aql/MultiDependencySingleRowFetcher.cpp @@ -169,6 +169,10 @@ void MultiDependencySingleRowFetcher::initDependencies() { for (size_t i = 0; i < _dependencyProxy->numberDependencies(); ++i) { _dependencyInfos.emplace_back(DependencyInfo{}); } + _dependencyStates.reserve(_dependencyProxy->numberDependencies()); + for (size_t i = 0; i < _dependencyProxy->numberDependencies(); ++i) { + _dependencyStates.emplace_back(ExecutionState::HASMORE); + } } size_t MultiDependencySingleRowFetcher::numberDependencies() { @@ -359,4 +363,39 @@ bool MultiDependencySingleRowFetcher::fetchBlockIfNecessary(size_t const depende //@deprecated auto MultiDependencySingleRowFetcher::useStack(AqlCallStack const& stack) -> void { _dependencyProxy->useStack(stack); -} \ No newline at end of file +} + +auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependency, + AqlCallStack& stack) + -> std::tuple { + auto [state, skipped, block] = _dependencyProxy->executeForDependency(dependency, stack); + + if (state == ExecutionState::WAITING) { + return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; + } + + _dependencyStates.at(dependency) = state; + if (std::any_of(std::begin(_dependencyStates), std::end(_dependencyStates), + [](ExecutionState const s) { + return s == ExecutionState::HASMORE; + })) { + state = ExecutionState::HASMORE; + } else { + state = ExecutionState::DONE; + } + if (block == nullptr) { + if (state == ExecutionState::HASMORE) { 
+ return {state, skipped, AqlItemBlockInputRange{ExecutorState::HASMORE, skipped}}; + } + return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, skipped}}; + } + + auto [start, end] = block->getRelevantRange(); + if (state == ExecutionState::HASMORE) { + TRI_ASSERT(block != nullptr); + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::DONE, skipped, block, start}}; + } + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::DONE, skipped, block, start}}; +} diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.h b/arangod/Aql/MultiDependencySingleRowFetcher.h index fd17efc44f6d..a2c2ebdd911e 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.h +++ b/arangod/Aql/MultiDependencySingleRowFetcher.h @@ -23,10 +23,10 @@ #ifndef ARANGOD_AQL_MULTI_DEPENDENCY_SINGLE_ROW_FETCHER_H #define ARANGOD_AQL_MULTI_DEPENDENCY_SINGLE_ROW_FETCHER_H -#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" +#include "Aql/MultiAqlItemBlockInputRange.h" #include "Basics/Exceptions.h" #include "Basics/voc-errors.h" @@ -85,7 +85,7 @@ class MultiDependencySingleRowFetcher { }; public: - using DataRange = AqlItemBlockInputRange; + using DataRange = MultiAqlItemBlockInputRange; explicit MultiDependencySingleRowFetcher(DependencyProxy& executionBlock); TEST_VIRTUAL ~MultiDependencySingleRowFetcher() = default; @@ -134,6 +134,9 @@ class MultiDependencySingleRowFetcher { //@deprecated auto useStack(AqlCallStack const& stack) -> void; + auto executeForDependency(size_t const dependency, AqlCallStack& stack) + -> std::tuple; + private: DependencyProxy* _dependencyProxy; @@ -141,6 +144,7 @@ class MultiDependencySingleRowFetcher { * @brief Holds the information for all dependencies */ std::vector _dependencyInfos; + std::vector _dependencyStates; private: /** diff --git a/arangod/Aql/UnsortedGatherExecutor.cpp b/arangod/Aql/UnsortedGatherExecutor.cpp index a186fe36e732..e1b3221a640f 100644 --- a/arangod/Aql/UnsortedGatherExecutor.cpp +++ b/arangod/Aql/UnsortedGatherExecutor.cpp @@ -23,19 +23,90 @@ #include "UnsortedGatherExecutor.h" #include "Aql/IdExecutor.h" // for IdExecutorInfos +#include "Aql/MultiAqlItemBlockInputRange.h" #include "Aql/MultiDependencySingleRowFetcher.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/Stats.h" #include "Basics/debugging.h" +#include "Logger/LogMacros.h" + using namespace arangodb; using namespace arangodb::aql; +struct Dependency { + Dependency() : _number{0} {}; + + size_t _number; +}; + UnsortedGatherExecutor::UnsortedGatherExecutor(Fetcher& fetcher, Infos& infos) : _fetcher(fetcher) {} UnsortedGatherExecutor::~UnsortedGatherExecutor() = default; +auto UnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& input, + OutputAqlItemRow& output) + -> std::tuple { + while (!output.isFull() && !done()) { + if (input.hasDataRow(currentDependency())) { + auto [state, inputRow] = input.nextDataRow(currentDependency()); + output.copyRow(inputRow); + TRI_ASSERT(output.produced()); + output.advanceRow(); + + if (state == ExecutorState::DONE) { + advanceDependency(); + } + } else { + if (input.upstreamState(currentDependency()) == ExecutorState::DONE) { + advanceDependency(); + } else { + return {input.upstreamState(currentDependency()), Stats{}, AqlCall{}, + currentDependency()}; + } + } + } + + while (!done() && input.upstreamState(currentDependency()) == ExecutorState::DONE) { + advanceDependency(); + } + + if (done()) { + // here currentDependency is invalid 
which will cause things to crash + // if we ask upstream in ExecutionBlockImpl. yolo. + TRI_ASSERT(!input.hasDataRow()); + return {ExecutorState::DONE, Stats{}, AqlCall{}, currentDependency()}; + } else { + return {input.upstreamState(currentDependency()), Stats{}, AqlCall{}, + currentDependency()}; + } +} + +auto UnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) + -> std::tuple { + auto skipped = size_t{0}; + while (call.needSkipMore() && input.hasDataRow(currentDependency())) { + auto [state, inputRow] = input.nextDataRow(currentDependency()); + + call.didSkip(1); + skipped++; + + if (state == ExecutorState::DONE) { + advanceDependency(); + } + } + + if (done()) { + // here currentDependency is invalid which will cause things to crash + // if we ask upstream in ExecutionBlockImpl. yolo. + return {ExecutorState::DONE, Stats{}, skipped, AqlCall{}, currentDependency()}; + } else { + return {input.upstreamState(currentDependency()), Stats{}, skipped, + AqlCall{}, currentDependency()}; + } +} + auto UnsortedGatherExecutor::produceRows(OutputAqlItemRow& output) -> std::pair { while (!output.isFull() && !done()) { diff --git a/arangod/Aql/UnsortedGatherExecutor.h b/arangod/Aql/UnsortedGatherExecutor.h index 4896e9a166cc..5dc6d5c8afe8 100644 --- a/arangod/Aql/UnsortedGatherExecutor.h +++ b/arangod/Aql/UnsortedGatherExecutor.h @@ -38,17 +38,18 @@ class InputAqlItemRow; class OutputAqlItemRow; class IdExecutorInfos; class SharedAqlItemBlockPtr; +struct AqlCall; /** -* @brief Produces all rows from its dependencies, which may be more than one, -* in some unspecified order. It is, purposefully, strictly synchronous, and -* always waits for an answer before requesting the next row(s). This is as -* opposed to the ParallelUnsortedGather, which already starts fetching the next -* dependenci(es) while waiting for an answer. -* -* The actual implementation fetches all available rows from the first -* dependency, then from the second, and so forth. But that is not guaranteed. -*/ + * @brief Produces all rows from its dependencies, which may be more than one, + * in some unspecified order. It is, purposefully, strictly synchronous, and + * always waits for an answer before requesting the next row(s). This is as + * opposed to the ParallelUnsortedGather, which already starts fetching the next + * dependenci(es) while waiting for an answer. + * + * The actual implementation fetches all available rows from the first + * dependency, then from the second, and so forth. But that is not guaranteed. + */ class UnsortedGatherExecutor { public: struct Properties { @@ -82,6 +83,12 @@ class UnsortedGatherExecutor { [[nodiscard]] auto skipRows(size_t atMost) -> std::tuple; + // TODO: This should really be the DataRange of the fetcher? 
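The comment block above describes the gather strategy: drain the first dependency completely, then the second, and so on, copying rows until the output block is full. A compact, self-contained sketch of that behaviour, with Upstream and gatherUnsorted invented for the example:

#include <cstddef>
#include <vector>

// Stand-in for one upstream dependency that hands out integer "rows".
struct Upstream {
  std::vector<int> rows;
  std::size_t pos = 0;
  bool hasRow() const { return pos < rows.size(); }
  int next() { return rows[pos++]; }
};

// Copy rows from the current dependency until it is exhausted, then advance
// to the next one; stop once the output batch is full or everything is drained.
std::vector<int> gatherUnsorted(std::vector<Upstream>& deps, std::size_t batchLimit) {
  std::vector<int> out;
  std::size_t current = 0;
  while (out.size() < batchLimit && current < deps.size()) {
    if (deps[current].hasRow()) {
      out.push_back(deps[current].next());
    } else {
      ++current;
    }
  }
  return out;
}

int main() {
  std::vector<Upstream> deps{{{1, 2}}, {{3}}};
  return gatherUnsorted(deps, 10).size() == 3 ? 0 : 1;
}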
+ [[nodiscard]] auto produceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) + -> std::tuple; + [[nodiscard]] auto skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) + -> std::tuple; + private: [[nodiscard]] auto numDependencies() const noexcept(noexcept(static_cast(nullptr)->numberDependencies())) -> size_t; diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index ba2250d221ec..785416e6a60a 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -306,6 +306,7 @@ set(LIB_ARANGO_AQL_SOURCES Aql/ModificationExecutorInfos.cpp Aql/ModificationNodes.cpp Aql/ModificationOptions.cpp + Aql/MultiAqlItemBlockInputRange.cpp Aql/MultiDependencySingleRowFetcher.cpp Aql/NoResultsExecutor.cpp Aql/NodeFinder.cpp From 3ae0061587cfd71d324874e6a14aa300c7583cbd Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Sat, 29 Feb 2020 07:16:21 +0000 Subject: [PATCH 086/122] Move ModificationExecutors to new interface (#11165) * Move ModificationExecutors to new interface * added inputRange method to inputMatrix class, added switch between DataRange Input <-> Matrix for ModificationExecutors * forgot to enable allrows modification executor in fastForwardType * removed obsolete comment * removed obsolete comment Co-authored-by: Heiko Co-authored-by: Michael Hackstein --- arangod/Aql/AqlItemBlockInputMatrix.cpp | 31 +++++++ arangod/Aql/AqlItemBlockInputMatrix.h | 12 +++ arangod/Aql/ExecutionBlockImpl.cpp | 37 ++++++-- arangod/Aql/ModificationExecutor.cpp | 111 +++++++++++++++--------- arangod/Aql/ModificationExecutor.h | 18 +++- 5 files changed, 158 insertions(+), 51 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputMatrix.cpp b/arangod/Aql/AqlItemBlockInputMatrix.cpp index 956b98467c51..bbd495784e27 100644 --- a/arangod/Aql/AqlItemBlockInputMatrix.cpp +++ b/arangod/Aql/AqlItemBlockInputMatrix.cpp @@ -54,6 +54,20 @@ AqlItemBlockInputMatrix::AqlItemBlockInputMatrix(ExecutorState state, AqlItemMat } } +AqlItemBlockInputRange AqlItemBlockInputMatrix::getNextInputRange() { + TRI_ASSERT(_aqlItemMatrix != nullptr); + + if (_aqlItemMatrix->numberOfBlocks() == 0) { + return AqlItemBlockInputRange{upstreamState()}; + } + + SharedAqlItemBlockPtr blockPtr = _aqlItemMatrix->getBlock(_currentBlockRowIndex); + auto [start, end] = blockPtr->getRelevantRange(); + ExecutorState state = incrBlockIndex(); + + return {state, 0, std::move(blockPtr), start}; +} + SharedAqlItemBlockPtr AqlItemBlockInputMatrix::getBlock() const noexcept { TRI_ASSERT(_aqlItemMatrix == nullptr); return _block; @@ -94,6 +108,7 @@ std::pair AqlItemBlockInputMatrix::nextShadowRo !_aqlItemMatrix->peekShadowRow().isRelevant()) { // next row will be a shadow row _shadowRow = _aqlItemMatrix->popShadowRow(); + resetBlockIndex(); } else { _shadowRow = ShadowAqlItemRow{CreateInvalidShadowRowHint()}; } @@ -136,8 +151,24 @@ size_t AqlItemBlockInputMatrix::skipAllRemainingDataRows() { TRI_ASSERT(_finalState == ExecutorState::DONE); _aqlItemMatrix->clear(); } + resetBlockIndex(); } // Else we did already skip once. 
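getNextInputRange() and incrBlockIndex() above implement a simple cursor over the matrix blocks: hand out one block at a time and report HASMORE while another block follows. Below is a tiny stand-alone model of that bookkeeping; BlockCursor is a made-up name, and the real code of course returns input ranges rather than bare indexes.

#include <cstddef>

enum class ExecutorState { DONE, HASMORE };

// Made-up cursor mirroring the incrBlockIndex()/resetBlockIndex() idea.
struct BlockCursor {
  std::size_t numberOfBlocks = 0;
  std::size_t index = 0;

  // HASMORE if we could step to another block, DONE once the end is reached.
  ExecutorState advance() {
    if (index + 1 < numberOfBlocks) {
      ++index;
      return ExecutorState::HASMORE;
    }
    return ExecutorState::DONE;
  }
  void reset() { index = 0; }
};

int main() {
  BlockCursor cursor{3, 0};
  int hops = 0;
  while (cursor.advance() == ExecutorState::HASMORE) ++hops;
  return hops == 2 ? 0 : 1;  // two successful advances over three blocks
}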
// nothing to do return 0; } + +ExecutorState AqlItemBlockInputMatrix::incrBlockIndex() { + TRI_ASSERT(_aqlItemMatrix != nullptr); + if (_currentBlockRowIndex + 1 < _aqlItemMatrix->numberOfBlocks()) { + _currentBlockRowIndex++; + // we were able to increase the size as we reached not the end yet + return ExecutorState::HASMORE; + } + // we could not increase the index, we already reached the end + return ExecutorState::DONE; +} + +void AqlItemBlockInputMatrix::resetBlockIndex() noexcept { + _currentBlockRowIndex = 0; +} diff --git a/arangod/Aql/AqlItemBlockInputMatrix.h b/arangod/Aql/AqlItemBlockInputMatrix.h index 80e37e40e1f9..6e638aa2dbb6 100644 --- a/arangod/Aql/AqlItemBlockInputMatrix.h +++ b/arangod/Aql/AqlItemBlockInputMatrix.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_AQLITEMBLOCKMATRIXITERATOR_H #define ARANGOD_AQL_AQLITEMBLOCKMATRIXITERATOR_H +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/AqlItemMatrix.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" @@ -46,12 +47,22 @@ class AqlItemBlockInputMatrix { bool hasDataRow() const noexcept; arangodb::aql::SharedAqlItemBlockPtr getBlock() const noexcept; + + // Will provide access to the first block (from _aqlItemMatrix) + // After a block has been delivered, the block index will be increased. + // Next call then will deliver the next block etc. + AqlItemBlockInputRange getNextInputRange(); std::pair getMatrix() noexcept; ExecutorState upstreamState() const noexcept; bool upstreamHasMore() const noexcept; size_t skipAllRemainingDataRows(); + // Will return HASMORE if we were able to increase the row index. + // Otherwise will return DONE. + ExecutorState incrBlockIndex(); + void resetBlockIndex() noexcept; + private: arangodb::aql::SharedAqlItemBlockPtr _block{nullptr}; ExecutorState _finalState{ExecutorState::HASMORE}; @@ -59,6 +70,7 @@ class AqlItemBlockInputMatrix { // Only if _aqlItemMatrix is set (and NOT a nullptr), we have a valid and // usable DataRange object available to work with. 
AqlItemMatrix* _aqlItemMatrix; + size_t _currentBlockRowIndex = 0; ShadowAqlItemRow _shadowRow{CreateInvalidShadowRowHint{}}; }; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 54bc29742555..d27ce8534525 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -144,9 +144,16 @@ constexpr bool isNewStyleExecutor = is_one_of_v< TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode #endif - UnsortedGatherExecutor, SubqueryStartExecutor, SubqueryEndExecutor, TraversalExecutor, - KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SortExecutor, - IResearchViewExecutor, + ModificationExecutor, + ModificationExecutor, InsertModifier>, + ModificationExecutor, + ModificationExecutor, RemoveModifier>, + ModificationExecutor, + ModificationExecutor, UpdateReplaceModifier>, + ModificationExecutor, + ModificationExecutor, UpsertModifier>, SubqueryStartExecutor, + UnsortedGatherExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, + LimitExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1120,9 +1127,16 @@ static SkipRowsRangeVariant constexpr skipRowsType() { #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif - UnsortedGatherExecutor, TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, - SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, SortExecutor, - IResearchViewExecutor, + ModificationExecutor, + ModificationExecutor, InsertModifier>, + ModificationExecutor, + ModificationExecutor, RemoveModifier>, + ModificationExecutor, + ModificationExecutor, UpdateReplaceModifier>, + ModificationExecutor, + ModificationExecutor, UpsertModifier>, TraversalExecutor, + EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, + UnsortedGatherExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1187,7 +1201,15 @@ static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwa } // TODO: We only need to do this is the executor actually require to call. // e.g. Modifications will always need to be called. 
Limit only if it needs to report fullCount - if constexpr (is_one_of_v) { + if constexpr (is_one_of_v, + ModificationExecutor, InsertModifier>, + ModificationExecutor, + ModificationExecutor, RemoveModifier>, + ModificationExecutor, + ModificationExecutor, UpdateReplaceModifier>, + ModificationExecutor, + ModificationExecutor, UpsertModifier>>) { return FastForwardVariant::EXECUTOR; } return FastForwardVariant::FETCHER; @@ -1441,6 +1463,7 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}, 0}; } auto type = fastForwardType(clientCall, _executor); + switch (type) { case FastForwardVariant::FULLCOUNT: case FastForwardVariant::EXECUTOR: { diff --git a/arangod/Aql/ModificationExecutor.cpp b/arangod/Aql/ModificationExecutor.cpp index 76c8f1aac60b..5556ae39b93d 100644 --- a/arangod/Aql/ModificationExecutor.cpp +++ b/arangod/Aql/ModificationExecutor.cpp @@ -101,29 +101,23 @@ ModificationExecutor::ModificationExecutor(Fetcher& f // Fetches as many rows as possible from upstream using the fetcher's fetchRow // method and accumulates results through the modifier template -std::pair::Stats> -ModificationExecutor::doCollect(size_t maxOutputs) { +auto ModificationExecutor::doCollect(AqlItemBlockInputRange& input, + size_t maxOutputs) + -> void { // for fetchRow InputAqlItemRow row{CreateInvalidInputRowHint{}}; ExecutionState state = ExecutionState::HASMORE; // Maximum number of rows we can put into output // So we only ever produce this many here - // TODO: If we SKIP_IGNORE, then we'd be able to output more; - // this would require some counting to happen in the modifier - while (_modifier.nrOfOperations() < maxOutputs && state != ExecutionState::DONE) { - std::tie(state, row) = _fetcher.fetchRow(maxOutputs); - if (state == ExecutionState::WAITING) { - return {ExecutionState::WAITING, ModificationStats{}}; - } - if (row.isInitialized()) { - // Make sure we have a valid row - TRI_ASSERT(row.isInitialized()); - _modifier.accumulate(row); - } + while (_modifier.nrOfOperations() < maxOutputs && input.hasDataRow()) { + auto [state, row] = input.nextDataRow(); + + // Make sure we have a valid row + TRI_ASSERT(row.isInitialized()); + _modifier.accumulate(row); } TRI_ASSERT(state == ExecutionState::DONE || state == ExecutionState::HASMORE); - return {state, ModificationStats{}}; } // Outputs accumulated results, and counts the statistics @@ -131,7 +125,10 @@ template void ModificationExecutor::doOutput(OutputAqlItemRow& output, Stats& stats) { typename ModifierType::OutputIterator modifierOutputIterator(_modifier); + // We only accumulated as many items as we can output, so this + // should be correct for (auto const& modifierOutput : modifierOutputIterator) { + TRI_ASSERT(!output.isFull()); bool written = false; switch (modifierOutput.getType()) { case ModifierOutput::Type::ReturnIfRequired: @@ -157,53 +154,81 @@ void ModificationExecutor::doOutput(OutputAqlItemRow& break; } } - - if (_infos._doCount) { - stats.addWritesExecuted(_modifier.nrOfWritesExecuted()); - stats.addWritesIgnored(_modifier.nrOfWritesIgnored()); - } } template std::pair::Stats> ModificationExecutor::produceRows(OutputAqlItemRow& output) { + TRI_ASSERT(false); + + return {ExecutionState::DONE, ModificationStats{}}; +} + +template +[[nodiscard]] auto ModificationExecutor::produceRows( + typename FetcherType::DataRange& input, OutputAqlItemRow& output) + -> std::tuple { TRI_ASSERT(_infos._trx); - ModificationExecutor::Stats stats; + auto stats = 
ModificationStats{}; - const size_t maxOutputs = std::min(output.numRowsLeft(), _modifier.getBatchSize()); + _modifier.reset(); - // if we returned "WAITING" the last time we still have - // documents in the accumulator that we have not submitted - // yet - if (_lastState != ExecutionState::WAITING) { - _modifier.reset(); + ExecutorState upstreamState = ExecutorState::HASMORE; + // only produce at most output.numRowsLeft() many results + if constexpr (std::is_same_v) { + auto range = input.getNextInputRange(); + doCollect(range, output.numRowsLeft()); + upstreamState = range.upstreamState(); + } else { + doCollect(input, output.numRowsLeft()); + upstreamState = input.upstreamState(); } - TRI_IF_FAILURE("ModificationBlock::getSome") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } + if (_modifier.nrOfOperations() > 0) { + _modifier.transact(); - std::tie(_lastState, stats) = doCollect(maxOutputs); + if (_infos._doCount) { + stats.addWritesExecuted(_modifier.nrOfWritesExecuted()); + stats.addWritesIgnored(_modifier.nrOfWritesIgnored()); + } - if (_lastState == ExecutionState::WAITING) { - return {ExecutionState::WAITING, std::move(stats)}; + doOutput(output, stats); } - TRI_ASSERT(_lastState == ExecutionState::DONE || _lastState == ExecutionState::HASMORE); + return {upstreamState, stats, AqlCall{}}; +} - _modifier.transact(); +template +[[nodiscard]] auto ModificationExecutor::skipRowsRange( + typename FetcherType::DataRange& input, AqlCall& call) + -> std::tuple { + auto stats = ModificationStats{}; + _modifier.reset(); + + ExecutorState upstreamState = ExecutorState::HASMORE; + // only produce at most output.numRowsLeft() many results + if constexpr (std::is_same_v) { + auto range = input.getNextInputRange(); + doCollect(range, call.getOffset()); + upstreamState = range.upstreamState(); + } else { + doCollect(input, call.getOffset()); + upstreamState = input.upstreamState(); + } + + if (_modifier.nrOfOperations() > 0) { + _modifier.transact(); - // If the query is silent, there is no way to relate - // the results slice contents and the submitted documents - // If the query is *not* silent, we should get one result - // for every document. - // Yes. Really. - TRI_ASSERT(_infos._options.silent || _modifier.nrOfDocuments() == _modifier.nrOfResults()); + if (_infos._doCount) { + stats.addWritesExecuted(_modifier.nrOfWritesExecuted()); + stats.addWritesIgnored(_modifier.nrOfWritesIgnored()); + } - doOutput(output, stats); + call.didSkip(_modifier.nrOfOperations()); + } - return {_lastState, std::move(stats)}; + return {upstreamState, stats, _modifier.nrOfOperations(), AqlCall{}}; } using NoPassthroughSingleRowFetcher = SingleRowFetcher; diff --git a/arangod/Aql/ModificationExecutor.h b/arangod/Aql/ModificationExecutor.h index eeaeb5f298cb..deec6db66e29 100644 --- a/arangod/Aql/ModificationExecutor.h +++ b/arangod/Aql/ModificationExecutor.h @@ -39,6 +39,16 @@ namespace arangodb { namespace aql { + +struct AqlCall; +class AqlItemBlockInputRange; +class InputAqlItemRow; +class OutputAqlItemRow; +class ExecutorInfos; +class FilterStats; +template +class SingleRowFetcher; + // // ModificationExecutor is the "base" class for the Insert, Remove, // UpdateReplace and Upsert executors. 
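The produceRows/skipRowsRange implementations above follow the same rhythm: reset the modifier, accumulate at most as many input rows as the output (or the skip request) can take, run the transaction once, then report the results. A toy version of that cycle, with Modifier and produceBatch being simplified stand-ins rather than the real modifier classes:

#include <cstddef>
#include <vector>

// Toy modifier: collects "rows" and pretends every accumulated write succeeds.
struct Modifier {
  std::vector<int> accumulated;
  std::size_t writesExecuted = 0;
  void reset() { accumulated.clear(); writesExecuted = 0; }
  void accumulate(int row) { accumulated.push_back(row); }
  void transact() { writesExecuted = accumulated.size(); }
  std::size_t nrOfOperations() const { return accumulated.size(); }
};

// Accumulate at most maxOutputs rows, transact once, report what was written.
std::size_t produceBatch(std::vector<int> const& input, std::size_t maxOutputs, Modifier& m) {
  m.reset();
  for (std::size_t i = 0; i < input.size() && m.nrOfOperations() < maxOutputs; ++i) {
    m.accumulate(input[i]);
  }
  if (m.nrOfOperations() > 0) {
    m.transact();
  }
  return m.writesExecuted;
}

int main() {
  Modifier m;
  std::vector<int> rows{1, 2, 3, 4, 5};
  return produceBatch(rows, 3, m) == 3 ? 0 : 1;
}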
@@ -163,8 +173,14 @@ class ModificationExecutor { std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(typename FetcherType::DataRange& input, OutputAqlItemRow& output) + -> std::tuple; + + [[nodiscard]] auto skipRowsRange(typename FetcherType::DataRange& inputRange, AqlCall& call) + -> std::tuple; + protected: - std::pair doCollect(size_t maxOutputs); + void doCollect(AqlItemBlockInputRange& input, size_t maxOutputs); void doOutput(OutputAqlItemRow& output, Stats& stats); // The state that was returned on the last call to produceRows. For us From d4e729949f9b10637ccde37f89c79c5043f8c12e Mon Sep 17 00:00:00 2001 From: Heiko Date: Sat, 29 Feb 2020 08:18:17 +0100 Subject: [PATCH 087/122] Feature/aql subquery operations stack materialize executor (#11192) * removed obsolete comment, added produceRows and skipRowsRange, enabled MaterializeExecutor as newStyleExecutor * forgot to use advanceRow, fetch input during skipRowsRange * removed old implementations * removed comment, fixed a logical loop error * skip could be simplified as well * temp workaround Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlockImpl.cpp | 6 +- arangod/Aql/FilterExecutor.cpp | 1 - arangod/Aql/MaterializeExecutor.cpp | 167 ++++++++++++++++------------ arangod/Aql/MaterializeExecutor.h | 47 +++++--- arangod/Aql/RemoteExecutor.cpp | 23 +++- 5 files changed, 151 insertions(+), 93 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index d27ce8534525..b13336abfc27 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -177,7 +177,8 @@ constexpr bool isNewStyleExecutor = is_one_of_v< NoResultsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, - SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>; + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + MaterializeExecutor, MaterializeExecutor>; template ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, @@ -1159,7 +1160,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, NoResultsExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, - SingleRemoteModificationExecutor, SingleRemoteModificationExecutor>), + SingleRemoteModificationExecutor, SingleRemoteModificationExecutor, + MaterializeExecutor, MaterializeExecutor>), "Unexpected executor for SkipVariants::EXECUTOR"); // The LimitExecutor will not work correctly with SkipVariants::FETCHER! diff --git a/arangod/Aql/FilterExecutor.cpp b/arangod/Aql/FilterExecutor.cpp index 515c72fdfce4..516c2ea4678a 100644 --- a/arangod/Aql/FilterExecutor.cpp +++ b/arangod/Aql/FilterExecutor.cpp @@ -100,7 +100,6 @@ std::pair FilterExecutor::expectedNumberOfRows(size_t at return _fetcher.preFetchNumberOfRows(atMost); } -// TODO Remove me, we are using the getSome skip variant here. 
auto FilterExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) -> std::tuple { FilterStats stats{}; diff --git a/arangod/Aql/MaterializeExecutor.cpp b/arangod/Aql/MaterializeExecutor.cpp index 05ab2d5f8ed6..ca223272f2b4 100644 --- a/arangod/Aql/MaterializeExecutor.cpp +++ b/arangod/Aql/MaterializeExecutor.cpp @@ -22,93 +22,96 @@ #include "MaterializeExecutor.h" -#include "StorageEngine/EngineSelectorFeature.h" -#include "StorageEngine/StorageEngine.h" #include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" +#include "StorageEngine/EngineSelectorFeature.h" +#include "StorageEngine/StorageEngine.h" #include "Transaction/Methods.h" using namespace arangodb; using namespace arangodb::aql; -template -arangodb::IndexIterator::DocumentCallback MaterializeExecutor::ReadContext::copyDocumentCallback(ReadContext & ctx) { +template +arangodb::IndexIterator::DocumentCallback MaterializeExecutor::ReadContext::copyDocumentCallback( + ReadContext& ctx) { auto* engine = EngineSelectorFeature::ENGINE; TRI_ASSERT(engine); typedef std::function CallbackFactory; static CallbackFactory const callbackFactories[]{ - [](ReadContext& ctx) { - // capture only one reference to potentially avoid heap allocation - return [&ctx](LocalDocumentId /*id*/, VPackSlice doc) { - TRI_ASSERT(ctx._outputRow); - TRI_ASSERT(ctx._inputRow); - TRI_ASSERT(ctx._inputRow->isInitialized()); - TRI_ASSERT(ctx._infos); - arangodb::aql::AqlValue a{ arangodb::aql::AqlValueHintCopy(doc.begin()) }; - bool mustDestroy = true; - arangodb::aql::AqlValueGuard guard{ a, mustDestroy }; - ctx._outputRow->moveValueInto(ctx._infos->outputMaterializedDocumentRegId(), *ctx._inputRow, guard); - return true; - }; - }, - - [](ReadContext& ctx) { - // capture only one reference to potentially avoid heap allocation - return [&ctx](LocalDocumentId /*id*/, VPackSlice doc) { - TRI_ASSERT(ctx._outputRow); - TRI_ASSERT(ctx._inputRow); - TRI_ASSERT(ctx._inputRow->isInitialized()); - TRI_ASSERT(ctx._infos); - arangodb::aql::AqlValue a{ arangodb::aql::AqlValueHintDocumentNoCopy(doc.begin()) }; - bool mustDestroy = true; - arangodb::aql::AqlValueGuard guard{ a, mustDestroy }; - ctx._outputRow->moveValueInto(ctx._infos->outputMaterializedDocumentRegId(), *ctx._inputRow, guard); - return true; - }; - } }; + [](ReadContext& ctx) { + // capture only one reference to potentially avoid heap allocation + return [&ctx](LocalDocumentId /*id*/, VPackSlice doc) { + TRI_ASSERT(ctx._outputRow); + TRI_ASSERT(ctx._inputRow); + TRI_ASSERT(ctx._inputRow->isInitialized()); + TRI_ASSERT(ctx._infos); + arangodb::aql::AqlValue a{arangodb::aql::AqlValueHintCopy(doc.begin())}; + bool mustDestroy = true; + arangodb::aql::AqlValueGuard guard{a, mustDestroy}; + ctx._outputRow->moveValueInto(ctx._infos->outputMaterializedDocumentRegId(), + *ctx._inputRow, guard); + return true; + }; + }, + + [](ReadContext& ctx) { + // capture only one reference to potentially avoid heap allocation + return [&ctx](LocalDocumentId /*id*/, VPackSlice doc) { + TRI_ASSERT(ctx._outputRow); + TRI_ASSERT(ctx._inputRow); + TRI_ASSERT(ctx._inputRow->isInitialized()); + TRI_ASSERT(ctx._infos); + arangodb::aql::AqlValue a{arangodb::aql::AqlValueHintDocumentNoCopy(doc.begin())}; + bool mustDestroy = true; + arangodb::aql::AqlValueGuard guard{a, mustDestroy}; + ctx._outputRow->moveValueInto(ctx._infos->outputMaterializedDocumentRegId(), + *ctx._inputRow, guard); + return true; + }; + }}; return callbackFactories[size_t(engine->useRawDocumentPointers())](ctx); } -template +template 
arangodb::aql::MaterializerExecutorInfos::MaterializerExecutorInfos( RegisterId nrInputRegisters, RegisterId nrOutputRegisters, // cppcheck-suppress passedByValue std::unordered_set registersToClear, // cppcheck-suppress passedByValue - std::unordered_set registersToKeep, - T const collectionSource, RegisterId inNmDocId, - RegisterId outDocRegId, transaction::Methods* trx) - : ExecutorInfos( - getReadableInputRegisters(collectionSource, inNmDocId), - make_shared_unordered_set(std::initializer_list({outDocRegId})), - nrInputRegisters, nrOutputRegisters, - std::move(registersToClear), std::move(registersToKeep)), + std::unordered_set registersToKeep, T const collectionSource, + RegisterId inNmDocId, RegisterId outDocRegId, transaction::Methods* trx) + : ExecutorInfos(getReadableInputRegisters(collectionSource, inNmDocId), + make_shared_unordered_set(std::initializer_list({outDocRegId})), + nrInputRegisters, nrOutputRegisters, + std::move(registersToClear), std::move(registersToKeep)), _collectionSource(collectionSource), _inNonMaterializedDocRegId(inNmDocId), - _outMaterializedDocumentRegId(outDocRegId), _trx(trx) { + _outMaterializedDocumentRegId(outDocRegId), + _trx(trx) {} + +template +std::pair arangodb::aql::MaterializeExecutor::produceRows(OutputAqlItemRow& output) { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } -template -std::pair arangodb::aql::MaterializeExecutor::produceRows(OutputAqlItemRow & output) { - InputAqlItemRow input{CreateInvalidInputRowHint{}}; - ExecutionState state; - bool written = false; - // some micro-optimization - auto& callback = _readDocumentContext._callback; - auto docRegId = _readDocumentContext._infos->inputNonMaterializedDocRegId(); - T collectionSource = _readDocumentContext._infos->collectionSource(); - auto* trx = _readDocumentContext._infos->trx(); - do { - std::tie(state, input) = _fetcher.fetchRow(); - if (state == ExecutionState::WAITING) { - return { state, NoStats{} }; - } +template +std::tuple arangodb::aql::MaterializeExecutor::produceRows( + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) { + AqlCall upstreamCall{}; + upstreamCall.fullCount = output.getClientCall().fullCount; + + while (inputRange.hasDataRow() && !output.isFull()) { + bool written = false; + + // some micro-optimization + auto& callback = _readDocumentContext._callback; + auto docRegId = _readDocumentContext._infos->inputNonMaterializedDocRegId(); + T collectionSource = _readDocumentContext._infos->collectionSource(); + auto* trx = _readDocumentContext._infos->trx(); + auto const [state, input] = inputRange.nextDataRow(); - if (!input) { - TRI_ASSERT(state == ExecutionState::DONE); - return {state, NoStats{}}; - } arangodb::LogicalCollection const* collection = nullptr; if constexpr (std::is_same::value) { if (_collection == nullptr) { @@ -116,26 +119,42 @@ std::pair arangodb::aql::MaterializeExecutor::produc } collection = _collection; } else { - collection = - reinterpret_cast( + collection = reinterpret_cast( input.getValue(collectionSource).slice().getUInt()); } TRI_ASSERT(collection != nullptr); _readDocumentContext._inputRow = &input; _readDocumentContext._outputRow = &output; - written = collection->readDocumentWithCallback(trx, - LocalDocumentId(input.getValue(docRegId).slice().getUInt()), - callback); - } while (!written && state != ExecutionState::DONE); - return {state, NoStats{}}; + written = collection->readDocumentWithCallback( + trx, LocalDocumentId(input.getValue(docRegId).slice().getUInt()), callback); + if 
(written) { + output.advanceRow(); + } + } + + return {inputRange.upstreamState(), NoStats{}, upstreamCall}; +} + +template +std::tuple arangodb::aql::MaterializeExecutor::skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call) { + size_t skipped = 0; + + if (call.getLimit() > 0) { + // we can only account for offset + skipped = inputRange.skip(call.getOffset()); + } else { + skipped = inputRange.skipAll(); + } + call.didSkip(skipped); + + return {inputRange.upstreamState(), NoStats{}, skipped, call}; } -template +template std::tuple arangodb::aql::MaterializeExecutor::skipRows(size_t toSkipRequested) { - ExecutionState state; - size_t skipped; - std::tie(state, skipped) = _fetcher.skipRows(toSkipRequested); - return std::make_tuple(state, NoStats{}, skipped); + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } template class ::arangodb::aql::MaterializeExecutor; diff --git a/arangod/Aql/MaterializeExecutor.h b/arangod/Aql/MaterializeExecutor.h index cb1c74ab3e38..13fdc290120a 100644 --- a/arangod/Aql/MaterializeExecutor.h +++ b/arangod/Aql/MaterializeExecutor.h @@ -39,13 +39,15 @@ namespace arangodb { namespace aql { +struct AqlCall; +class AqlItemBlockInputRange; class InputAqlItemRow; class ExecutorInfos; template class SingleRowFetcher; class NoStats; -template +template class MaterializerExecutorInfos : public ExecutorInfos { public: MaterializerExecutorInfos(RegisterId nrInputRegisters, RegisterId nrOutputRegisters, @@ -67,18 +69,16 @@ class MaterializerExecutorInfos : public ExecutorInfos { return _inNonMaterializedDocRegId; } - transaction::Methods* trx() const { - return _trx; - } + transaction::Methods* trx() const { return _trx; } - T collectionSource() const { - return _collectionSource; - } + T collectionSource() const { return _collectionSource; } private: - std::shared_ptr> getReadableInputRegisters(T const collectionSource, RegisterId inNmDocId) { + std::shared_ptr> getReadableInputRegisters( + T const collectionSource, RegisterId inNmDocId) { if constexpr (std::is_same::value) { - return make_shared_unordered_set(std::initializer_list({collectionSource, inNmDocId})); + return make_shared_unordered_set( + std::initializer_list({collectionSource, inNmDocId})); } else { return make_shared_unordered_set(std::initializer_list({inNmDocId})); } @@ -94,7 +94,7 @@ class MaterializerExecutorInfos : public ExecutorInfos { transaction::Methods* _trx; }; -template +template class MaterializeExecutor { public: struct Properties { @@ -108,19 +108,36 @@ class MaterializeExecutor { MaterializeExecutor(MaterializeExecutor&&) = default; MaterializeExecutor(MaterializeExecutor const&) = delete; - MaterializeExecutor(Fetcher& fetcher, Infos& infos) : _readDocumentContext(infos), _infos(infos), _fetcher(fetcher) {} + MaterializeExecutor(Fetcher& fetcher, Infos& infos) + : _readDocumentContext(infos), _infos(infos), _fetcher(fetcher) {} std::pair produceRows(OutputAqlItemRow& output); std::tuple skipRows(size_t toSkipRequested); + /** + * @brief produce the next Row of Aql Values. + * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple produceRows( + AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output); + + /** + * @brief skip the next Row of Aql Values. 
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] std::tuple skipRowsRange( + AqlItemBlockInputRange& inputRange, AqlCall& call); + protected: class ReadContext { public: explicit ReadContext(Infos& infos) - : _infos(&infos), - _inputRow(nullptr), - _outputRow(nullptr), - _callback(copyDocumentCallback(*this)) {} + : _infos(&infos), + _inputRow(nullptr), + _outputRow(nullptr), + _callback(copyDocumentCallback(*this)) {} ReadContext(ReadContext&&) = default; diff --git a/arangod/Aql/RemoteExecutor.cpp b/arangod/Aql/RemoteExecutor.cpp index df23a2281a65..92d406c2d49e 100644 --- a/arangod/Aql/RemoteExecutor.cpp +++ b/arangod/Aql/RemoteExecutor.cpp @@ -434,7 +434,10 @@ std::tuple ExecutionBlockImpl ExecutionBlockImplsize()); + if (myCall.getLimit() == 0) { + return {ExecutionState::DONE, 0, block}; + } + } + return {state, 0, block}; + } else if (AqlCall::IsFullCountCall(myCall)) { + auto const [state, skipped] = skipSome(ExecutionBlock::SkipAllSize()); + if (state != ExecutionState::WAITING) { + myCall.didSkip(skipped); + } + return {state, skipped, nullptr}; + } else if (AqlCall::IsFastForwardCall(myCall)) { + // No idea if DONE is correct here... + return {ExecutionState::DONE, 0, nullptr}; } + // Should never get here! THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } From 9a76c0c4b495420f288526a77c08e72f7b9645b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Sat, 29 Feb 2020 08:24:00 +0100 Subject: [PATCH 088/122] Feature/aql subquery execute remote (#11197) * Clarified comment * Updated comment * Added two static strings * Avoid unintentional streaming operator of string_view as a container * Added parser for execute rest call * Fixed ambiguous overload in ResultT * Added execute and executeForClient to ExecutionEngine * Implemented executeForClient in RestAqlHandler * Add default value * Updated comment * Changed error attribute * Added API toggle to RemoteExecutor * Fixed compile error * Fixed duplicate log id * Try to make MSVC work * Moved AqlExecuteResult to a separate file * Moved AqlExecuteResult to a separate file * Implemtented execute in Remote; except for (de)serialization, which is still missing * Tried to fix MSVC compile error * Revert "Tried to fix MSVC compile error" This reverts commit f6f43b2c7e4bfe17dafe65f48bcd5c0b6e5c69a9. * Next try to fix MSVC * Implemented (de)serialization for RemoteExecutor::execute * Fixes * Added minimal AqlCall (de)serialization tests * Bugfix * Added minimal (de)serialization tests for AqlCallStack and AqlExecuteResult * Fixed test * Changed Serialization format a little * Bugfix * Initialize SingletonBlock immediately with an empty row * Revert "Next try to fix MSVC" This reverts commit 528c4c795d10ee0aa6686d143be1f6faa6e9b553. * Revert "Try to make MSVC work" This reverts commit ba7d9c072fccb969da1cdf045eadf49c297da8e9. 
* Work around MSVC shortcoming * Work around MSVC shortcoming * Attempt to fix windows compile issue Co-authored-by: Michael Hackstein --- arangod/Aql/AqlCall.cpp | 76 ++++++++-- arangod/Aql/AqlCall.h | 9 ++ arangod/Aql/AqlCallStack.cpp | 45 +++++- arangod/Aql/AqlCallStack.h | 4 + arangod/Aql/AqlExecuteResult.cpp | 149 ++++++++++++++++++++ arangod/Aql/AqlExecuteResult.h | 10 ++ arangod/Aql/ClusterNodes.cpp | 2 +- arangod/Aql/ExecutionBlock.cpp | 8 +- arangod/Aql/ExecutionBlock.h | 3 +- arangod/Aql/ExecutionBlockImpl.cpp | 3 +- arangod/Aql/ExecutionNode.cpp | 7 +- arangod/Aql/RemoteExecutor.cpp | 158 ++++++++++++++++++--- arangod/Aql/RemoteExecutor.h | 47 +++++-- arangod/Aql/RestAqlHandler.cpp | 11 +- arangod/Aql/RestAqlHandler.h | 24 ++-- lib/Basics/StaticStrings.cpp | 4 +- lib/Basics/StaticStrings.h | 1 + tests/Aql/RemoteExecutorTest.cpp | 217 +++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + 19 files changed, 716 insertions(+), 63 deletions(-) create mode 100644 tests/Aql/RemoteExecutorTest.cpp diff --git a/arangod/Aql/AqlCall.cpp b/arangod/Aql/AqlCall.cpp index cc8f947f13c1..1621a38271e7 100644 --- a/arangod/Aql/AqlCall.cpp +++ b/arangod/Aql/AqlCall.cpp @@ -42,7 +42,7 @@ auto getStringView(velocypack::Slice slice) -> std::string_view { velocypack::StringRef ref = slice.stringRef(); return std::string_view(ref.data(), ref.size()); } -} +} // namespace auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { if (ADB_UNLIKELY(!slice.isObject())) { @@ -63,7 +63,9 @@ auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { auto fullCount = false; auto const readLimit = [](velocypack::Slice slice) -> ResultT { - if (slice.isEqualString(StaticStrings::AqlRemoteInfinity)) { + auto const type = slice.type(); + if (type == velocypack::ValueType::String && + slice.isEqualString(StaticStrings::AqlRemoteInfinity)) { return AqlCall::Limit{AqlCall::Infinity{}}; } else if (slice.isInteger()) { try { @@ -89,7 +91,11 @@ auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { } }; - auto const readLimitType = [](velocypack::Slice slice) -> ResultT { + auto const readLimitType = + [](velocypack::Slice slice) -> ResultT> { + if (slice.isNull()) { + return {std::nullopt}; + } if (ADB_UNLIKELY(!slice.isString())) { auto message = std::string{ "When deserializating AqlCall: When reading limitType: " @@ -99,12 +105,10 @@ auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { } auto value = getStringView(slice); if (value == StaticStrings::AqlRemoteLimitTypeSoft) { - return AqlCall::LimitType::SOFT; - } - else if (value == StaticStrings::AqlRemoteLimitTypeHard) { - return AqlCall::LimitType::HARD; - } - else { + return {AqlCall::LimitType::SOFT}; + } else if (value == StaticStrings::AqlRemoteLimitTypeHard) { + return {AqlCall::LimitType::HARD}; + } else { auto message = std::string{ "When deserializating AqlCall: When reading limitType: " "Unexpected value '"}; @@ -214,7 +218,8 @@ auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { break; } } else if (ADB_UNLIKELY(!std::holds_alternative(limit))) { - return Result(TRI_ERROR_TYPE_ERROR, + return Result( + TRI_ERROR_TYPE_ERROR, "When deserializating AqlCall: limit set, but limitType is missing."); } @@ -223,3 +228,54 @@ auto AqlCall::fromVelocyPack(velocypack::Slice slice) -> ResultT { return call; } + +void AqlCall::toVelocyPack(velocypack::Builder& builder) const { + using namespace velocypack; + + auto limitType = std::optional{}; + auto limit = Limit{Infinity{}}; + if (hasHardLimit()) { + 
limitType = LimitType::HARD; + limit = hardLimit; + } else if (hasSoftLimit()) { + limitType = LimitType::SOFT; + limit = softLimit; + } + + auto const limitValue = + std::visit(overload{ + [](Infinity) { + return Value(StaticStrings::AqlRemoteInfinity); + }, + [](std::size_t limit) { return Value(limit); }, + }, + limit); + auto const limitTypeValue = std::invoke([&]() { + if (!limitType.has_value()) { + return Value(ValueType::Null); + } else { + switch (limitType.value()) { + case LimitType::SOFT: + return Value(StaticStrings::AqlRemoteLimitTypeSoft); + case LimitType::HARD: + return Value(StaticStrings::AqlRemoteLimitTypeHard); + } + // unreachable + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } + }); + + builder.openObject(); + builder.add(StaticStrings::AqlRemoteLimit, limitValue); + builder.add(StaticStrings::AqlRemoteLimitType, limitTypeValue); + builder.add(StaticStrings::AqlRemoteFullCount, Value(fullCount)); + builder.add(StaticStrings::AqlRemoteOffset, Value(offset)); + builder.close(); +} + +auto AqlCall::toString() const -> std::string { + auto stream = std::stringstream{}; + stream << *this; + return stream.str(); +} diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index 07214859d631..b310dfee3175 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -64,6 +64,9 @@ struct AqlCall { fullCount{fullCount} {} static auto fromVelocyPack(velocypack::Slice) -> ResultT; + void toVelocyPack(velocypack::Builder&) const; + + auto toString() const -> std::string; // TODO Remove me, this will not be necessary later static AqlCall SimulateSkipSome(std::size_t toSkip) { @@ -234,6 +237,12 @@ constexpr bool operator==(AqlCall::Limit const& a, AqlCall::Limit const& b) { a); } +constexpr bool operator==(AqlCall const& left, AqlCall const& right) { + return left.hardLimit == right.hardLimit && left.softLimit == right.softLimit && + left.offset == right.offset && left.fullCount == right.fullCount && + left.skippedRows == right.skippedRows; +} + inline std::ostream& operator<<(std::ostream& out, const arangodb::aql::AqlCall::Limit& limit) { return std::visit(arangodb::overload{[&out](size_t const& i) -> std::ostream& { diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 92dab45176a5..68c5cd9c5b8d 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -22,8 +22,9 @@ #include "AqlCallStack.h" -#include +#include #include +#include #include // TODO: This class is not yet memory efficient or optimized in any way. 
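// A rough sketch, assuming the wire format documented in RestAqlHandler.h:
// a single serialized AqlCall is expected to look like
//   { "offset": 0, "limit": "infinity", "limitType": null, "fullCount": false }
// or, with a finite limit,
//   { "offset": 3, "limit": 7, "limitType": "soft", "fullCount": false }
// and AqlCallStack::toVelocyPack (added below) wraps one such object per call
// into a velocypack array.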
@@ -34,8 +35,7 @@ using namespace arangodb; using namespace arangodb::aql; AqlCallStack::AqlCallStack(AqlCall call, bool compatibilityMode3_6) - : _operations{{std::move(call)}}, - _compatibilityMode3_6(compatibilityMode3_6) {} + : _operations{{std::move(call)}}, _compatibilityMode3_6(compatibilityMode3_6) {} AqlCallStack::AqlCallStack(AqlCallStack const& other, AqlCall call) : _operations{other._operations} { @@ -149,3 +149,42 @@ auto AqlCallStack::fromVelocyPack(velocypack::Slice const slice) -> ResultT{}; + reverseStack.reserve(_operations.size()); + { + auto ops = _operations; + while (!ops.empty()) { + reverseStack.emplace_back(ops.top()); + ops.pop(); + } + } + + builder.openArray(); + for (auto it = reverseStack.rbegin(); it != reverseStack.rend(); ++it) { + auto const& call = *it; + call.toVelocyPack(builder); + } + builder.close(); +} + +auto AqlCallStack::toString() const -> std::string { + auto result = std::string{}; + result += "["; + auto ops = _operations; + if (!ops.empty()) { + auto op = ops.top(); + ops.pop(); + result += " "; + result += op.toString(); + while (!ops.empty()) { + op = ops.top(); + ops.pop(); + result += ", "; + result += op.toString(); + } + } + result += " ]"; + return result; +} diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 9d8e98df7d0b..f67e505ddffc 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -47,6 +47,8 @@ class AqlCallStack { static auto fromVelocyPack(velocypack::Slice) -> ResultT; + auto toString() const -> std::string; + // Quick test is this CallStack is of local relevance, or it is sufficient to pass it through bool isRelevant() const; @@ -88,6 +90,8 @@ class AqlCallStack { return _operations.size() + _depth; } + void toVelocyPack(velocypack::Builder& builder) const; + private: explicit AqlCallStack(std::stack&& operations); diff --git a/arangod/Aql/AqlExecuteResult.cpp b/arangod/Aql/AqlExecuteResult.cpp index 3b56bebec8bf..db10deeaab96 100644 --- a/arangod/Aql/AqlExecuteResult.cpp +++ b/arangod/Aql/AqlExecuteResult.cpp @@ -22,14 +22,28 @@ #include "AqlExecuteResult.h" +#include "Aql/AqlItemBlockManager.h" #include "Basics/StaticStrings.h" +#include "Cluster/ResultT.h" +#include "Logger/LogMacros.h" +#include "Logger/Logger.h" #include #include +#include + using namespace arangodb; using namespace arangodb::aql; +namespace { +// hack for MSVC +auto getStringView(velocypack::Slice slice) -> std::string_view { + velocypack::StringRef ref = slice.stringRef(); + return std::string_view(ref.data(), ref.size()); +} +} // namespace + auto AqlExecuteResult::state() const noexcept -> ExecutionState { return _state; } @@ -59,6 +73,7 @@ void AqlExecuteResult::toVelocyPack(velocypack::Builder& builder, THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL_AQL, "Unhandled state"); }; + builder.openObject(); builder.add(StaticStrings::AqlRemoteState, stateToValue(state())); builder.add(StaticStrings::AqlRemoteSkipped, Value(skipped())); if (block() != nullptr) { @@ -67,4 +82,138 @@ void AqlExecuteResult::toVelocyPack(velocypack::Builder& builder, } else { builder.add(StaticStrings::AqlRemoteBlock, Value(ValueType::Null)); } + builder.close(); +} + +auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, + AqlItemBlockManager& itemBlockManager) + -> ResultT { + if (ADB_UNLIKELY(!slice.isObject())) { + using namespace std::string_literals; + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializating AqlExecuteResult: Expected object, got "s + + slice.typeName()); + } + + auto 
expectedPropertiesFound = std::map{}; + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteState, false); + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteSkipped, false); + expectedPropertiesFound.emplace(StaticStrings::AqlRemoteBlock, false); + + auto state = ExecutionState::HASMORE; + auto skipped = std::size_t{}; + auto block = SharedAqlItemBlockPtr{}; + + auto const readState = [](velocypack::Slice slice) -> ResultT { + if (ADB_UNLIKELY(!slice.isString())) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading state: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + auto value = getStringView(slice); + if (value == StaticStrings::AqlRemoteStateDone) { + return ExecutionState::DONE; + } else if (value == StaticStrings::AqlRemoteStateHasmore) { + return ExecutionState::HASMORE; + } else { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading state: " + "Unexpected value '"}; + message += value; + message += "'"; + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + }; + + auto const readSkipped = [](velocypack::Slice slice) -> ResultT { + if (!slice.isInteger()) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading skipped: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + try { + return slice.getNumber(); + } catch (velocypack::Exception const& ex) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading skipped: "}; + message += ex.what(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + }; + + auto const readBlock = [&itemBlockManager](velocypack::Slice slice) -> ResultT { + if (slice.isNull()) { + return SharedAqlItemBlockPtr{nullptr}; + } + TRI_ASSERT(slice.isObject()); + return itemBlockManager.requestAndInitBlock(slice); + }; + + for (auto const it : velocypack::ObjectIterator(slice)) { + auto const keySlice = it.key; + if (ADB_UNLIKELY(!keySlice.isString())) { + return Result( + TRI_ERROR_TYPE_ERROR, + "When deserializating AqlExecuteResult: Key is not a string"); + } + auto const key = getStringView(keySlice); + + if (auto propIt = expectedPropertiesFound.find(key); + ADB_LIKELY(propIt != expectedPropertiesFound.end())) { + if (ADB_UNLIKELY(propIt->second)) { + return Result( + TRI_ERROR_TYPE_ERROR, + "When deserializating AqlExecuteResult: Encountered duplicate key"); + } + propIt->second = true; + } + + if (key == StaticStrings::AqlRemoteState) { + auto maybeState = readState(it.value); + if (maybeState.fail()) { + return std::move(maybeState).result(); + } + state = maybeState.get(); + } else if (key == StaticStrings::AqlRemoteSkipped) { + auto maybeSkipped = readSkipped(it.value); + if (maybeSkipped.fail()) { + return std::move(maybeSkipped).result(); + } + skipped = maybeSkipped.get(); + } else if (key == StaticStrings::AqlRemoteBlock) { + auto maybeBlock = readBlock(it.value); + if (maybeBlock.fail()) { + return std::move(maybeBlock).result(); + } + block = maybeBlock.get(); + } else { + LOG_TOPIC("cc6f4", WARN, Logger::AQL) + << "When deserializating AqlExecuteResult: Encountered unexpected " + "key " + << keySlice.toJson(); + // If you run into this assertion during rolling upgrades after adding a + // new attribute, remove it in the older version. 
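+      // For illustration, assuming the format documented in RestAqlHandler.h:
+      // a well-formed payload carries exactly the three keys collected above,
+      // e.g. { "state": "hasMore", "skipped": 2, "block": { ... } }, with
+      // "block" null when no rows are returned; any other key ends up here.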
+ TRI_ASSERT(false); + } + } + + for (auto const& it : expectedPropertiesFound) { + if (ADB_UNLIKELY(!it.second)) { + auto message = + std::string{"When deserializating AqlExecuteResult: missing key "}; + message += it.first; + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + } + + return AqlExecuteResult{state, skipped, std::move(block)}; +} + +auto AqlExecuteResult::asTuple() const noexcept + -> std::tuple { + return {state(), skipped(), block()}; } diff --git a/arangod/Aql/AqlExecuteResult.h b/arangod/Aql/AqlExecuteResult.h index 4d23804bd019..56eb448c83db 100644 --- a/arangod/Aql/AqlExecuteResult.h +++ b/arangod/Aql/AqlExecuteResult.h @@ -28,6 +28,11 @@ #include +namespace arangodb { +template +class ResultT; +} + namespace arangodb::velocypack { class Builder; struct Options; @@ -41,11 +46,16 @@ class AqlExecuteResult { : _state(state), _skipped(skipped), _block(std::move(block)) {} void toVelocyPack(velocypack::Builder&, velocypack::Options const*); + static auto fromVelocyPack(velocypack::Slice, AqlItemBlockManager&) + -> ResultT; [[nodiscard]] auto state() const noexcept -> ExecutionState; [[nodiscard]] auto skipped() const noexcept -> std::size_t; [[nodiscard]] auto block() const noexcept -> SharedAqlItemBlockPtr const&; + [[nodiscard]] auto asTuple() const noexcept + -> std::tuple; + private: ExecutionState _state = ExecutionState::HASMORE; std::size_t _skipped = 0; diff --git a/arangod/Aql/ClusterNodes.cpp b/arangod/Aql/ClusterNodes.cpp index 1bcfffe3eb12..4e393a10caa6 100644 --- a/arangod/Aql/ClusterNodes.cpp +++ b/arangod/Aql/ClusterNodes.cpp @@ -172,7 +172,7 @@ std::unique_ptr RemoteNode::createBlock( std::move(regsToKeep)); return std::make_unique>( - &engine, this, std::move(infos), server(), getDistributeId(), queryId()); + &engine, this, std::move(infos), server(), getDistributeId(), queryId(), api()); } /// @brief toVelocyPack, for RemoteNode diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index a9ba6fb98b3a..ec4ca75d3433 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -316,8 +316,8 @@ void ExecutionBlock::traceExecuteBegin(AqlCallStack const& stack) { } } -void ExecutionBlock::traceExecuteEnd( - std::tuple const& result) { +auto ExecutionBlock::traceExecuteEnd(std::tuple const& result) + -> std::tuple { if (_profile >= PROFILE_LEVEL_BLOCKS) { auto const& [state, skipped, block] = result; auto const items = block != nullptr ? 
block->size() : 0; @@ -358,6 +358,8 @@ void ExecutionBlock::traceExecuteEnd( } } } + + return result; } auto ExecutionBlock::printTypeInfo() const -> std::string const { @@ -373,4 +375,4 @@ auto ExecutionBlock::printBlockInfo() const -> std::string const { ExecutionNode const* node = getPlanNode(); stream << printTypeInfo() << " this=" << (uintptr_t)this << " id=" << node->id(); return stream.str(); -} \ No newline at end of file +} diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index 059217978485..b3652bc5576c 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -155,7 +155,8 @@ class ExecutionBlock { void traceExecuteBegin(AqlCallStack const& stack); // Trace the end of a execute call, potentially with result - void traceExecuteEnd(std::tuple const& result); + auto traceExecuteEnd(std::tuple const& result) + -> std::tuple; [[nodiscard]] auto printBlockInfo() const -> std::string const; [[nodiscard]] auto printTypeInfo() const -> std::string const; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index b13336abfc27..add3cf3dee85 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -657,8 +657,7 @@ std::tuple ExecutionBlockImpl SingletonNode::createBlock( IdExecutorInfos infos(nrRegs, std::move(toKeep), getRegsToClear(), false); - return std::make_unique>>(&engine, this, - std::move(infos)); + auto res = + std::make_unique>>(&engine, this, + std::move(infos)); + std::ignore = res->initializeCursor(InputAqlItemRow{CreateInvalidInputRowHint{}}); + return res; } /// @brief toVelocyPack, for SingletonNode diff --git a/arangod/Aql/RemoteExecutor.cpp b/arangod/Aql/RemoteExecutor.cpp index 92d406c2d49e..c231fc4cb91e 100644 --- a/arangod/Aql/RemoteExecutor.cpp +++ b/arangod/Aql/RemoteExecutor.cpp @@ -24,13 +24,14 @@ #include "ApplicationFeatures/ApplicationServer.h" #include "Aql/AqlCallStack.h" +#include "Aql/AqlExecuteResult.h" #include "Aql/ClusterNodes.h" #include "Aql/ExecutionEngine.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" #include "Aql/Query.h" +#include "Aql/RestAqlHandler.h" #include "Basics/MutexLocker.h" -#include "Basics/RecursiveLocker.h" #include "Basics/StringBuffer.h" #include "Basics/VelocyPackHelper.h" #include "Cluster/ServerState.h" @@ -41,11 +42,10 @@ #include "Rest/CommonDefines.h" #include "Transaction/Context.h" #include "Transaction/Methods.h" -#include "VocBase/vocbase.h" #include #include - +#include #include #include @@ -61,7 +61,8 @@ constexpr std::chrono::seconds kDefaultTimeOutSecs(3600); ExecutionBlockImpl::ExecutionBlockImpl( ExecutionEngine* engine, RemoteNode const* node, ExecutorInfos&& infos, - std::string const& server, std::string const& ownName, std::string const& queryId) + std::string const& server, std::string const& ownName, + std::string const& queryId, Api const api) : ExecutionBlock(engine, node), _infos(std::move(infos)), _query(*engine->getQuery()), @@ -72,7 +73,8 @@ ExecutionBlockImpl::ExecutionBlockImpl( _lastError(TRI_ERROR_NO_ERROR), _lastTicket(0), _requestInFlight(false), - _hasTriggeredShutdown(false) { + _hasTriggeredShutdown(false), + _apiToUse(api) { TRI_ASSERT(!queryId.empty()); TRI_ASSERT((arangodb::ServerState::instance()->isCoordinator() && ownName.empty()) || (!arangodb::ServerState::instance()->isCoordinator() && !ownName.empty())); @@ -151,7 +153,7 @@ std::pair ExecutionBlockImpl ExecutionBlockImpl::skipSomeWi builder.close(); traceSkipSomeRequest(builder.slice(), atMost); } - auto 
res = sendAsyncRequest(fuerte::RestVerb::Put, "/_api/aql/skipSome/", + auto res = sendAsyncRequest(fuerte::RestVerb::Put, "/_api/aql/skipSome", std::move(buffer)); if (!res.ok()) { @@ -319,7 +321,7 @@ std::pair ExecutionBlockImpl::initialize traceInitializeCursorRequest(builder.slice()); auto res = sendAsyncRequest(fuerte::RestVerb::Put, - "/_api/aql/initializeCursor/", std::move(buffer)); + "/_api/aql/initializeCursor", std::move(buffer)); if (!res.ok()) { THROW_ARANGO_EXCEPTION(res); } @@ -351,7 +353,7 @@ std::pair ExecutionBlockImpl::shutdown(i traceShutdownRequest(builder.slice(), errorCode); - auto res = sendAsyncRequest(fuerte::RestVerb::Put, "/_api/aql/shutdown/", + auto res = sendAsyncRequest(fuerte::RestVerb::Put, "/_api/aql/shutdown", std::move(buffer)); if (!res.ok()) { THROW_ARANGO_EXCEPTION(res); @@ -429,23 +431,22 @@ std::pair ExecutionBlockImpl::shutdown(i return {ExecutionState::DONE, TRI_ERROR_NO_ERROR}; } -std::tuple ExecutionBlockImpl::execute( - AqlCallStack stack) { +auto ExecutionBlockImpl::executeViaOldApi(AqlCallStack stack) + -> std::tuple { // Use the old getSome/SkipSome API. - // TODO needs execute implementation instead auto myCall = stack.popCall(); TRI_ASSERT(AqlCall::IsSkipSomeCall(myCall) || AqlCall::IsGetSomeCall(myCall) || AqlCall::IsFullCountCall(myCall) || AqlCall::IsFastForwardCall(myCall)); if (AqlCall::IsSkipSomeCall(myCall)) { - auto const [state, skipped] = skipSome(myCall.getOffset()); + auto const [state, skipped] = skipSomeWithoutTrace(myCall.getOffset()); if (state != ExecutionState::WAITING) { myCall.didSkip(skipped); } return {state, skipped, nullptr}; } else if (AqlCall::IsGetSomeCall(myCall)) { - auto const [state, block] = getSome(myCall.getLimit()); + auto const [state, block] = getSomeWithoutTrace(myCall.getLimit()); // We do not need to count as softLimit will be overwritten, and hard cannot be set. if (stack.empty() && myCall.hasHardLimit() && !myCall.needsFullCount() && block != nullptr) { // However we can do a short-cut here to report DONE on hardLimit if we are on the top-level query. @@ -468,7 +469,126 @@ std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) + -> std::tuple { + traceExecuteBegin(stack); + auto res = executeWithoutTrace(stack); + return traceExecuteEnd(res); +} + +auto ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) +-> std::tuple { + if (ADB_UNLIKELY(api() == Api::GET_SOME)) { + return executeViaOldApi(stack); + } + TRI_ASSERT(api() == Api::EXECUTE); + return executeViaNewApi(stack); +} + +auto ExecutionBlockImpl::executeViaNewApi(AqlCallStack callStack) + -> std::tuple { + // silence tests -- we need to introduce new failure tests for fetchers + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome1") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome2") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome3") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + if (getQuery().killed()) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); + } + + std::unique_lock guard(_communicationMutex); + + if (_requestInFlight) { + // Already sent a shutdown request, but haven't got an answer yet. + return {ExecutionState::WAITING, 0, nullptr}; + } + + // For every call we simply forward via HTTP + if (_lastError.fail()) { + TRI_ASSERT(_lastResponse == nullptr); + Result res = std::move(_lastError); + _lastError.reset(); + // we were called with an error need to throw it. 
+ THROW_ARANGO_EXCEPTION(res); + } + + if (_lastResponse != nullptr) { + TRI_ASSERT(_lastError.ok()); + // We do not have an error but a result, all is good + // We have an open result still. + auto response = std::move(_lastResponse); + // Result is the response which will be a serialized AqlItemBlock + + // both must be reset before return or throw + TRI_ASSERT(_lastError.ok() && _lastResponse == nullptr); + + VPackSlice responseBody = response->slice(); + + auto result = deserializeExecuteCallResultBody(responseBody); + + if (result.fail()) { + THROW_ARANGO_EXCEPTION(result.result()); + } + + return result->asTuple(); + } + + // We need to send a request here + auto buffer = serializeExecuteCallBody(callStack); + this->traceExecuteRequest(VPackSlice(buffer.data()), callStack); + + auto res = sendAsyncRequest(fuerte::RestVerb::Put, + RestAqlHandler::Route::execute(), std::move(buffer)); + + if (!res.ok()) { + THROW_ARANGO_EXCEPTION(res); + } + + return {ExecutionState::WAITING, 0, nullptr}; +} + +auto ExecutionBlockImpl::deserializeExecuteCallResultBody(VPackSlice const slice) const + -> ResultT { + // Errors should have been caught earlier + TRI_ASSERT(TRI_ERROR_NO_ERROR == + VelocyPackHelper::getNumericValue(slice, StaticStrings::Code, -1)); + + if (ADB_UNLIKELY(!slice.isObject())) { + using namespace std::string_literals; + return Result{TRI_ERROR_TYPE_ERROR, "When parsing execute result: expected object, got "s + slice.typeName()}; + } + + if (auto value = slice.get(StaticStrings::AqlRemoteResult); !value.isNone()) { + return AqlExecuteResult::fromVelocyPack(value, _engine->itemBlockManager()); + } + + return Result{TRI_ERROR_TYPE_ERROR, "When parsing execute result: field result missing"}; +} + +auto ExecutionBlockImpl::serializeExecuteCallBody(AqlCallStack const& callStack) const + -> VPackBuffer { + VPackBuffer buffer; + { + VPackBuilder builder(buffer); + builder.openObject(); + builder.add(VPackValue(StaticStrings::AqlRemoteCallStack)); + callStack.toVelocyPack(builder); + builder.close(); + } + return buffer; +} + +auto ExecutionBlockImpl::api() const noexcept -> Api { + return _apiToUse; } namespace { @@ -532,7 +652,7 @@ Result ExecutionBlockImpl::sendAsyncRequest(fuerte::RestVerb typ auto req = fuerte::createRequest(type, fuerte::ContentType::VPack); req->header.database = _query.vocbase().name(); - req->header.path = urlPart + _queryId; + req->header.path = urlPart + "/" + _queryId; req->addVPack(std::move(body)); // Later, we probably want to set these sensibly: @@ -575,6 +695,12 @@ Result ExecutionBlockImpl::sendAsyncRequest(fuerte::RestVerb typ return {TRI_ERROR_NO_ERROR}; } +void ExecutionBlockImpl::traceExecuteRequest(VPackSlice const slice, + AqlCallStack const& callStack) { + using namespace std::string_literals; + traceRequest("execute", slice, "callStack="s + callStack.toString()); +} + void ExecutionBlockImpl::traceGetSomeRequest(VPackSlice const slice, size_t const atMost) { using namespace std::string_literals; diff --git a/arangod/Aql/RemoteExecutor.h b/arangod/Aql/RemoteExecutor.h index 480033c9f32d..49244489cf00 100644 --- a/arangod/Aql/RemoteExecutor.h +++ b/arangod/Aql/RemoteExecutor.h @@ -23,17 +23,20 @@ #ifndef ARANGOD_AQL_REMOTE_EXECUTOR_H #define ARANGOD_AQL_REMOTE_EXECUTOR_H +#include "Aql/AqlExecuteResult.h" #include "Aql/ClusterNodes.h" #include "Aql/ExecutionBlockImpl.h" #include "Aql/ExecutorInfos.h" +#include + #include -#include -#include +namespace arangodb::fuerte { inline namespace v1 { +enum class RestVerb; +}} -namespace arangodb { -namespace 
aql { +namespace arangodb::aql { // The RemoteBlock is actually implemented by specializing ExecutionBlockImpl, // so this class only exists to identify the specialization. @@ -45,12 +48,14 @@ class RemoteExecutor final {}; template <> class ExecutionBlockImpl : public ExecutionBlock { public: + using Api = ::arangodb::aql::RemoteNode::Api; + // TODO Even if it's not strictly necessary here, for consistency's sake the // non-standard arguments (server, ownName and queryId) should probably be // moved into some RemoteExecutorInfos class. ExecutionBlockImpl(ExecutionEngine* engine, RemoteNode const* node, ExecutorInfos&& infos, std::string const& server, - std::string const& ownName, std::string const& queryId); + std::string const& ownName, std::string const& queryId, Api); ~ExecutionBlockImpl() override = default; @@ -64,6 +69,8 @@ class ExecutionBlockImpl : public ExecutionBlock { std::tuple execute(AqlCallStack stack) override; + [[nodiscard]] auto api() const noexcept -> Api; + #ifdef ARANGODB_ENABLE_MAINTAINER_MODE // only for asserts: public: @@ -77,6 +84,20 @@ class ExecutionBlockImpl : public ExecutionBlock { std::pair skipSomeWithoutTrace(size_t atMost); + auto executeWithoutTrace(AqlCallStack stack) + -> std::tuple; + + auto executeViaOldApi(AqlCallStack stack) + -> std::tuple; + + auto executeViaNewApi(AqlCallStack stack) + -> std::tuple; + + [[nodiscard]] auto deserializeExecuteCallResultBody(velocypack::Slice) const + -> ResultT; + [[nodiscard]] auto serializeExecuteCallBody(AqlCallStack const& callStack) const + -> velocypack::Buffer; + ExecutorInfos const& infos() const { return _infos; } Query const& getQuery() const { return _query; } @@ -89,6 +110,13 @@ class ExecutionBlockImpl : public ExecutionBlock { // _communicationMutex *must* be locked for this! unsigned generateRequestTicket(); + void traceExecuteRequest(velocypack::Slice slice, AqlCallStack const& callStack); + void traceGetSomeRequest(velocypack::Slice slice, size_t atMost); + void traceSkipSomeRequest(velocypack::Slice slice, size_t atMost); + void traceInitializeCursorRequest(velocypack::Slice slice); + void traceShutdownRequest(velocypack::Slice slice, int errorCode); + void traceRequest(const char* rpc, velocypack::Slice slice, std::string const& args); + private: enum class ReqState { None, @@ -133,14 +161,11 @@ class ExecutionBlockImpl : public ExecutionBlock { bool _hasTriggeredShutdown; - void traceGetSomeRequest(velocypack::Slice slice, size_t atMost); - void traceSkipSomeRequest(velocypack::Slice slice, size_t atMost); - void traceInitializeCursorRequest(velocypack::Slice slice); - void traceShutdownRequest(velocypack::Slice slice, int errorCode); - void traceRequest(const char* rpc, velocypack::Slice slice, std::string const& args); + /// @brief Whether to use the pre-3.7 getSome/skipSome API, instead of the + /// execute API. Used for rolling upgrades, so can be removed in 3.8. 
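+  /// Roughly (see executeWithoutTrace() in RemoteExecutor.cpp): Api::GET_SOME
+  /// routes execute() through executeViaOldApi(), while Api::EXECUTE
+  /// serializes the call stack and sends it to RestAqlHandler::Route::execute()
+  /// via executeViaNewApi().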
+ Api _apiToUse = Api::EXECUTE; }; -} // namespace aql } // namespace arangodb #endif // ARANGOD_AQL_REMOTE_EXECUTOR_H diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index 366f65e4555f..58346af21a67 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -621,7 +621,7 @@ auto getStringView(velocypack::Slice slice) -> std::string_view { velocypack::StringRef ref = slice.stringRef(); return std::string_view(ref.data(), ref.size()); } -} +} // namespace // TODO Use the deserializer when available auto AqlExecuteCall::fromVelocyPack(VPackSlice const slice) -> ResultT { @@ -641,7 +641,7 @@ auto AqlExecuteCall::fromVelocyPack(VPackSlice const slice) -> ResultT ResultT ResultTtrx()->transactionContextPtr()->getVPackOptions()); answerBuilder.add(StaticStrings::Code, VPackValue(TRI_ERROR_NO_ERROR)); diff --git a/arangod/Aql/RestAqlHandler.h b/arangod/Aql/RestAqlHandler.h index c917c6a2f1ca..47f469840aad 100644 --- a/arangod/Aql/RestAqlHandler.h +++ b/arangod/Aql/RestAqlHandler.h @@ -54,7 +54,12 @@ class RestAqlHandler : public RestVocbaseBaseHandler { RestStatus execute() override; RestStatus continueExecute() override; void shutdownExecute(bool isFinalized) noexcept override; - + + class Route { + public: + static auto execute() -> const char* { return "/_api/aql/execute"; } + }; + public: // DELETE method for /_api/aql/kill/, (internal) bool killQuery(std::string const& idString); @@ -70,14 +75,15 @@ class RestAqlHandler : public RestVocbaseBaseHandler { // "callStack": an array of objects, each with the following attributes: // "offset": a non-negative integer // "limit": either a non-negative integer, or the string "infinity" - // "limitType: string or null, either "soft" or "hard"; set iff limit is not infinity - // "fullCount": a boolean + // "limitType: string or null, either "soft" or "hard"; set iff limit is + // not infinity "fullCount": a boolean // The result is an object with the attributes // "code": integer, error code. // If there was no error: - // "state": string, either "hasMore" or "done" - // "skipped": non-negative integer - // "result": serialized AqlItemBlock, or null when no rows are returned. + // "result": an object with the following attributes: + // "state": string, either "hasMore" or "done" + // "skipped": non-negative integer + // "block": serialized AqlItemBlock, or null when no rows are returned. // For the "getSome" operation one has to give: // "atMost": must be a positive integer, the cursor returns never // more than "atMost" items. 
Defaults to @@ -141,7 +147,7 @@ class RestAqlHandler : public RestVocbaseBaseHandler { std::shared_ptr const& ctx, double const ttl, bool& needToLock, arangodb::velocypack::Builder& answer); - + // handle for useQuery RestStatus handleUseQuery(std::string const&, arangodb::velocypack::Slice const); @@ -157,9 +163,9 @@ class RestAqlHandler : public RestVocbaseBaseHandler { // our traversal engine registry traverser::TraverserEngineRegistry* _traverserRegistry; - + aql::Query* _query; - + // id of current query QueryId _qId; }; diff --git a/lib/Basics/StaticStrings.cpp b/lib/Basics/StaticStrings.cpp index dc44f9502536..4a36d9d71ce1 100644 --- a/lib/Basics/StaticStrings.cpp +++ b/lib/Basics/StaticStrings.cpp @@ -56,7 +56,8 @@ std::string const StaticStrings::ReturnNewString("returnNew"); std::string const StaticStrings::ReturnOldString("returnOld"); std::string const StaticStrings::SilentString("silent"); std::string const StaticStrings::WaitForSyncString("waitForSync"); -std::string const StaticStrings::SkipDocumentValidation("skipDocumentValidation"); +std::string const StaticStrings::SkipDocumentValidation( + "skipDocumentValidation"); std::string const StaticStrings::IsSynchronousReplicationString( "isSynchronousReplication"); std::string const StaticStrings::Group("group"); @@ -307,6 +308,7 @@ std::string const StaticStrings::AqlRemoteLimitTypeHard("hard"); std::string const StaticStrings::AqlRemoteFullCount("fullCount"); std::string const StaticStrings::AqlRemoteOffset("offset"); std::string const StaticStrings::AqlRemoteInfinity("infinity"); +std::string const StaticStrings::AqlRemoteResult("result"); std::string const StaticStrings::AqlRemoteBlock("block"); std::string const StaticStrings::AqlRemoteSkipped("skipped"); std::string const StaticStrings::AqlRemoteState("state"); diff --git a/lib/Basics/StaticStrings.h b/lib/Basics/StaticStrings.h index d51660ec654d..b5e180ec1fd4 100644 --- a/lib/Basics/StaticStrings.h +++ b/lib/Basics/StaticStrings.h @@ -283,6 +283,7 @@ class StaticStrings { static std::string const AqlRemoteFullCount; static std::string const AqlRemoteOffset; static std::string const AqlRemoteInfinity; + static std::string const AqlRemoteResult; static std::string const AqlRemoteBlock; static std::string const AqlRemoteSkipped; static std::string const AqlRemoteState; diff --git a/tests/Aql/RemoteExecutorTest.cpp b/tests/Aql/RemoteExecutorTest.cpp new file mode 100644 index 000000000000..c5e4bef76825 --- /dev/null +++ b/tests/Aql/RemoteExecutorTest.cpp @@ -0,0 +1,217 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "Aql/AqlCall.h" +#include "Aql/AqlCallStack.h" +#include "Aql/AqlExecuteResult.h" +#include "Aql/AqlItemBlockManager.h" + +#include "AqlItemBlockHelper.h" + +#include "gtest/gtest.h" + +#include + +using namespace arangodb; +using namespace arangodb::aql; + +namespace arangodb::aql { + +auto operator==(AqlCallStack const& leftC, AqlCallStack const& rightC) -> bool { + auto left = leftC; + auto right = rightC; + + while (!left.empty() && !right.empty()) { + auto const l = left.popCall(); + auto const r = right.popCall(); + if (!(l == r)) { + return false; + } + } + + return left.empty() && right.empty(); +} + +auto operator==(AqlExecuteResult const& left, AqlExecuteResult const& right) -> bool { + return left.state() == right.state() && left.skipped() == right.skipped() && + (left.block() == nullptr) == (right.block() == nullptr) && + (left.block() == nullptr || *left.block() == *right.block()); +} + +} // namespace arangodb::aql + +namespace arangodb::tests::aql { + +auto blockToString(SharedAqlItemBlockPtr const& block) -> std::string { + velocypack::Builder blockBuilder; + block->toSimpleVPack(&velocypack::Options::Defaults, blockBuilder); + return blockBuilder.toJson(); +} + +class DeSerializeAqlCallTest : public ::testing::TestWithParam { + public: + DeSerializeAqlCallTest() = default; + + void SetUp() override { + aqlCall = GetParam(); + } + + protected: + AqlCall aqlCall{}; +}; + +auto const testingAqlCalls = ::testing::ValuesIn(std::array{ + AqlCall{0, false, AqlCall::Infinity{}}, + AqlCall{3, false, AqlCall::Infinity{}}, + AqlCall{0, false, 7, AqlCall::LimitType::SOFT}, + AqlCall{0, false, 7, AqlCall::LimitType::HARD}, + AqlCall{0, true, 7, AqlCall::LimitType::HARD}, + AqlCall{3, false, 7, AqlCall::LimitType::SOFT}, + AqlCall{3, false, 7, AqlCall::LimitType::HARD}, + AqlCall{3, true, 7, AqlCall::LimitType::HARD}, +}); + +TEST_P(DeSerializeAqlCallTest, testSuite) { + auto builder = velocypack::Builder{}; + aqlCall.toVelocyPack(builder); + + ASSERT_TRUE(builder.isClosed()); + + auto const maybeDeSerializedCall = std::invoke([&]() { + try { + return AqlCall::fromVelocyPack(builder.slice()); + } catch (std::exception const& ex) { + EXPECT_TRUE(false) << ex.what(); + } + return ResultT::error(-1); + }); + + ASSERT_TRUE(maybeDeSerializedCall.ok()) << maybeDeSerializedCall.errorMessage(); + + auto const deSerializedCall = *maybeDeSerializedCall; + + ASSERT_EQ(aqlCall, deSerializedCall); +} + +INSTANTIATE_TEST_CASE_P(DeSerializeAqlCallTestVariations, DeSerializeAqlCallTest, testingAqlCalls); + +class DeSerializeAqlCallStackTest : public ::testing::TestWithParam { + public: + DeSerializeAqlCallStackTest() = default; + + void SetUp() override { + aqlCallStack = GetParam(); + } + + protected: + AqlCallStack aqlCallStack{AqlCall{}}; +}; + +auto const testingAqlCallStacks = ::testing::ValuesIn(std::array{ + AqlCallStack{AqlCall{}}, + AqlCallStack{AqlCall{3, false, AqlCall::Infinity{}}}, + AqlCallStack{AqlCallStack{AqlCall{}}, AqlCall{3, false, AqlCall::Infinity{}}}, + AqlCallStack{AqlCallStack{AqlCallStack{AqlCall{1}}, AqlCall{2}}, AqlCall{3}}, + AqlCallStack{AqlCallStack{AqlCallStack{AqlCall{3}}, AqlCall{2}}, AqlCall{1}}, +}); + +TEST_P(DeSerializeAqlCallStackTest, testSuite) { + auto builder = velocypack::Builder{}; + aqlCallStack.toVelocyPack(builder); + + ASSERT_TRUE(builder.isClosed()); + + auto const 
maybeDeSerializedCallStack = std::invoke([&]() { + try { + return AqlCallStack::fromVelocyPack(builder.slice()); + } catch (std::exception const& ex) { + EXPECT_TRUE(false) << ex.what(); + } + return ResultT::error(-1); + }); + + ASSERT_TRUE(maybeDeSerializedCallStack.ok()) << maybeDeSerializedCallStack.errorMessage(); + + auto const deSerializedCallStack = *maybeDeSerializedCallStack; + + ASSERT_EQ(aqlCallStack, deSerializedCallStack); +} + +INSTANTIATE_TEST_CASE_P(DeSerializeAqlCallStackTestVariations, DeSerializeAqlCallStackTest, testingAqlCallStacks); + + +class DeSerializeAqlExecuteResultTest : public ::testing::TestWithParam { + public: + DeSerializeAqlExecuteResultTest() = default; + + void SetUp() override { + aqlExecuteResult = GetParam(); + } + + protected: + AqlExecuteResult aqlExecuteResult{ExecutionState::DONE, 0, nullptr}; +}; + +ResourceMonitor resourceMonitor{}; +AqlItemBlockManager manager{&resourceMonitor, SerializationFormat::SHADOWROWS}; + +auto const testingAqlExecuteResults = ::testing::ValuesIn(std::array{ + AqlExecuteResult{ExecutionState::DONE, 0, nullptr}, + AqlExecuteResult{ExecutionState::HASMORE, 0, nullptr}, + AqlExecuteResult{ExecutionState::HASMORE, 4, nullptr}, + AqlExecuteResult{ExecutionState::DONE, 0, buildBlock<1>(manager, {{42}})}, + AqlExecuteResult{ExecutionState::HASMORE, 3, buildBlock<2>(manager, {{3, 42}, {4, 41}})}, +}); + +TEST_P(DeSerializeAqlExecuteResultTest, testSuite) { + auto builder = velocypack::Builder{}; + aqlExecuteResult.toVelocyPack(builder, &velocypack::Options::Defaults); + + ASSERT_TRUE(builder.isClosed()); + + auto const maybeAqlExecuteResult = std::invoke([&]() { + try { + return AqlExecuteResult::fromVelocyPack(builder.slice(), manager); + } catch (std::exception const& ex) { + EXPECT_TRUE(false) << ex.what(); + } + return ResultT::error(-1); + }); + + ASSERT_TRUE(maybeAqlExecuteResult.ok()) << maybeAqlExecuteResult.errorMessage(); + + auto const deSerializedAqlExecuteResult = *maybeAqlExecuteResult; + + ASSERT_EQ(aqlExecuteResult.state(), deSerializedAqlExecuteResult.state()); + ASSERT_EQ(aqlExecuteResult.skipped(), deSerializedAqlExecuteResult.skipped()); + ASSERT_EQ(aqlExecuteResult.block() == nullptr, deSerializedAqlExecuteResult.block() == nullptr); + if (aqlExecuteResult.block() != nullptr) { + ASSERT_EQ(*aqlExecuteResult.block(), *deSerializedAqlExecuteResult.block()) + << "left: " << blockToString(aqlExecuteResult.block()) + << "; right: " << blockToString(deSerializedAqlExecuteResult.block()); + } + ASSERT_EQ(aqlExecuteResult, deSerializedAqlExecuteResult); +} + +INSTANTIATE_TEST_CASE_P(DeSerializeAqlExecuteResultTestVariations, DeSerializeAqlExecuteResultTest, testingAqlExecuteResults); + +} // namespace arangodb::tests::aql diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9df5c6a01515..b557ccf6498d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -57,6 +57,7 @@ set(ARANGODB_TESTS_SOURCES Aql/MultiDependencySingleRowFetcherTest.cpp Aql/NoResultsExecutorTest.cpp Aql/QueryHelper.cpp + Aql/RemoteExecutorTest.cpp Aql/RemoveExecutorTest.cpp Aql/ReplaceExecutorTest.cpp Aql/ReturnExecutorTest.cpp From 98c7eb623d80f61cde5fd02e0edfb2d345e9c2d2 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Sat, 29 Feb 2020 08:25:12 +0100 Subject: [PATCH 089/122] Added test for distinct collect with random order. 
(#11184) --- tests/Aql/DistinctCollectExecutorTest.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/Aql/DistinctCollectExecutorTest.cpp b/tests/Aql/DistinctCollectExecutorTest.cpp index 060ad80677ba..3a0693a0fdc7 100644 --- a/tests/Aql/DistinctCollectExecutorTest.cpp +++ b/tests/Aql/DistinctCollectExecutorTest.cpp @@ -94,6 +94,20 @@ TEST_P(DistinctCollectExecutorTest, split_1) { .run(); } +TEST_P(DistinctCollectExecutorTest, split_3) { + auto [split] = GetParam(); + + ExecutorTestHelper(*fakedQuery) + .setExecBlock(std::move(infos)) + .setInputValueList(1, 2, 1, 2, 5, 4, 3, 3, 1, 2) + .setInputSplitType(split) + .setCall(AqlCall{2, AqlCall::Infinity{}, 2, true}) + .expectOutputValueList(5, 4) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(); +} + TEST_P(DistinctCollectExecutorTest, split_2) { auto [split] = GetParam(); From 609b0ff197eef95ecddfd6a130133e38e18b1739 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Sat, 29 Feb 2020 08:32:10 +0100 Subject: [PATCH 090/122] Feature/aql subquery execution block impl execute implementation subquery executor (#11196) * Intermediate commit, does not compile. * New style for SubqueryExecutor. * Removed bad assert. * Reset subsqueryInitialised in skip. Co-authored-by: Michael Hackstein --- arangod/Aql/ExecutionBlockImpl.cpp | 15 +- arangod/Aql/SubqueryExecutor.cpp | 202 +++++++++++- arangod/Aql/SubqueryExecutor.h | 15 +- tests/Aql/ExecutorTestHelper.h | 5 +- tests/Aql/SubqueryExecutorTest.cpp | 499 +++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + 6 files changed, 714 insertions(+), 23 deletions(-) create mode 100644 tests/Aql/SubqueryExecutorTest.cpp diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index add3cf3dee85..31e98b72fed5 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -137,6 +137,7 @@ template constexpr bool isNewStyleExecutor = is_one_of_v< Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, + SubqueryExecutor, SubqueryExecutor, // TODO: re-enable after new subquery end & start are implemented // CalculationExecutor, CalculationExecutor, CalculationExecutor, HashedCollectExecutor, ConstrainedSortExecutor, @@ -1103,10 +1104,12 @@ template static SkipRowsRangeVariant constexpr skipRowsType() { bool constexpr useFetcher = Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable && - !std::is_same>::value; + !std::is_same_v>; bool constexpr useExecutor = hasSkipRowsRange::value; + static_assert(!std::is_same_v> || hasSkipRowsRange::value); + // ConstFetcher and SingleRowFetcher can skip, but // it may not be done for modification subqueries. 
static_assert(useFetcher == @@ -1124,6 +1127,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, IdExecutor>, IdExecutor, HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, ConstrainedSortExecutor, + SubqueryExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif @@ -1167,15 +1171,12 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert( !std::is_same::value || useFetcher, "LimitExecutor needs to implement skipRows() to work correctly"); - - static_assert(useExecutor || useFetcher, "no skipping variant available"); - + if constexpr (useExecutor) { return SkipRowsRangeVariant::EXECUTOR; - } else { - static_assert(useFetcher); - return SkipRowsRangeVariant::FETCHER; } + static_assert(useFetcher); + return SkipRowsRangeVariant::FETCHER; } // Let's do it the C++ way. diff --git a/arangod/Aql/SubqueryExecutor.cpp b/arangod/Aql/SubqueryExecutor.cpp index 8190e7367a97..72074816f0fe 100644 --- a/arangod/Aql/SubqueryExecutor.cpp +++ b/arangod/Aql/SubqueryExecutor.cpp @@ -21,12 +21,16 @@ //////////////////////////////////////////////////////////////////////////////// #include "SubqueryExecutor.h" +#include +#include "Aql/AqlCallStack.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionNode.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/SingleRowFetcher.h" +#define LOG_DEVEL_SQ LOG_DEVEL_IF(false) + using namespace arangodb; using namespace arangodb::aql; @@ -48,11 +52,12 @@ SubqueryExecutorInfos::SubqueryExecutorInfos(SubqueryExecutorInfos&& other) = de SubqueryExecutorInfos::~SubqueryExecutorInfos() = default; -template -SubqueryExecutor::SubqueryExecutor(Fetcher& fetcher, SubqueryExecutorInfos& infos) +template +SubqueryExecutor::SubqueryExecutor(Fetcher& fetcher, + SubqueryExecutorInfos& infos) : _fetcher(fetcher), _infos(infos), - _state(ExecutionState::HASMORE), + _state(ExecutorState::HASMORE), _subqueryInitialized(false), _shutdownDone(false), _shutdownResult(TRI_ERROR_INTERNAL), @@ -60,7 +65,7 @@ SubqueryExecutor::SubqueryExecutor(Fetcher& fetcher, Sub _subqueryResults(nullptr), _input(CreateInvalidInputRowHint{}) {} -template +template SubqueryExecutor::~SubqueryExecutor() = default; /** @@ -70,9 +75,10 @@ SubqueryExecutor::~SubqueryExecutor() = default; * If we do not have a subquery ongoing, we fetch a row and we start a new Subquery and ask it for hasMore. */ -template +template std::pair SubqueryExecutor::produceRows(OutputAqlItemRow& output) { - if (_state == ExecutionState::DONE && !_input.isInitialized()) { +#if 0 + if (_state == ExecutorState::DONE && !_input.isInitialized()) { // We have seen DONE upstream, and we have discarded our local reference // to the last input, we will not be able to produce results anymore. 
return {_state, NoStats{}}; @@ -144,9 +150,93 @@ std::pair SubqueryExecutor::pro _subqueryInitialized = true; } } +#endif + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } -template +template +auto SubqueryExecutor::produceRows(AqlItemBlockInputRange& input, + OutputAqlItemRow& output) + -> std::tuple { + auto getUpstreamCall = [&]() { + AqlCall upstreamCall = output.getClientCall(); + if constexpr (isModificationSubquery) { + upstreamCall = AqlCall{}; + } + + return upstreamCall; + }; + + LOG_DEVEL_SQ << uint64_t(this) << "produceRows " << output.getClientCall(); + + if (_state == ExecutorState::DONE && !_input.isInitialized()) { + // We have seen DONE upstream, and we have discarded our local reference + // to the last input, we will not be able to produce results anymore. + return {_state, NoStats{}, getUpstreamCall()}; + } + while (true) { + if (_subqueryInitialized) { + // Continue in subquery + + // Const case + if (_infos.isConst() && !_input.isFirstDataRowInBlock()) { + // Simply write + writeOutput(output); + LOG_DEVEL_SQ << uint64_t(this) << "wrote output is const " << _state << " " << getUpstreamCall(); + return {_state, NoStats{}, getUpstreamCall()}; + } + + // Non const case, or first run in const + auto [state, skipped, block] = _subquery.execute(AqlCallStack(AqlCall{})); + TRI_ASSERT(skipped == 0); + // We get a result + if (block != nullptr) { + TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + if (_infos.returnsData()) { + TRI_ASSERT(_subqueryResults != nullptr); + _subqueryResults->emplace_back(std::move(block)); + } + } + + // Subquery DONE + if (state == ExecutionState::DONE) { + writeOutput(output); + LOG_DEVEL_SQ << uint64_t(this) << "wrote output subquery done " << _state << " " << getUpstreamCall(); + return {_state, NoStats{}, getUpstreamCall()}; + } + + } else { + // init new subquery + if (!_input) { + std::tie(_state, _input) = input.nextDataRow(); + LOG_DEVEL_SQ << uint64_t(this) << " nextDataRow: " << _state << " " << _input.isInitialized(); + if (!_input) { + LOG_DEVEL_SQ << uint64_t(this) << "exit produce, no more input" << _state; + return {_state, NoStats{}, getUpstreamCall()}; + } + } + + TRI_ASSERT(_input); + if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { + auto initRes = _subquery.initializeCursor(_input); + + if (initRes.second.fail()) { + // Error during initialize cursor + THROW_ARANGO_EXCEPTION(initRes.second); + } + _subqueryResults = std::make_unique>(); + } + // on const subquery we can retoggle init as soon as we have new input. + _subqueryInitialized = true; + } + } +} + +template void SubqueryExecutor::writeOutput(OutputAqlItemRow& output) { _subqueryInitialized = false; TRI_IF_FAILURE("SubqueryBlock::getSome") { @@ -175,24 +265,26 @@ void SubqueryExecutor::writeOutput(OutputAqlItemRow& out } _input = InputAqlItemRow(CreateInvalidInputRowHint{}); TRI_ASSERT(output.produced()); + output.advanceRow(); } /// @brief shutdown, tell dependency and the subquery -template +template std::pair SubqueryExecutor::shutdown(int errorCode) { // Note this shutdown needs to be repeatable. 
// Also note the ordering of this shutdown is different // from earlier versions we now shutdown subquery first + ExecutionState state = ExecutionState::DONE; if (!_shutdownDone) { // We take ownership of _state here for shutdown state - std::tie(_state, _shutdownResult) = _subquery.shutdown(errorCode); - if (_state == ExecutionState::WAITING) { + std::tie(state, _shutdownResult) = _subquery.shutdown(errorCode); + if (state == ExecutionState::WAITING) { TRI_ASSERT(_shutdownResult.ok()); return {ExecutionState::WAITING, TRI_ERROR_NO_ERROR}; } _shutdownDone = true; } - return {_state, _shutdownResult}; + return {state, _shutdownResult}; } template @@ -202,5 +294,93 @@ SubqueryExecutor::fetchBlockForPassthrough(size_t atMost return {rv.first, {}, std::move(rv.second)}; } +template <> +template > +auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + auto getUpstreamCall = [&]() { + auto upstreamCall = AqlCall{}; + return upstreamCall; + }; + + size_t skipped = 0; + + LOG_DEVEL_SQ << uint64_t(this) << "skipRowsRange " << call; + + if (_state == ExecutorState::DONE && !_input.isInitialized()) { + // We have seen DONE upstream, and we have discarded our local reference + // to the last input, we will not be able to produce results anymore. + return {_state, NoStats{}, 0, getUpstreamCall()}; + } + while (true) { + if (_subqueryInitialized) { + // Continue in subquery + + // Const case + if (_infos.isConst() && !_input.isFirstDataRowInBlock()) { + // Simply write + _subqueryInitialized = false; + _input = InputAqlItemRow(CreateInvalidInputRowHint{}); + skipped += 1; + call.didSkip(1); + LOG_DEVEL_SQ << uint64_t(this) << "did skip one"; + return {_state, NoStats{}, skipped, getUpstreamCall()}; + } + + // Non const case, or first run in const + auto [state, skipped, block] = _subquery.execute(AqlCallStack(AqlCall{})); + TRI_ASSERT(skipped == 0); + // We get a result + if (block != nullptr) { + TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + if (_infos.returnsData()) { + TRI_ASSERT(_subqueryResults != nullptr); + _subqueryResults->emplace_back(std::move(block)); + } + } + + // Subquery DONE + if (state == ExecutionState::DONE) { + _subqueryInitialized = false; + _input = InputAqlItemRow(CreateInvalidInputRowHint{}); + skipped += 1; + call.didSkip(1); + LOG_DEVEL_SQ << uint64_t(this) << "did skip one"; + return {_state, NoStats{}, skipped, getUpstreamCall()}; + } + + } else { + // init new subquery + if (!_input) { + std::tie(_state, _input) = inputRange.nextDataRow(); + + if (!_input) { + LOG_DEVEL_SQ << uint64_t(this) << "skipped nothing waiting for input " << _state; + return {_state, NoStats{}, skipped, getUpstreamCall()}; + } + } + + TRI_ASSERT(_input); + if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { + auto initRes = _subquery.initializeCursor(_input); + + if (initRes.second.fail()) { + // Error during initialize cursor + THROW_ARANGO_EXCEPTION(initRes.second); + } + _subqueryResults = std::make_unique>(); + } + // on const subquery we can retoggle init as soon as we have new input. 
+ _subqueryInitialized = true; + } + } +} + template class ::arangodb::aql::SubqueryExecutor; +template auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, + AqlCall& call) + -> std::tuple; template class ::arangodb::aql::SubqueryExecutor; diff --git a/arangod/Aql/SubqueryExecutor.h b/arangod/Aql/SubqueryExecutor.h index d78d9403eae0..c7fbee012be2 100644 --- a/arangod/Aql/SubqueryExecutor.h +++ b/arangod/Aql/SubqueryExecutor.h @@ -26,6 +26,8 @@ #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" +#include "Aql/AqlItemBlockInputRange.h" +#include "Aql/AqlCall.h" #include "Aql/Stats.h" #include "Basics/Result.h" @@ -63,7 +65,7 @@ class SubqueryExecutorInfos : public ExecutorInfos { bool const _isConst; }; -template +template class SubqueryExecutor { public: struct Properties { @@ -94,6 +96,15 @@ class SubqueryExecutor { */ std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; + + // skipRowsRange <=> isModificationSubquery + + template = 0> + auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; + std::tuple fetchBlockForPassthrough(size_t atMost); private: @@ -108,7 +119,7 @@ class SubqueryExecutor { SubqueryExecutorInfos& _infos; // Upstream state, used to determine if we are done with all subqueries - ExecutionState _state; + ExecutorState _state; // Flag if the current subquery is initialized and worked on bool _subqueryInitialized; diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index 718c91b62054..c3b2686246f1 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -38,7 +38,6 @@ #include "Aql/ExecutionEngine.h" #include "Aql/ExecutionNode.h" #include "Aql/ExecutionState.h" -#include "Aql/ExecutionEngine.h" #include "Aql/ExecutionStats.h" #include "Aql/OutputAqlItemRow.h" #include "Aql/Query.h" @@ -162,7 +161,7 @@ struct Pipeline { return *this; } - ~Pipeline() { + virtual ~Pipeline() { for (auto&& b : _pipeline) { b.release(); } @@ -347,7 +346,7 @@ struct ExecutorTestHelper { std::move(infos)); } - auto setPipeline(Pipeline&& pipeline) -> ExecutorTestHelper& { + auto setPipeline(Pipeline pipeline) -> ExecutorTestHelper& { _pipeline = std::move(pipeline); return *this; } diff --git a/tests/Aql/SubqueryExecutorTest.cpp b/tests/Aql/SubqueryExecutorTest.cpp new file mode 100644 index 000000000000..ceb03c5ec074 --- /dev/null +++ b/tests/Aql/SubqueryExecutorTest.cpp @@ -0,0 +1,499 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Markus Pfeiffer +//////////////////////////////////////////////////////////////////////////////// + +#include "gtest/gtest.h" + +#include "AqlItemBlockHelper.h" +#include "Mocks/Servers.h" +#include "TestEmptyExecutorHelper.h" +#include "TestExecutorHelper.h" +#include "TestLambdaExecutor.h" +#include "WaitingExecutionBlockMock.h" +#include "fakeit.hpp" + +#include "Aql/AqlCallStack.h" +#include "Aql/AqlItemBlock.h" +#include "Aql/AqlItemBlockSerializationFormat.h" +#include "Aql/ConstFetcher.h" +#include "Aql/ExecutionBlockImpl.h" +#include "Aql/ExecutionEngine.h" +#include "Aql/IdExecutor.h" +#include "Aql/Query.h" +#include "Aql/RegisterPlan.h" +#include "Aql/ReturnExecutor.h" +#include "Aql/SingleRowFetcher.h" +#include "Aql/SubqueryExecutor.h" +#include "Transaction/Context.h" +#include "Transaction/Methods.h" + +#include "Aql/ExecutorTestHelper.h" +#include "Aql/TestLambdaExecutor.h" +#include "Aql/WaitingExecutionBlockMock.h" + +using namespace arangodb; +using namespace arangodb::aql; +using namespace arangodb::tests; +using namespace arangodb::tests::aql; +using namespace arangodb::basics; + +using SubqueryExecutorTestHelper = ExecutorTestHelper<1, 1>; +using SubqueryExecutorSplitType = SubqueryExecutorTestHelper::SplitType; +using SubqueryExecutorParamType = std::tuple; + +using RegisterSet = std::unordered_set; +using LambdaExePassThrough = TestLambdaExecutor; +using LambdaExe = TestLambdaSkipExecutor; + +// This will maintain another deque of ExecutionBlocks +// Which represents the content of the Subquery. +// Note this cannot be concated with any other pipeline. +struct SubqueryPipeline : public Pipeline { + explicit SubqueryPipeline(ExecBlock subquery, ExecBlock singleton) + : Pipeline(std::move(subquery)), + _subquery(get().front().get() /* == subquery */) { + // must be of type SubqueryExecutor + TRI_ASSERT(dynamic_cast*>(_subquery) != nullptr || + dynamic_cast*>(_subquery) != nullptr); + // We always need to start with a singleton + TRI_ASSERT(dynamic_cast*>(singleton.get()) != nullptr); + _subqueryPipeline.emplace_back(std::move(singleton)); + } + + Pipeline& addSubqueryConsumer(ExecBlock&& consumer) { + TRI_ASSERT(!_subqueryPipeline.empty()); + consumer->addDependency(_subqueryPipeline.front().get()); + _subqueryPipeline.emplace_front(std::move(consumer)); + + return *this; + } + + ExecutionBlock& getSubquery() const { return *_subquery; } + + private: + ExecutionBlock* _subquery; + PipelineStorage _subqueryPipeline; +}; + +class SubqueryExecutorIntegrationTest + : public AqlExecutorTestCaseWithParam { + protected: + ExecutorTestHelper<1, 1> executorTestHelper; + + SubqueryExecutorIntegrationTest() : executorTestHelper(*fakedQuery) {} + + // returns a new pipeline that contains body as a subquery + auto createSubquery(Pipeline body) -> Pipeline { + + /* + auto subqueryEnd = createSubqueryEndExecutionBlock(); + if (!body.empty()) { + subqueryEnd->addDependency(body.get().front().get()); + } + body.get().emplace_front(std::move(subqueryEnd)); + + auto subqueryStart = createSubqueryStartExecutionBlock(); + // This exists because we at least added the subqueryEnd + body.get().back()->addDependency(subqueryStart.get()); + + body.get().emplace_back(std::move(subqueryStart)); + */ + return std::move(body); + } + + auto createSubquery() -> Pipeline { return createSubquery(Pipeline()); } + + auto createDoNothingPipeline() -> Pipeline { + auto numRegs = size_t{1}; + auto emptyRegisterList = 
std::make_shared>( + std::initializer_list{}); + + auto inRegisterList = std::make_shared>( + std::initializer_list{0}); + auto outRegisterList = std::make_shared>( + std::initializer_list{1}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + auto infos = LambdaExe::Infos(inRegisterList, outRegisterList, 1, 2, {}, + toKeep, createProduceCall(), createSkipCall()); + + return Pipeline(executorTestHelper.createExecBlock(std::move(infos))); + } + + auto createAssertPipeline() -> Pipeline { + auto numRegs = size_t{1}; + auto emptyRegisterList = std::make_shared>( + std::initializer_list{}); + + auto inRegisterList = std::make_shared>( + std::initializer_list{0}); + auto outRegisterList = std::make_shared>( + std::initializer_list{1}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + auto infos = LambdaExe::Infos(inRegisterList, outRegisterList, 1, 2, {}, + toKeep, createAssertCall(), createSkipCall()); + + return Pipeline(executorTestHelper.createExecBlock(std::move(infos))); + } + + auto createCallAssertPipeline(AqlCall call) -> Pipeline { + auto numRegs = size_t{1}; + auto emptyRegisterList = std::make_shared>( + std::initializer_list{}); + + auto inRegisterList = std::make_shared>( + std::initializer_list{0}); + auto outRegisterList = std::make_shared>( + std::initializer_list{1}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + auto infos = LambdaExe::Infos(inRegisterList, outRegisterList, 1, 2, {}, toKeep, + createAssertCallCall(call), createSkipCall()); + + return Pipeline(executorTestHelper.createExecBlock(std::move(infos))); + } +#if 0 + auto createSubqueryStartExecutionBlock() -> ExecBlock { + // Subquery start executor does not care about input or output registers? + // TODO: talk about registers & register planning + + auto inputRegisterSet = + std::make_shared(std::initializer_list{0}); + auto outputRegisterSet = + std::make_shared(std::initializer_list{}); + auto toKeepRegisterSet = RegisterSet{0}; + + auto infos = SubqueryStartExecutor::Infos(inputRegisterSet, outputRegisterSet, + inputRegisterSet->size(), + inputRegisterSet->size() + + outputRegisterSet->size(), + {}, toKeepRegisterSet); + + return executorTestHelper.createExecBlock(std::move(infos), + ExecutionNode::SUBQUERY_START); + } + + // Subquery end executor has an input and an output register, + // but only the output register is used, remove input reg? 
+ auto createSubqueryEndExecutionBlock() -> ExecBlock { + auto const inputRegister = RegisterId{0}; + auto const outputRegister = RegisterId{1}; + auto inputRegisterSet = + std::make_shared(std::initializer_list{inputRegister}); + auto outputRegisterSet = + std::make_shared(std::initializer_list{outputRegister}); + auto toKeepRegisterSet = RegisterSet{0}; + + auto infos = + SubqueryEndExecutor::Infos(inputRegisterSet, outputRegisterSet, + inputRegisterSet->size(), + inputRegisterSet->size() + outputRegisterSet->size(), + {}, toKeepRegisterSet, nullptr, + inputRegister, outputRegister, false); + + return executorTestHelper.createExecBlock(std::move(infos), + ExecutionNode::SUBQUERY_END); + } +#endif + auto createReturnExecutionBlock() -> ExecBlock { + auto const inputRegister = RegisterId{0}; + auto const outputRegister = RegisterId{0}; + auto inputRegisterSet = + std::make_shared(std::initializer_list{inputRegister}); + auto outputRegisterSet = + std::make_shared(std::initializer_list{outputRegister}); + auto toKeepRegisterSet = RegisterSet{0}; + + auto infos = ReturnExecutor::Infos(inputRegister, 1, 1, false); + + return executorTestHelper.createExecBlock(std::move(infos), + ExecutionNode::RETURN); + } + + auto createProduceCall() -> ProduceCall { + return [](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + while (input.hasDataRow() && !output.isFull()) { + auto const [state, row] = input.nextDataRow(); + output.cloneValueInto(1, row, AqlValue("foo")); + output.advanceRow(); + } + NoStats stats{}; + AqlCall call{}; + + return {input.upstreamState(), stats, call}; + }; + }; + + auto createSkipCall() -> SkipCall { + return [](AqlItemBlockInputRange& input, + AqlCall& call) -> std::tuple { + auto skipped = size_t{0}; + while (input.hasDataRow() && call.shouldSkip()) { + auto const& [state, inputRow] = input.nextDataRow(); + EXPECT_TRUE(inputRow.isInitialized()); + call.didSkip(1); + skipped++; + } + auto upstreamCall = AqlCall{call}; + return {input.upstreamState(), NoStats{}, skipped, upstreamCall}; + }; + }; + + // Asserts if called. 
This is to check that when we use skip to + // skip over a subquery, the subquery's produce is not invoked + auto createAssertCall() -> ProduceCall { + return [](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + EXPECT_TRUE(false); + NoStats stats{}; + AqlCall call{}; + + return {ExecutorState::DONE, stats, call}; + }; + } + + auto createAssertCallCall(AqlCall call) -> ProduceCall { + return [call](AqlItemBlockInputRange& input, + OutputAqlItemRow& output) -> std::tuple { + auto clientCall = output.getClientCall(); + + EXPECT_EQ(clientCall.offset, call.offset); + EXPECT_EQ(clientCall.softLimit, call.softLimit); + EXPECT_EQ(clientCall.hardLimit, call.hardLimit); + EXPECT_EQ(clientCall.fullCount, call.fullCount); + + while (input.hasDataRow() && !output.isFull()) { + auto const [state, row] = input.nextDataRow(); + output.cloneValueInto(1, row, AqlValue("foo")); + output.advanceRow(); + } + + NoStats stats{}; + AqlCall call{}; + + return {input.upstreamState(), stats, call}; + }; + } + auto getSplit() -> SubqueryExecutorSplitType { + auto [split] = GetParam(); + return split; + } +}; + +template +const SubqueryExecutorSplitType splitIntoBlocks = + SubqueryExecutorSplitType{std::vector{vs...}}; +template +const SubqueryExecutorSplitType splitStep = SubqueryExecutorSplitType{step}; + +/* +INSTANTIATE_TEST_CASE_P(SubqueryExecutorIntegrationTest, SubqueryExecutorIntegrationTest, + ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>, + splitStep<2>, splitStep<1>)); +*/ + +TEST_P(SubqueryExecutorIntegrationTest, single_subquery_empty_input) { + auto call = AqlCall{}; + auto pipeline = createSubquery(); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList() + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, single_subquery) { + auto call = AqlCall{}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{1, R"([1])"}, + {2, R"([2])"}, + {5, R"([5])"}, + {2, R"([2])"}, + {1, R"([1])"}, + {5, R"([5])"}, + {7, R"([7])"}, + {1, R"([1])"}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, DISABLED_single_subquery_skip_and_produce) { + auto call = AqlCall{5}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{5, R"([5])"}, {7, R"([7])"}, {1, R"([1])"}}) + .expectSkipped(5) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, DISABLED_single_subquery_skip_all) { + auto call = AqlCall{20}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, DISABLED_single_subquery_fullcount) { + auto call = AqlCall{0, true, 0, AqlCall::LimitType::HARD}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + 
.setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, DISABLED_single_subquery_skip_produce_count) { + auto call = AqlCall{2, true, 2, AqlCall::LimitType::HARD}; + auto pipeline = createSubquery(); + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{5, R"([5])"}, {2, R"([2])"}}) + .expectSkipped(6) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, two_nested_subqueries_empty_input) { + auto call = AqlCall{}; + auto pipeline = createSubquery(createSubquery()); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList() + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, two_nested_subqueries) { + auto call = AqlCall{}; + auto pipeline = createSubquery(createSubquery()); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, two_sequential_subqueries) { + auto call = AqlCall{}; + auto pipeline = concatPipelines(createSubquery(), createSubquery()); + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, do_nothing_in_subquery) { + auto call = AqlCall{}; + auto pipeline = createSubquery(createDoNothingPipeline()); + + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {{1}, {2}, {5}, {2}, {1}, {5}, {7}, {1}}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, DISABLED_check_call_passes_subquery) { + auto call = AqlCall{10}; + auto pipeline = concatPipelines(createCallAssertPipeline(call), createSubquery()); + + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {}) + .expectSkipped(8) + .expectedState(ExecutionState::DONE) + .run(); +}; + +TEST_P(SubqueryExecutorIntegrationTest, DISABLED_check_skipping_subquery) { + auto call = AqlCall{10}; + auto pipeline = createSubquery(createAssertPipeline()); + + executorTestHelper.setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0}, {}) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +}; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b557ccf6498d..9ed9e68f7a40 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -70,6 +70,7 @@ set(ARANGODB_TESTS_SOURCES Aql/SortLimit-test.cpp Aql/SpliceSubqueryOptimizerRuleTest.cpp Aql/SplicedSubqueryIntegrationTest.cpp + 
Aql/SubqueryExecutorTest.cpp Aql/SubqueryEndExecutorTest.cpp Aql/SubqueryStartExecutorTest.cpp Aql/TestEmptyExecutorHelper.cpp From 9bb106d9d6964dfa113b8cab4677a938392f7b33 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Sat, 29 Feb 2020 08:34:42 +0100 Subject: [PATCH 091/122] Feature/aql subquery execution block impl execute implementation count collect (#11172) * Move SubqueryStartExecutor to new interface * Diddle * Tests pass for SubqueryStartExecutor * Delete most of the SubqueryEndExecutor tests They are irrelevant now as they only tested shadowrow behaviour. Shadow Rows are now handled by ExecutionBlockImpl. * Skeleton tests * Junk * Cleanup and make work * Tweak ExecutorTestHelper to test pipelines * SubqueryStart/SubqueryEnd * Blafummel. * Modifications * Fixup tests * Address some comments * Fix. * Fix tests * Various fixes. * Made the branch compile on Mac * Intermediate commit, does not compile, required for merge * Fixed one of the SplicedSubqueryTests * Seperated ShadowRow handling for SubqueryStart/End executors to get a better overview and understanding on how it should behave. * Allow to use a fullCallStack in ExecutorTestHelper. * Added asserts on SubqueryEnd that is not allowed to be called with Skipped from upstream * Use AqlTestCase in SubqueryStartExecutorTest. Transformed first test to TestHelper pipeline * Let ExecutorTestHelper also test for shadowRows * Adapted SingleInput=>Data+Shadow test to testing Helepr and make it pass * Transformed existing SubqueryStart tests to Helper, fixed a test where there was no place for the ShadowRow to be written * Fixed Skip in SubqueryStartExecutor * Allow the Pipeline to add Consumer and dependency without fiddling with the Deque from outside * Added Skip adn ShadowRowForwarding tests to SubqueryStartExecutor * Fixed fullCount of SubqueryStart to be called properly. * Renamed isModificationSubquery -> isModificatioNode again * Consume more than one subquery en-block * Fixed debugReporting in SplicedSubqueryTest. Disabled the skip in subquery tests for now * Removed AQL debug log level from testing config * First version of Subquery Skip bypassing. Including 1 passing test. * Added disabled tests for skip, and proposed to do it later * Fixed reporting of FILTERED in fullCount of EnumerateCollectionExecutor * Enable calculationExecutor execute interface * Fixed compile issues * To Velocypack the modification Subquery value * Fixed illegal assert and reactivated failure tests * Fixed compile issue * Deactivated Calculation again * Added a testSuite for countCollectExecutor * Fixed Subquery Test for CountCollect * Added CountCollectImplementation * Added Compatibility Mode to AqlCallStack * Removed IdExecutor the only feature it has can be moved into IDexecutor in general. This still has to be done * When falling back in BlocksWithClients use compatibility stack * Fixed EnumerateCollectionExecutor * Fixed fullCountBehaviour of EnumerateList. Also removed a unlimited,fullCount combination in IndexExecutor * Fixed endless loop while skipAl in OperationCursor * Fixed View Executor, which sends a no-limit no-offset upstream. 
* Fixed forwarding in NoResultsExecutor Co-authored-by: Markus Pfeiffer --- arangod/Aql/CountCollectExecutor.cpp | 80 ++--- arangod/Aql/CountCollectExecutor.h | 27 +- arangod/Aql/EnumerateCollectionExecutor.cpp | 7 +- arangod/Aql/EnumerateListExecutor.cpp | 7 +- arangod/Aql/ExecutionBlockImpl.cpp | 4 +- arangod/Aql/IResearchViewExecutor.cpp | 7 +- arangod/Aql/IndexExecutor.cpp | 1 - arangod/Aql/NoResultsExecutor.cpp | 3 +- arangod/Utils/OperationCursor.cpp | 9 +- tests/Aql/CountCollectExecutorTest.cpp | 333 +++++++++++++------- 10 files changed, 306 insertions(+), 172 deletions(-) diff --git a/arangod/Aql/CountCollectExecutor.cpp b/arangod/Aql/CountCollectExecutor.cpp index 8c6e80be8a99..ff6bb3ad4130 100644 --- a/arangod/Aql/CountCollectExecutor.cpp +++ b/arangod/Aql/CountCollectExecutor.cpp @@ -27,9 +27,7 @@ #include "Aql/AqlValue.h" #include "Aql/ExecutorInfos.h" -#include "Aql/InputAqlItemRow.h" #include "Aql/OutputAqlItemRow.h" -#include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" #include @@ -53,50 +51,60 @@ RegisterId CountCollectExecutorInfos::getOutputRegisterId() const { return _collectRegister; } -CountCollectExecutor::CountCollectExecutor(Fetcher& fetcher, Infos& infos) - : _infos(infos), _fetcher(fetcher), _state(ExecutionState::HASMORE), _count(0) {} +CountCollectExecutor::CountCollectExecutor(Fetcher&, Infos& infos) + : _infos(infos) {} -std::pair CountCollectExecutor::produceRows(OutputAqlItemRow& output) { +auto CountCollectExecutor::produceRows(AqlItemBlockInputRange& inputRange, + OutputAqlItemRow& output) + -> std::tuple { TRI_IF_FAILURE("CountCollectExecutor::produceRows") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - - if (_state == ExecutionState::DONE) { - return {_state, NoStats{}}; + // skipped > 0 -> done + // We have the guarantee that every thing is skipped + // within a single call + TRI_ASSERT(inputRange.skippedInFlight() == 0 || + inputRange.upstreamState() == ExecutorState::DONE); + if (inputRange.upstreamState() == ExecutorState::DONE) { + // We have skipped all, report it. + // In general, we do not have an input row. In fact, we never fetch one. + output.setAllowSourceRowUninitialized(); + + // We must produce exactly one output row. + output.cloneValueInto(_infos.getOutputRegisterId(), + InputAqlItemRow{CreateInvalidInputRowHint{}}, + AqlValue(AqlValueHintUInt(inputRange.skipAll()))); + output.advanceRow(); } + // We always send a hardLimit with fullcount to upstream + return {inputRange.upstreamState(), NoStats{}, + AqlCall{0, true, 0, AqlCall::LimitType::HARD}}; +} - while (_state != ExecutionState::DONE) { - size_t skipped; - std::tie(_state, skipped) = _fetcher.skipRows(ExecutionBlock::SkipAllSize()); - - if (_state == ExecutionState::WAITING) { - TRI_ASSERT(skipped == 0); - return {_state, NoStats{}}; - } - - TRI_ASSERT(skipped != 0 || _state == ExecutionState::DONE); - incrCountBy(skipped); +auto CountCollectExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { + TRI_IF_FAILURE("CountCollectExecutor::produceRows") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - - // In general, we do not have an input row. In fact, we never fetch one. - output.setAllowSourceRowUninitialized(); - - // We must produce exactly one output row. 
- output.cloneValueInto(_infos.getOutputRegisterId(), - InputAqlItemRow{CreateInvalidInputRowHint{}}, - AqlValue(AqlValueHintUInt(getCount()))); - - return {_state, NoStats{}}; + // skipped > 0 -> done + // We have the guarantee that every thing is skipped + // within a single call + TRI_ASSERT(inputRange.skippedInFlight() == 0 || + inputRange.upstreamState() == ExecutorState::DONE); + if (inputRange.upstreamState() == ExecutorState::DONE) { + // We have skipped all, report it. + auto skipped = inputRange.skipAll(); + // We request to just forward, we do not even care how much + TRI_ASSERT(skipped == 0); + call.didSkip(1); + } + // We always send a hardLimit with fullcount to upstream + return {inputRange.upstreamState(), NoStats{}, call.getSkipCount(), + AqlCall{0, false, 0, AqlCall::LimitType::HARD}}; } -void CountCollectExecutor::incrCountBy(size_t incr) noexcept { _count += incr; } - -uint64_t CountCollectExecutor::getCount() noexcept { return _count; } - std::pair CountCollectExecutor::expectedNumberOfRows(size_t) const { - if (_state == ExecutionState::DONE) { - return {ExecutionState::DONE, 0}; - } + TRI_ASSERT(false); return {ExecutionState::HASMORE, 1}; } diff --git a/arangod/Aql/CountCollectExecutor.h b/arangod/Aql/CountCollectExecutor.h index d6a25feb0a54..2fad185c917c 100644 --- a/arangod/Aql/CountCollectExecutor.h +++ b/arangod/Aql/CountCollectExecutor.h @@ -26,6 +26,8 @@ #ifndef ARANGOD_AQL_COUNT_COLLECT_EXECUTOR_H #define ARANGOD_AQL_COUNT_COLLECT_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/types.h" @@ -42,6 +44,8 @@ class ExecutorInfos; class OutputAqlItemRow; template class SingleRowFetcher; +struct AqlCall; +class AqlItemBlockInputRange; class CountCollectExecutorInfos : public ExecutorInfos { public: @@ -80,20 +84,24 @@ class CountCollectExecutor { CountCollectExecutor() = delete; CountCollectExecutor(CountCollectExecutor&&) = default; CountCollectExecutor(CountCollectExecutor const&) = delete; - CountCollectExecutor(Fetcher& fetcher, Infos&); + CountCollectExecutor(Fetcher&, Infos&); ~CountCollectExecutor(); /** - * @brief produce the next Row of Aql Values. + * @brief produce the next Rows of Aql Values. * - * @return ExecutionState, and if successful exactly one new Row of AqlItems. + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream */ + [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; - std::pair produceRows(OutputAqlItemRow& output); - - void incrCountBy(size_t incr) noexcept; - - uint64_t getCount() noexcept; + /** + * @brief skip the next Row of Aql Values. 
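+   * Skipping here means skipping the single aggregate count row; upstream is
+   * simply asked for a hard limit of 0.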
+ * + * @return ExecutorState, the stats, and a new Call that needs to be send to upstream + */ + [[nodiscard]] auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple; std::pair expectedNumberOfRows(size_t atMost) const; @@ -102,9 +110,6 @@ class CountCollectExecutor { private: Infos const& _infos; - Fetcher& _fetcher; - ExecutionState _state; - uint64_t _count; }; } // namespace aql diff --git a/arangod/Aql/EnumerateCollectionExecutor.cpp b/arangod/Aql/EnumerateCollectionExecutor.cpp index ee200f05ca62..3ff3220d415c 100644 --- a/arangod/Aql/EnumerateCollectionExecutor.cpp +++ b/arangod/Aql/EnumerateCollectionExecutor.cpp @@ -238,8 +238,13 @@ std::tuple EnumerateCo if (_cursorHasMore) { return {ExecutorState::HASMORE, stats, call.getSkipCount(), upstreamCall}; } + if (!call.needsFullCount()) { + // Do not overfetch too much + upstreamCall.softLimit = call.getOffset(); + // else we do unlimited softLimit. + // we are going to return everything anyways. + } - upstreamCall.softLimit = call.getOffset(); return {inputRange.upstreamState(), stats, call.getSkipCount(), upstreamCall}; } diff --git a/arangod/Aql/EnumerateListExecutor.cpp b/arangod/Aql/EnumerateListExecutor.cpp index 84d2ab2d080b..03a5932f8d1a 100644 --- a/arangod/Aql/EnumerateListExecutor.cpp +++ b/arangod/Aql/EnumerateListExecutor.cpp @@ -200,7 +200,12 @@ std::tuple EnumerateListExecutor::skipR } call.didSkip(skipped); - upstreamCall.softLimit = call.getOffset(); + if (!call.needsFullCount()) { + // Do not overfetch too much + upstreamCall.softLimit = call.getOffset(); + // else we do unlimited softLimit. + // we are going to return everything anyways. + } if (_inputArrayPosition < _inputArrayLength) { // fullCount will always skip the complete array TRI_ASSERT(offsetPhase); diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 31e98b72fed5..6c39b40d76fe 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -137,7 +137,7 @@ template constexpr bool isNewStyleExecutor = is_one_of_v< Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, - SubqueryExecutor, SubqueryExecutor, + SubqueryExecutor, SubqueryExecutor, CountCollectExecutor, // TODO: re-enable after new subquery end & start are implemented // CalculationExecutor, CalculationExecutor, CalculationExecutor, HashedCollectExecutor, ConstrainedSortExecutor, @@ -1127,7 +1127,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, IdExecutor>, IdExecutor, HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, ConstrainedSortExecutor, - SubqueryExecutor, + CountCollectExecutor, SubqueryExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif diff --git a/arangod/Aql/IResearchViewExecutor.cpp b/arangod/Aql/IResearchViewExecutor.cpp index 5bd3cdc8d7d5..6f6a3788b01b 100644 --- a/arangod/Aql/IResearchViewExecutor.cpp +++ b/arangod/Aql/IResearchViewExecutor.cpp @@ -518,7 +518,12 @@ IResearchViewExecutorBase::skipRowsRange(AqlItemBlockInputRange& i stats.incrScanned(call.getSkipCount()); AqlCall upstreamCall{}; - upstreamCall.softLimit = call.getOffset() + std::min(call.softLimit, call.hardLimit); + if (!call.needsFullCount()) { + // Do not overfetch too much. 
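+      // Upstream only needs to deliver offset + remaining-limit many rows.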
+ upstreamCall.softLimit = call.getOffset() + std::min(call.softLimit, call.hardLimit); + // else we do unlimited softLimit. + // we are going to return everything anyways. + } return {inputRange.upstreamState(), stats, call.getSkipCount(), upstreamCall}; } diff --git a/arangod/Aql/IndexExecutor.cpp b/arangod/Aql/IndexExecutor.cpp index 91e6c33ecf47..5b99e75fed48 100644 --- a/arangod/Aql/IndexExecutor.cpp +++ b/arangod/Aql/IndexExecutor.cpp @@ -752,7 +752,6 @@ auto IndexExecutor::produceRows(AqlItemBlockInputRange& inputRange, OutputAqlIte } AqlCall upstreamCall; - upstreamCall.fullCount = clientCall.needsFullCount(); LOG_DEVEL_IDX << "IndexExecutor::produceRows reporting state " << returnState(); return {returnState(), stats, upstreamCall}; diff --git a/arangod/Aql/NoResultsExecutor.cpp b/arangod/Aql/NoResultsExecutor.cpp index 1728e430e6dd..0291a889bd31 100644 --- a/arangod/Aql/NoResultsExecutor.cpp +++ b/arangod/Aql/NoResultsExecutor.cpp @@ -40,5 +40,6 @@ auto NoResultsExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItem auto NoResultsExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) const noexcept -> std::tuple { - return {ExecutorState::DONE, NoStats{}, 0, AqlCall{0, false, 0, AqlCall::LimitType::HARD}}; + return {inputRange.upstreamState(), NoStats{}, 0, + AqlCall{0, false, 0, AqlCall::LimitType::HARD}}; }; \ No newline at end of file diff --git a/arangod/Utils/OperationCursor.cpp b/arangod/Utils/OperationCursor.cpp index e6030a9dc83d..0245935344e3 100644 --- a/arangod/Utils/OperationCursor.cpp +++ b/arangod/Utils/OperationCursor.cpp @@ -24,10 +24,9 @@ #include "OperationCursor.h" using namespace arangodb; - + OperationCursor::OperationCursor(std::unique_ptr iterator) - : _indexIterator(std::move(iterator)), - _hasMore(true) { + : _indexIterator(std::move(iterator)), _hasMore(true) { TRI_ASSERT(_indexIterator != nullptr); } @@ -145,9 +144,11 @@ void OperationCursor::skipAll(uint64_t& skipped) { } while (_hasMore) { - _indexIterator->skip(toSkip, skipped); + uint64_t skippedLocal = 0; + _indexIterator->skip(toSkip, skippedLocal); if (skipped != toSkip) { _hasMore = false; } + skipped += skippedLocal; } } diff --git a/tests/Aql/CountCollectExecutorTest.cpp b/tests/Aql/CountCollectExecutorTest.cpp index dea9fc1f2982..8a0d6ef745da 100644 --- a/tests/Aql/CountCollectExecutorTest.cpp +++ b/tests/Aql/CountCollectExecutorTest.cpp @@ -20,141 +20,246 @@ /// @author Heiko Kernbach //////////////////////////////////////////////////////////////////////////////// -#include "RowFetcherHelper.h" #include "gtest/gtest.h" -#include "Aql/AqlItemBlock.h" +#include "ExecutorTestHelper.h" +#include "TestLambdaExecutor.h" + #include "Aql/CountCollectExecutor.h" -#include "Aql/InputAqlItemRow.h" -#include "Aql/OutputAqlItemRow.h" -#include "Aql/ResourceUsage.h" +#include "Aql/SingleRowFetcher.h" #include "Aql/Stats.h" - -#include -#include +#include "Aql/SubqueryEndExecutor.h" +#include "Aql/SubqueryStartExecutor.h" using namespace arangodb; using namespace arangodb::aql; +using LambdaExe = TestLambdaSkipExecutor; + namespace arangodb { namespace tests { namespace aql { -class CountCollectExecutorTest : public ::testing::Test { +using CountCollectTestHelper = ExecutorTestHelper<1, 1>; +using CountCollectSplitType = CountCollectTestHelper::SplitType; +using CountCollectParamType = std::tuple; + +class CountCollectExecutorTest + : public AqlExecutorTestCaseWithParam { protected: - ExecutionState state; - ResourceMonitor monitor; - AqlItemBlockManager itemBlockManager; - 
RegisterId nrOutputReg = 2; - SharedAqlItemBlockPtr block; - std::shared_ptr> outputRegisters; - - CountCollectExecutorTest() - : itemBlockManager(&monitor, SerializationFormat::SHADOWROWS), - nrOutputReg(2), - block(new AqlItemBlock(itemBlockManager, 1000, nrOutputReg)), - outputRegisters(std::make_shared>( - std::initializer_list{1})) {} + auto MakeInfos(RegisterId outReg) -> CountCollectExecutorInfos { + return CountCollectExecutorInfos{ + outReg, outReg /*inputRegisters*/, outReg + 1 /*outputRegisters*/, {}, {}}; + } + auto GetSplit() -> CountCollectSplitType { + auto const& [split] = GetParam(); + return split; + } + + auto MakeSubqueryStartInfos() -> SubqueryStartExecutor::Infos { + auto inputRegisterSet = make_shared_unordered_set({0}); + auto outputRegisterSet = make_shared_unordered_set({}); + + auto toKeepRegisterSet = std::unordered_set{0}; + + return SubqueryStartExecutor::Infos(inputRegisterSet, outputRegisterSet, + inputRegisterSet->size(), + inputRegisterSet->size() + + outputRegisterSet->size(), + {}, toKeepRegisterSet); + } + + auto MakeSubqueryEndInfos(RegisterId inputRegister) -> SubqueryEndExecutor::Infos { + auto const outputRegister = RegisterId{inputRegister + 1}; + auto inputRegisterSet = make_shared_unordered_set({}); + for (RegisterId r = 0; r <= inputRegister; ++r) { + inputRegisterSet->emplace(r); + } + auto outputRegisterSet = make_shared_unordered_set({outputRegister}); + auto toKeepRegisterSet = std::unordered_set{}; + + return SubqueryEndExecutor::Infos(inputRegisterSet, outputRegisterSet, + inputRegisterSet->size(), + inputRegisterSet->size() + + outputRegisterSet->size(), + {}, toKeepRegisterSet, nullptr, + inputRegister, outputRegister, false); + } + + auto MakeRemoveAllLinesInfos() -> LambdaExe::Infos { + auto numRegs = size_t{1}; + + auto inRegisterList = make_shared_unordered_set({}); + auto outRegisterList = make_shared_unordered_set({}); + + std::unordered_set toKeep; + + for (RegisterId r = 0; r < numRegs; ++r) { + toKeep.emplace(r); + } + + ProduceCall prod = [](AqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple { + EXPECT_TRUE(false) << "Should never be called"; + return {ExecutorState::DONE, NoStats{}, AqlCall{0, true, 0, AqlCall::LimitType::HARD}}; + }; + + SkipCall skip = [](AqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple { + std::ignore = input.skipAll(); + return {input.upstreamState(), NoStats{}, 0, + AqlCall{0, true, 0, AqlCall::LimitType::HARD}}; + }; + + return LambdaExe::Infos(inRegisterList, outRegisterList, numRegs, numRegs, + {}, toKeep, prod, skip); + } }; -TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_wait) { - CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), false); - CountCollectExecutor testee(fetcher, infos); - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), outputRegisters, - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 0); - - ASSERT_EQ(0, fetcher.totalSkipped()); +template +const CountCollectSplitType splitIntoBlocks = + CountCollectSplitType{std::vector{vs...}}; +template +const CountCollectSplitType splitStep 
= CountCollectSplitType{step}; + +INSTANTIATE_TEST_CASE_P(CountCollectExecutor, CountCollectExecutorTest, + ::testing::Values(CountCollectSplitType{std::monostate()}, + splitStep<1>, splitIntoBlocks<2, 3>, + splitStep<2>)); + +TEST_P(CountCollectExecutorTest, empty_input) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {{0}}) + .expectSkipped(0) + .setCall(AqlCall{}) + .run(); +} + +TEST_P(CountCollectExecutorTest, count_input) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({{0}, {1}, {2}, {3}}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {{4}}) + .expectSkipped(0) + .setCall(AqlCall{}) + .run(); +} + +TEST_P(CountCollectExecutorTest, empty_input_skip) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {}) + .expectSkipped(1) + .setCall(AqlCall{10}) + .run(); +} + +TEST_P(CountCollectExecutorTest, count_input_skip) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({{0}, {1}, {2}, {3}}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {}) + .expectSkipped(1) + .setCall(AqlCall{10}) + .run(); } -TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) { - CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input.steal(), true); - CountCollectExecutor testee(fetcher, infos); - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), outputRegisters, - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 0); - - ASSERT_EQ(0, fetcher.totalSkipped()); +TEST_P(CountCollectExecutorTest, empty_input_fullCount) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {}) + .expectSkipped(1) + .setCall(AqlCall{0, true, 0, AqlCall::LimitType::HARD}) + .run(); } -TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doesnt_wait) { - CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); - auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), false); - CountCollectExecutor testee(fetcher, infos); - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), outputRegisters, - infos.registersToKeep(), 
infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 3); - - ASSERT_EQ(3, fetcher.totalSkipped()); +TEST_P(CountCollectExecutorTest, count_input_fullCount) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({{0}, {1}, {2}, {3}}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {}) + .expectSkipped(1) + .setCall(AqlCall{0, true, 0, AqlCall::LimitType::HARD}) + .run(); } -TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { - CountCollectExecutorInfos infos(1 /* outputRegId */, 1 /* nrIn */, nrOutputReg, {}, {}); - auto input = VPackParser::fromJson("[ [1], [2], [3] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(itemBlockManager, input->steal(), true); - CountCollectExecutor testee(fetcher, infos); - NoStats stats{}; - OutputAqlItemRow result{std::move(block), outputRegisters, - infos.registersToKeep(), infos.registersToClear()}; - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(result.produced()); - - auto block = result.stealBlock(); - AqlValue x = block->getValue(0, 1); - ASSERT_TRUE(x.isNumber()); - ASSERT_EQ(x.toInt64(), 3); - - ASSERT_EQ(3, fetcher.totalSkipped()); +TEST_P(CountCollectExecutorTest, count_input_softlimit) { + ExecutorTestHelper<1, 1>(*fakedQuery) + .setExecBlock(MakeInfos(1), ExecutionNode::COLLECT) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({{0}, {1}, {2}, {3}}) + .expectedState(ExecutionState::DONE) + .expectOutput({1}, {{4}}) + .expectSkipped(0) + .setCall(AqlCall{0, false, 1, AqlCall::LimitType::SOFT}) + .run(); +} + +TEST_P(CountCollectExecutorTest, count_in_empty_subquery) { + ExecutorTestHelper<1, 1> helper(*fakedQuery); + + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeSubqueryStartInfos(), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeRemoveAllLinesInfos(), + ExecutionNode::FILTER)) + .addConsumer(helper.createExecBlock(MakeInfos(1), ExecutionNode::COLLECT)) + .addConsumer(helper.createExecBlock(MakeSubqueryEndInfos(1), + ExecutionNode::SUBQUERY_END)); + + helper.setPipeline(std::move(pipe)) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({{0}, {1}, {2}, {3}}) + .expectedState(ExecutionState::DONE) + .expectOutput({2}, {{R"([0])"}, {R"([0])"}, {R"([0])"}, {R"([0])"}}) + .expectSkipped(0) + .setCall(AqlCall{}) + .run(); +} + +TEST_P(CountCollectExecutorTest, count_in_subquery) { + ExecutorTestHelper<1, 1> helper(*fakedQuery); + + Pipeline pipe{}; + pipe.addConsumer(helper.createExecBlock(MakeSubqueryStartInfos(), + ExecutionNode::SUBQUERY_START)) + .addConsumer(helper.createExecBlock(MakeInfos(1), ExecutionNode::COLLECT)) + 
.addConsumer(helper.createExecBlock(MakeSubqueryEndInfos(1), + ExecutionNode::SUBQUERY_END)); + + helper.setPipeline(std::move(pipe)) + .expectedStats(ExecutionStats{}) + .setInputSplitType(GetSplit()) + .setInputValue({{0}, {1}, {2}, {3}}) + .expectedState(ExecutionState::DONE) + .expectOutput({2}, {{R"([1])"}, {R"([1])"}, {R"([1])"}, {R"([1])"}}) + .expectSkipped(0) + .setCall(AqlCall{}) + .run(); } } // namespace aql From 2262370ee6b1b7336559df7e876522394b19f28f Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Sat, 29 Feb 2020 08:35:42 +0100 Subject: [PATCH 092/122] Activate CalculationExecutors (#11177) --- arangod/Aql/ExecutionBlockImpl.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 6c39b40d76fe..59a52dd6d239 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -138,8 +138,7 @@ constexpr bool isNewStyleExecutor = is_one_of_v< Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, SubqueryExecutor, SubqueryExecutor, CountCollectExecutor, - // TODO: re-enable after new subquery end & start are implemented - // CalculationExecutor, CalculationExecutor, CalculationExecutor, + CalculationExecutor, CalculationExecutor, CalculationExecutor, HashedCollectExecutor, ConstrainedSortExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, From c69dc935e04d1f1aa66c7aaef7b0ad52f0f775d0 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Sat, 29 Feb 2020 08:48:31 +0100 Subject: [PATCH 093/122] Fixed compile bug-due to hidden merge conflict --- arangod/Aql/ExecutionBlockImpl.cpp | 42 ++++++++++++++++-------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 59a52dd6d239..ca8ae4c1d24a 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -136,10 +136,10 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); template constexpr bool isNewStyleExecutor = is_one_of_v< Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, - IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, - SubqueryExecutor, SubqueryExecutor, CountCollectExecutor, - CalculationExecutor, CalculationExecutor, CalculationExecutor, - HashedCollectExecutor, ConstrainedSortExecutor, + IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, + EnumerateCollectionExecutor, SubqueryExecutor, SubqueryExecutor, CountCollectExecutor, + CalculationExecutor, CalculationExecutor, + CalculationExecutor, HashedCollectExecutor, ConstrainedSortExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaExecutor, TestLambdaSkipExecutor, // we need one after these to avoid compile errors in non-test mode @@ -151,9 +151,10 @@ constexpr bool isNewStyleExecutor = is_one_of_v< ModificationExecutor, ModificationExecutor, UpdateReplaceModifier>, ModificationExecutor, - ModificationExecutor, UpsertModifier>, SubqueryStartExecutor, - UnsortedGatherExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, - LimitExecutor, SortExecutor, IResearchViewExecutor, + ModificationExecutor, UpsertModifier>, + SubqueryStartExecutor, UnsortedGatherExecutor, SubqueryEndExecutor, TraversalExecutor, + KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, 
LimitExecutor, SortExecutor, + IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1107,7 +1108,8 @@ static SkipRowsRangeVariant constexpr skipRowsType() { bool constexpr useExecutor = hasSkipRowsRange::value; - static_assert(!std::is_same_v> || hasSkipRowsRange::value); + static_assert(!std::is_same_v> || + hasSkipRowsRange::value); // ConstFetcher and SingleRowFetcher can skip, but // it may not be done for modification subqueries. @@ -1124,9 +1126,9 @@ static SkipRowsRangeVariant constexpr skipRowsType() { useExecutor == (is_one_of_v< Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, - IdExecutor>, IdExecutor, HashedCollectExecutor, - IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, ConstrainedSortExecutor, - CountCollectExecutor, SubqueryExecutor, + IdExecutor>, IdExecutor, + HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, + ConstrainedSortExecutor, CountCollectExecutor, SubqueryExecutor, #ifdef ARANGODB_USE_GOOGLE_TESTS TestLambdaSkipExecutor, #endif @@ -1137,9 +1139,10 @@ static SkipRowsRangeVariant constexpr skipRowsType() { ModificationExecutor, ModificationExecutor, UpdateReplaceModifier>, ModificationExecutor, - ModificationExecutor, UpsertModifier>, TraversalExecutor, - EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, LimitExecutor, - UnsortedGatherExecutor, SortExecutor, IResearchViewExecutor, + ModificationExecutor, UpsertModifier>, + TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, + SortedCollectExecutor, LimitExecutor, UnsortedGatherExecutor, SortExecutor, + IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1170,12 +1173,13 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert( !std::is_same::value || useFetcher, "LimitExecutor needs to implement skipRows() to work correctly"); - + if constexpr (useExecutor) { return SkipRowsRangeVariant::EXECUTOR; + } else { + static_assert(useFetcher); + return SkipRowsRangeVariant::FETCHER; } - static_assert(useFetcher); - return SkipRowsRangeVariant::FETCHER; } // Let's do it the C++ way. @@ -1202,8 +1206,7 @@ static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwa } // TODO: We only need to do this is the executor actually require to call. // e.g. Modifications will always need to be called. Limit only if it needs to report fullCount - if constexpr (is_one_of_v, + if constexpr (is_one_of_v, ModificationExecutor, InsertModifier>, ModificationExecutor, ModificationExecutor, RemoveModifier>, @@ -1238,6 +1241,7 @@ auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, size_t co } } else { TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } } From 341ef5fbf6b4de3a1c7433b0c4d51e502f13a99d Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 2 Mar 2020 13:28:16 +0100 Subject: [PATCH 094/122] Feature/aql subquery execution block impl execute implementation base bugfixes (#11201) * Fixed range-handling for Modification Executors * DataRange handling in ModificationExecutor * Honor batch-size defined by UpstreamExecutor * Fixed compile issue * More fixes in modification * Remvoed log devel * Fixed profiler Test. for NoResults node we cahnge the behaviour * Activated getSome failure tests in ExecuteRestHandler * Fixed skipping in Index * Let the MultiDependencySingleROwFetcher return the correct states. 
* Fixed non-maintainer compilation * Attempt to fix windows compile issue * Fixed the non-maintainer compile ina different way * Allow waiting within old-style subquery * Fixed invalid skipRwos in unsorted gather * Added some Assertions in MultiAqlItemBlockInputRange * Initialize dependdencies of MultiDependencyFetcher * Fixed skipRows loop in UnsortingGatherNode * Fixed return state of GatherNode --- arangod/Aql/AqlItemBlockInputMatrix.cpp | 21 +++-- arangod/Aql/AqlItemBlockInputMatrix.h | 3 +- arangod/Aql/EnumerateCollectionExecutor.cpp | 15 +--- arangod/Aql/ExecutionBlockImpl.cpp | 84 +++++++++++++++--- arangod/Aql/ModificationExecutor.cpp | 87 ++++++++++++++----- arangod/Aql/MultiAqlItemBlockInputRange.cpp | 4 +- .../Aql/MultiDependencySingleRowFetcher.cpp | 20 ++--- arangod/Aql/RestAqlHandler.cpp | 3 + arangod/Aql/SubqueryExecutor.cpp | 79 +++++++++++------ arangod/Aql/SubqueryExecutor.h | 16 ++-- arangod/Aql/UnsortedGatherExecutor.cpp | 33 ++++--- arangod/Utils/OperationCursor.cpp | 2 +- tests/js/server/aql/aql-profiler.js | 10 +-- 13 files changed, 262 insertions(+), 115 deletions(-) diff --git a/arangod/Aql/AqlItemBlockInputMatrix.cpp b/arangod/Aql/AqlItemBlockInputMatrix.cpp index bbd495784e27..3d4fc6839b7a 100644 --- a/arangod/Aql/AqlItemBlockInputMatrix.cpp +++ b/arangod/Aql/AqlItemBlockInputMatrix.cpp @@ -54,18 +54,22 @@ AqlItemBlockInputMatrix::AqlItemBlockInputMatrix(ExecutorState state, AqlItemMat } } -AqlItemBlockInputRange AqlItemBlockInputMatrix::getNextInputRange() { +AqlItemBlockInputRange& AqlItemBlockInputMatrix::getInputRange() { TRI_ASSERT(_aqlItemMatrix != nullptr); + if (_lastRange.hasDataRow()) { + return _lastRange; + } + // Need initialze lastRange if (_aqlItemMatrix->numberOfBlocks() == 0) { - return AqlItemBlockInputRange{upstreamState()}; + _lastRange = {AqlItemBlockInputRange{upstreamState()}}; + } else { + SharedAqlItemBlockPtr blockPtr = _aqlItemMatrix->getBlock(_currentBlockRowIndex); + auto [start, end] = blockPtr->getRelevantRange(); + ExecutorState state = incrBlockIndex(); + _lastRange = {state, 0, std::move(blockPtr), start}; } - - SharedAqlItemBlockPtr blockPtr = _aqlItemMatrix->getBlock(_currentBlockRowIndex); - auto [start, end] = blockPtr->getRelevantRange(); - ExecutorState state = incrBlockIndex(); - - return {state, 0, std::move(blockPtr), start}; + return _lastRange; } SharedAqlItemBlockPtr AqlItemBlockInputMatrix::getBlock() const noexcept { @@ -170,5 +174,6 @@ ExecutorState AqlItemBlockInputMatrix::incrBlockIndex() { } void AqlItemBlockInputMatrix::resetBlockIndex() noexcept { + _lastRange = {AqlItemBlockInputRange{upstreamState()}}; _currentBlockRowIndex = 0; } diff --git a/arangod/Aql/AqlItemBlockInputMatrix.h b/arangod/Aql/AqlItemBlockInputMatrix.h index 6e638aa2dbb6..01deb9852df7 100644 --- a/arangod/Aql/AqlItemBlockInputMatrix.h +++ b/arangod/Aql/AqlItemBlockInputMatrix.h @@ -51,7 +51,7 @@ class AqlItemBlockInputMatrix { // Will provide access to the first block (from _aqlItemMatrix) // After a block has been delivered, the block index will be increased. // Next call then will deliver the next block etc. - AqlItemBlockInputRange getNextInputRange(); + AqlItemBlockInputRange& getInputRange(); std::pair getMatrix() noexcept; ExecutorState upstreamState() const noexcept; @@ -70,6 +70,7 @@ class AqlItemBlockInputMatrix { // Only if _aqlItemMatrix is set (and NOT a nullptr), we have a valid and // usable DataRange object available to work with. 
AqlItemMatrix* _aqlItemMatrix; + AqlItemBlockInputRange _lastRange{ExecutorState::HASMORE}; size_t _currentBlockRowIndex = 0; ShadowAqlItemRow _shadowRow{CreateInvalidShadowRowHint{}}; }; diff --git a/arangod/Aql/EnumerateCollectionExecutor.cpp b/arangod/Aql/EnumerateCollectionExecutor.cpp index 3ff3220d415c..7dfe8894fb87 100644 --- a/arangod/Aql/EnumerateCollectionExecutor.cpp +++ b/arangod/Aql/EnumerateCollectionExecutor.cpp @@ -195,12 +195,10 @@ std::tuple EnumerateCo AqlItemBlockInputRange& inputRange, AqlCall& call) { AqlCall upstreamCall{}; EnumerateCollectionStats stats{}; - bool offsetPhase = (call.getOffset() > 0); TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumFiltered() == 0); - - while (inputRange.hasDataRow() && call.shouldSkip()) { + while ((inputRange.hasDataRow() || _cursorHasMore) && call.shouldSkip()) { uint64_t skipped = 0; if (!_cursorHasMore) { @@ -210,13 +208,8 @@ std::tuple EnumerateCo if (_cursorHasMore) { TRI_ASSERT(_currentRow.isInitialized()); // if offset is > 0, we're in offset skip phase - if (offsetPhase) { - if (skipped < call.getOffset()) { - skipped += skipEntries(call.getOffset(), stats); - } else { - // we skipped enough in our offset phase - break; - } + if (call.getOffset() > 0) { + skipped += skipEntries(call.getOffset(), stats); } else { // fullCount phase if (_infos.getFilter() == nullptr) { @@ -276,7 +269,6 @@ std::tuple EnumerateCollection TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumScanned() == 0); TRI_ASSERT(_documentProducingFunctionContext.getAndResetNumFiltered() == 0); _documentProducingFunctionContext.setOutputRow(&output); - while (inputRange.hasDataRow() && !output.isFull()) { if (!_cursorHasMore) { initializeNewRow(inputRange); @@ -309,7 +301,6 @@ std::tuple EnumerateCollection if (!_cursorHasMore) { initializeNewRow(inputRange); } - return {inputRange.upstreamState(), stats, upstreamCall}; } diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index ca8ae4c1d24a..abf241d61570 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -1252,12 +1252,17 @@ auto ExecutionBlockImpl::executeProduceRows(typename Fetcher::DataRang if constexpr (isNewStyleExecutor) { if constexpr (is_one_of_v) { return _executor.produceRows(input, output); + } else if constexpr (is_one_of_v, SubqueryExecutor>) { + // The SubqueryExecutor has it's own special handling outside. + // SO this code is in fact not reachable + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); } else { auto [state, stats, call] = _executor.produceRows(input, output); return {state, stats, call, 0}; } } else { - TRI_ASSERT(false); + return {ExecutorState::DONE, typename Executor::Stats{}, AqlCall{}, 0}; } } @@ -1274,6 +1279,11 @@ auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRa auto res = _executor.skipRowsRange(inputRange, call); _executorReturnedDone = std::get(res) == ExecutorState::DONE; return res; + } else if constexpr (is_one_of_v, SubqueryExecutor>) { + // The SubqueryExecutor has it's own special handling outside. 
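+      // (the produce/skip calls are issued directly from executeWithoutTrace, see below)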
+ // SO this code is in fact not reachable + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); } else { auto [state, stats, skipped, localCall] = _executor.skipRowsRange(inputRange, call); @@ -1576,10 +1586,19 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(!(clientCall.getOffset() == 0 && clientCall.softLimit == AqlCall::Limit{0})); TRI_ASSERT(!(clientCall.hasSoftLimit() && clientCall.fullCount)); TRI_ASSERT(!(clientCall.hasSoftLimit() && clientCall.hasHardLimit())); + if constexpr (is_one_of_v, SubqueryExecutor>) { + // The old subquery executor can in-fact return waiting on produce call. + // if it needs to wait for the subquery. + // So we need to allow the return state here as well. + TRI_ASSERT(_execState == ExecState::CHECKCALL || + _execState == ExecState::SHADOWROWS || _execState == ExecState::UPSTREAM || + _execState == ExecState::PRODUCE || _execState == ExecState::SKIP); + } else { + // We can only have returned the following internal states + TRI_ASSERT(_execState == ExecState::CHECKCALL || _execState == ExecState::SHADOWROWS || + _execState == ExecState::UPSTREAM); + } - // We can only have returned the following internal states - TRI_ASSERT(_execState == ExecState::CHECKCALL || _execState == ExecState::SHADOWROWS || - _execState == ExecState::UPSTREAM); // Skip can only be > 0 if we are in upstream cases. TRI_ASSERT(_skipped == 0 || _execState == ExecState::UPSTREAM); @@ -1633,8 +1652,33 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { clientCall.getLimit() == 0 && clientCall.needsFullCount(); #endif LOG_QUERY("1f786", DEBUG) << printTypeInfo() << " call skipRows " << clientCall; - auto [state, stats, skippedLocal, call, dependency] = - executeSkipRowsRange(_lastRange, clientCall); + + ExecutorState state = ExecutorState::HASMORE; + typename Executor::Stats stats; + size_t skippedLocal = 0; + AqlCall call{}; + size_t dependency = 0; + if constexpr (is_one_of_v>) { + // NOTE: The subquery Executor will by itself call EXECUTE on it's + // subquery. This can return waiting => we can get a WAITING state + // here. We can only get the waiting state for SUbquery executors. + ExecutionState subqueryState = ExecutionState::HASMORE; + std::tie(subqueryState, stats, skippedLocal, call) = + _executor.skipRowsRange(_lastRange, clientCall); + if (subqueryState == ExecutionState::WAITING) { + TRI_ASSERT(skippedLocal == 0); + return {subqueryState, 0, nullptr}; + } else if (subqueryState == ExecutionState::DONE) { + state = ExecutorState::DONE; + } else { + state = ExecutorState::HASMORE; + } + } else { + // Execute skipSome + std::tie(state, stats, skippedLocal, call, dependency) = + executeSkipRowsRange(_lastRange, clientCall); + } + _requestedDependency = dependency; #ifdef ARANGODB_ENABLE_MAINTAINER_MODE // Assertion: We did skip 'skippedLocal' documents here. @@ -1695,11 +1739,29 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } TRI_ASSERT(_outputItemRow); TRI_ASSERT(!_executorReturnedDone); - - // Execute getSome - auto const [state, stats, call, dependency] = - executeProduceRows(_lastRange, *_outputItemRow); - // TODO: Check + ExecutorState state = ExecutorState::HASMORE; + typename Executor::Stats stats; + AqlCall call{}; + size_t dependency = 0; + if constexpr (is_one_of_v, SubqueryExecutor>) { + // NOTE: The subquery Executor will by itself call EXECUTE on it's + // subquery. This can return waiting => we can get a WAITING state + // here. We can only get the waiting state for SUbquery executors. 
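// ---------------------------------------------------------------------------
// Sketch (stand-in types only, not the real ExecutionBlockImpl) of the pattern
// used in the surrounding hunks: most executors report an ExecutorState, but
// the old-style SubqueryExecutor drives its own nested execute() and may
// therefore report ExecutionState::WAITING. The block branches at compile time
// and forwards WAITING to its caller before any DONE/HASMORE translation.
// ---------------------------------------------------------------------------
#include <type_traits>
#include <utility>

namespace sketch {

enum class ExecutorState { HASMORE, DONE };
enum class ExecutionState { HASMORE, DONE, WAITING };

struct PlainExecutor {
  ExecutorState skipOnce() { return ExecutorState::DONE; }
};

struct WaitingExecutor {  // plays the role of the old-style subquery executor
  ExecutionState skipOnce() { return ExecutionState::WAITING; }
};

// Returns the translated state plus a flag telling the caller to return
// immediately because the executor is still waiting on its subquery.
template <class Executor>
std::pair<ExecutorState, bool> dispatchSkip(Executor& exec) {
  if constexpr (std::is_same_v<Executor, WaitingExecutor>) {
    ExecutionState s = exec.skipOnce();
    if (s == ExecutionState::WAITING) {
      return {ExecutorState::HASMORE, true};  // bubble WAITING up unchanged
    }
    return {s == ExecutionState::DONE ? ExecutorState::DONE
                                      : ExecutorState::HASMORE,
            false};
  } else {
    return {exec.skipOnce(), false};
  }
}

}  // namespace sketch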
+ ExecutionState subqueryState = ExecutionState::HASMORE; + std::tie(subqueryState, stats, call) = + _executor.produceRows(_lastRange, *_outputItemRow); + if (subqueryState == ExecutionState::WAITING) { + return {subqueryState, 0, nullptr}; + } else if (subqueryState == ExecutionState::DONE) { + state = ExecutorState::DONE; + } else { + state = ExecutorState::HASMORE; + } + } else { + // Execute getSome + std::tie(state, stats, call, dependency) = + executeProduceRows(_lastRange, *_outputItemRow); + } _requestedDependency = dependency; _executorReturnedDone = state == ExecutorState::DONE; _engine->_stats += stats; diff --git a/arangod/Aql/ModificationExecutor.cpp b/arangod/Aql/ModificationExecutor.cpp index 5556ae39b93d..6554c6574fed 100644 --- a/arangod/Aql/ModificationExecutor.cpp +++ b/arangod/Aql/ModificationExecutor.cpp @@ -169,17 +169,34 @@ template typename FetcherType::DataRange& input, OutputAqlItemRow& output) -> std::tuple { TRI_ASSERT(_infos._trx); - + AqlCall upstreamCall{}; + if constexpr (std::is_same_v && + !std::is_same_v) { + upstreamCall.softLimit = _modifier.getBatchSize(); + } auto stats = ModificationStats{}; _modifier.reset(); + if (!input.hasDataRow()) { + // Input is empty + return {input.upstreamState(), stats, upstreamCall}; + } + + TRI_IF_FAILURE("ModificationBlock::getSome") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } - ExecutorState upstreamState = ExecutorState::HASMORE; // only produce at most output.numRowsLeft() many results + ExecutorState upstreamState = ExecutorState::HASMORE; if constexpr (std::is_same_v) { - auto range = input.getNextInputRange(); + auto& range = input.getInputRange(); doCollect(range, output.numRowsLeft()); upstreamState = range.upstreamState(); + if (upstreamState == ExecutorState::DONE) { + // We are done with this input. + // We need to forward it to the last ShadowRow. + input.skipAllRemainingDataRows(); + } } else { doCollect(input, output.numRowsLeft()); upstreamState = input.upstreamState(); @@ -196,39 +213,65 @@ template doOutput(output, stats); } - return {upstreamState, stats, AqlCall{}}; + return {upstreamState, stats, upstreamCall}; } template [[nodiscard]] auto ModificationExecutor::skipRowsRange( typename FetcherType::DataRange& input, AqlCall& call) -> std::tuple { + AqlCall upstreamCall{}; + if constexpr (std::is_same_v && + !std::is_same_v) { + upstreamCall.softLimit = _modifier.getBatchSize(); + } + auto stats = ModificationStats{}; - _modifier.reset(); - ExecutorState upstreamState = ExecutorState::HASMORE; - // only produce at most output.numRowsLeft() many results - if constexpr (std::is_same_v) { - auto range = input.getNextInputRange(); - doCollect(range, call.getOffset()); - upstreamState = range.upstreamState(); - } else { - doCollect(input, call.getOffset()); - upstreamState = input.upstreamState(); + TRI_IF_FAILURE("ModificationBlock::getSome") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } - if (_modifier.nrOfOperations() > 0) { - _modifier.transact(); - - if (_infos._doCount) { - stats.addWritesExecuted(_modifier.nrOfWritesExecuted()); - stats.addWritesIgnored(_modifier.nrOfWritesIgnored()); + // only produce at most output.numRowsLeft() many results + ExecutorState upstreamState = input.upstreamState(); + while (input.hasDataRow() && call.needSkipMore()) { + _modifier.reset(); + size_t toSkip = call.getOffset(); + if (call.getLimit() == 0 && call.hasHardLimit()) { + // We need to produce all modification operations. + // If we are bound by limits or not! 
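// ---------------------------------------------------------------------------
// Sketch of the skip bookkeeping the loop above relies on. SkipCall is a
// simplified stand-in for AqlCall, not the real class: an offset that shrinks
// as rows are skipped, a hard limit of 0 plus fullCount marking the phase in
// which everything remaining is skipped and counted, and a running counter
// that the executor reports back (compare call.getSkipCount()).
// ---------------------------------------------------------------------------
#include <algorithm>
#include <cstddef>
#include <limits>

namespace sketch {

struct SkipCall {
  std::size_t offset{0};
  std::size_t hardLimit{std::numeric_limits<std::size_t>::max()};
  bool fullCount{false};
  std::size_t skipped{0};

  // Rows skipped first consume the offset, afterwards they only count.
  void didSkip(std::size_t n) {
    offset -= std::min(offset, n);
    skipped += n;
  }

  std::size_t getSkipCount() const { return skipped; }

  // Still in the offset phase, or in the full-count phase after a hard limit
  // of zero. The executor's loop additionally stops once its input is empty.
  bool needSkipMore() const {
    return offset > 0 || (hardLimit == 0 && fullCount);
  }

  // Budget for one skip attempt: the remaining offset, or "everything" while
  // the full-count phase is active (compare ExecutionBlock::SkipAllSize()).
  std::size_t toSkipNow() const {
    if (offset > 0) {
      return offset;
    }
    return (hardLimit == 0 && fullCount)
               ? std::numeric_limits<std::size_t>::max()
               : 0;
  }
};

}  // namespace sketch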
+ toSkip = ExecutionBlock::SkipAllSize(); + } + if constexpr (std::is_same_v) { + auto& range = input.getInputRange(); + if (range.hasDataRow()) { + doCollect(range, toSkip); + } + upstreamState = range.upstreamState(); + if (upstreamState == ExecutorState::DONE) { + // We are done with this input. + // We need to forward it to the last ShadowRow. + input.skipAllRemainingDataRows(); + TRI_ASSERT(input.upstreamState() == ExecutorState::DONE); + } + } else { + doCollect(input, toSkip); + upstreamState = input.upstreamState(); } - call.didSkip(_modifier.nrOfOperations()); + if (_modifier.nrOfOperations() > 0) { + _modifier.transact(); + + if (_infos._doCount) { + stats.addWritesExecuted(_modifier.nrOfWritesExecuted()); + stats.addWritesIgnored(_modifier.nrOfWritesIgnored()); + } + + call.didSkip(_modifier.nrOfOperations()); + } } - return {upstreamState, stats, _modifier.nrOfOperations(), AqlCall{}}; + return {upstreamState, stats, call.getSkipCount(), upstreamCall}; } using NoPassthroughSingleRowFetcher = SingleRowFetcher; diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.cpp b/arangod/Aql/MultiAqlItemBlockInputRange.cpp index 8b5aad8595d7..556ee2c174a7 100644 --- a/arangod/Aql/MultiAqlItemBlockInputRange.cpp +++ b/arangod/Aql/MultiAqlItemBlockInputRange.cpp @@ -36,12 +36,14 @@ MultiAqlItemBlockInputRange::MultiAqlItemBlockInputRange(ExecutorState state, std::size_t skipped, std::size_t nrInputRanges) { _inputs.resize(nrInputRanges, AqlItemBlockInputRange{state, skipped}); + TRI_ASSERT(nrInputRanges > 0); } auto MultiAqlItemBlockInputRange::resizeIfNecessary(ExecutorState state, size_t skipped, size_t nrInputRanges) -> void { // We never want to reduce the number of dependencies. TRI_ASSERT(_inputs.size() <= nrInputRanges); + TRI_ASSERT(nrInputRanges > 0); if (_inputs.size() < nrInputRanges) { _inputs.resize(nrInputRanges, AqlItemBlockInputRange{state, skipped}); } @@ -89,7 +91,7 @@ auto MultiAqlItemBlockInputRange::hasShadowRow() const noexcept -> bool { // * assert that all dependencies are on a shadow row? auto MultiAqlItemBlockInputRange::peekShadowRow() const -> arangodb::aql::ShadowAqlItemRow { TRI_ASSERT(!hasDataRow()); - + TRI_ASSERT(!_inputs.empty()); // TODO: Correct? return _inputs.at(0).peekShadowRow(); } diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.cpp b/arangod/Aql/MultiDependencySingleRowFetcher.cpp index 517542e953e1..aac3b3b866ac 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.cpp +++ b/arangod/Aql/MultiDependencySingleRowFetcher.cpp @@ -368,11 +368,16 @@ auto MultiDependencySingleRowFetcher::useStack(AqlCallStack const& stack) -> voi auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependency, AqlCallStack& stack) -> std::tuple { + if (_dependencyStates.empty()) { + initDependencies(); + } auto [state, skipped, block] = _dependencyProxy->executeForDependency(dependency, stack); if (state == ExecutionState::WAITING) { return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; } + ExecutorState execState = + state == ExecutionState::DONE ? 
ExecutorState::DONE : ExecutorState::HASMORE; _dependencyStates.at(dependency) = state; if (std::any_of(std::begin(_dependencyStates), std::end(_dependencyStates), @@ -384,18 +389,9 @@ auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependen state = ExecutionState::DONE; } if (block == nullptr) { - if (state == ExecutionState::HASMORE) { - return {state, skipped, AqlItemBlockInputRange{ExecutorState::HASMORE, skipped}}; - } - return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, skipped}}; + return {state, skipped, AqlItemBlockInputRange{execState, skipped}}; } - + TRI_ASSERT(block != nullptr); auto [start, end] = block->getRelevantRange(); - if (state == ExecutionState::HASMORE) { - TRI_ASSERT(block != nullptr); - return {state, skipped, - AqlItemBlockInputRange{ExecutorState::DONE, skipped, block, start}}; - } - return {state, skipped, - AqlItemBlockInputRange{ExecutorState::DONE, skipped, block, start}}; + return {state, skipped, AqlItemBlockInputRange{execState, skipped, block, start}}; } diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index 58346af21a67..fa57a645b8b2 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -734,6 +734,9 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, generateError(std::move(maybeExecuteCall).result()); return RestStatus::DONE; } + TRI_IF_FAILURE("RestAqlHandler::getSome") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } auto& executeCall = maybeExecuteCall.get(); auto items = SharedAqlItemBlockPtr{}; diff --git a/arangod/Aql/SubqueryExecutor.cpp b/arangod/Aql/SubqueryExecutor.cpp index 72074816f0fe..c84b47f0346f 100644 --- a/arangod/Aql/SubqueryExecutor.cpp +++ b/arangod/Aql/SubqueryExecutor.cpp @@ -158,7 +158,7 @@ std::pair SubqueryExecutor::pro template auto SubqueryExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) - -> std::tuple { + -> std::tuple { auto getUpstreamCall = [&]() { AqlCall upstreamCall = output.getClientCall(); if constexpr (isModificationSubquery) { @@ -168,12 +168,12 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang return upstreamCall; }; - LOG_DEVEL_SQ << uint64_t(this) << "produceRows " << output.getClientCall(); + LOG_DEVEL_SQ << uint64_t(this) << "produceRows " << output.getClientCall(); if (_state == ExecutorState::DONE && !_input.isInitialized()) { // We have seen DONE upstream, and we have discarded our local reference // to the last input, we will not be able to produce results anymore. 
- return {_state, NoStats{}, getUpstreamCall()}; + return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } while (true) { if (_subqueryInitialized) { @@ -183,14 +183,19 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang if (_infos.isConst() && !_input.isFirstDataRowInBlock()) { // Simply write writeOutput(output); - LOG_DEVEL_SQ << uint64_t(this) << "wrote output is const " << _state << " " << getUpstreamCall(); - return {_state, NoStats{}, getUpstreamCall()}; + LOG_DEVEL_SQ << uint64_t(this) << "wrote output is const " << _state + << " " << getUpstreamCall(); + return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } // Non const case, or first run in const auto [state, skipped, block] = _subquery.execute(AqlCallStack(AqlCall{})); TRI_ASSERT(skipped == 0); + if (state == ExecutionState::WAITING) { + return {state, NoStats{}, getUpstreamCall()}; + } // We get a result + LOG_DEVEL_SQ << uint64_t(this) << " we get subquery result"; if (block != nullptr) { TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); @@ -198,6 +203,8 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang if (_infos.returnsData()) { TRI_ASSERT(_subqueryResults != nullptr); + LOG_DEVEL_SQ << uint64_t(this) + << " store subquery result for writing " << block->size(); _subqueryResults->emplace_back(std::move(block)); } } @@ -205,28 +212,35 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang // Subquery DONE if (state == ExecutionState::DONE) { writeOutput(output); - LOG_DEVEL_SQ << uint64_t(this) << "wrote output subquery done " << _state << " " << getUpstreamCall(); - return {_state, NoStats{}, getUpstreamCall()}; + LOG_DEVEL_SQ << uint64_t(this) << "wrote output subquery done " + << _state << " " << getUpstreamCall(); + return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } } else { // init new subquery if (!_input) { std::tie(_state, _input) = input.nextDataRow(); - LOG_DEVEL_SQ << uint64_t(this) << " nextDataRow: " << _state << " " << _input.isInitialized(); + LOG_DEVEL_SQ << uint64_t(this) << " nextDataRow: " << _state << " " + << _input.isInitialized(); if (!_input) { LOG_DEVEL_SQ << uint64_t(this) << "exit produce, no more input" << _state; - return {_state, NoStats{}, getUpstreamCall()}; + return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } } TRI_ASSERT(_input); if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { - auto initRes = _subquery.initializeCursor(_input); + LOG_DEVEL_SQ << "Subquery: Initialize cursor"; + auto [state, result] = _subquery.initializeCursor(_input); + if (state == ExecutionState::WAITING) { + LOG_DEVEL_SQ << "Waiting on initialize cursor"; + return {state, NoStats{}, AqlCall{}}; + } - if (initRes.second.fail()) { + if (result.fail()) { // Error during initialize cursor - THROW_ARANGO_EXCEPTION(initRes.second); + THROW_ARANGO_EXCEPTION(result); } _subqueryResults = std::make_unique>(); } @@ -294,10 +308,19 @@ SubqueryExecutor::fetchBlockForPassthrough(size_t atMost return {rv.first, {}, std::move(rv.second)}; } +template +auto SubqueryExecutor::translatedReturnType() const + noexcept -> ExecutionState { + if (_state == ExecutorState::DONE) { + return ExecutionState::DONE; + } + return ExecutionState::HASMORE; +} + +template <> template <> -template > -auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) - -> std::tuple { +auto SubqueryExecutor::skipRowsRange<>(AqlItemBlockInputRange& inputRange, AqlCall& call) + -> std::tuple { auto getUpstreamCall = 
[&]() { auto upstreamCall = AqlCall{}; return upstreamCall; @@ -305,12 +328,12 @@ auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, A size_t skipped = 0; - LOG_DEVEL_SQ << uint64_t(this) << "skipRowsRange " << call; + LOG_DEVEL_SQ << uint64_t(this) << "skipRowsRange " << call; if (_state == ExecutorState::DONE && !_input.isInitialized()) { // We have seen DONE upstream, and we have discarded our local reference // to the last input, we will not be able to produce results anymore. - return {_state, NoStats{}, 0, getUpstreamCall()}; + return {translatedReturnType(), NoStats{}, 0, getUpstreamCall()}; } while (true) { if (_subqueryInitialized) { @@ -324,12 +347,16 @@ auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, A skipped += 1; call.didSkip(1); LOG_DEVEL_SQ << uint64_t(this) << "did skip one"; - return {_state, NoStats{}, skipped, getUpstreamCall()}; + return {translatedReturnType(), NoStats{}, skipped, getUpstreamCall()}; } // Non const case, or first run in const auto [state, skipped, block] = _subquery.execute(AqlCallStack(AqlCall{})); TRI_ASSERT(skipped == 0); + if (state == ExecutionState::WAITING) { + return {state, NoStats{}, 0, getUpstreamCall()}; + } + // We get a result if (block != nullptr) { TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { @@ -349,7 +376,7 @@ auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, A skipped += 1; call.didSkip(1); LOG_DEVEL_SQ << uint64_t(this) << "did skip one"; - return {_state, NoStats{}, skipped, getUpstreamCall()}; + return {translatedReturnType(), NoStats{}, skipped, getUpstreamCall()}; } } else { @@ -359,17 +386,20 @@ auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, A if (!_input) { LOG_DEVEL_SQ << uint64_t(this) << "skipped nothing waiting for input " << _state; - return {_state, NoStats{}, skipped, getUpstreamCall()}; + return {translatedReturnType(), NoStats{}, skipped, getUpstreamCall()}; } } TRI_ASSERT(_input); if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { - auto initRes = _subquery.initializeCursor(_input); + auto [state, result] = _subquery.initializeCursor(_input); + if (state == ExecutionState::WAITING) { + return {state, NoStats{}, 0, getUpstreamCall()}; + } - if (initRes.second.fail()) { + if (result.fail()) { // Error during initialize cursor - THROW_ARANGO_EXCEPTION(initRes.second); + THROW_ARANGO_EXCEPTION(result); } _subqueryResults = std::make_unique>(); } @@ -380,7 +410,4 @@ auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, A } template class ::arangodb::aql::SubqueryExecutor; -template auto SubqueryExecutor::skipRowsRange(AqlItemBlockInputRange& inputRange, - AqlCall& call) - -> std::tuple; template class ::arangodb::aql::SubqueryExecutor; diff --git a/arangod/Aql/SubqueryExecutor.h b/arangod/Aql/SubqueryExecutor.h index c7fbee012be2..b9e5c299827b 100644 --- a/arangod/Aql/SubqueryExecutor.h +++ b/arangod/Aql/SubqueryExecutor.h @@ -23,11 +23,11 @@ #ifndef ARANGOD_AQL_SUBQUERY_EXECUTOR_H #define ARANGOD_AQL_SUBQUERY_EXECUTOR_H +#include "Aql/AqlCall.h" +#include "Aql/AqlItemBlockInputRange.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" -#include "Aql/AqlItemBlockInputRange.h" -#include "Aql/AqlCall.h" #include "Aql/Stats.h" #include "Basics/Result.h" @@ -97,13 +97,13 @@ class SubqueryExecutor { std::pair produceRows(OutputAqlItemRow& output); [[nodiscard]] auto produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) - -> std::tuple; + 
-> std::tuple; // skipRowsRange <=> isModificationSubquery - template = 0> + template = 0> auto skipRowsRange(AqlItemBlockInputRange& inputRange, AqlCall& call) - -> std::tuple; + -> std::tuple; std::tuple fetchBlockForPassthrough(size_t atMost); @@ -114,6 +114,12 @@ class SubqueryExecutor { */ void writeOutput(OutputAqlItemRow& output); + /** + * @brief Translate _state => to to execution allowing waiting. + * + */ + auto translatedReturnType() const noexcept -> ExecutionState; + private: Fetcher& _fetcher; SubqueryExecutorInfos& _infos; diff --git a/arangod/Aql/UnsortedGatherExecutor.cpp b/arangod/Aql/UnsortedGatherExecutor.cpp index e1b3221a640f..ef2babd051f2 100644 --- a/arangod/Aql/UnsortedGatherExecutor.cpp +++ b/arangod/Aql/UnsortedGatherExecutor.cpp @@ -85,25 +85,36 @@ auto UnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& input, auto UnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) -> std::tuple { - auto skipped = size_t{0}; - while (call.needSkipMore() && input.hasDataRow(currentDependency())) { - auto [state, inputRow] = input.nextDataRow(currentDependency()); - - call.didSkip(1); - skipped++; + while (call.needSkipMore() && !done()) { + if (input.hasDataRow(currentDependency())) { + auto [state, inputRow] = input.nextDataRow(currentDependency()); + call.didSkip(1); - if (state == ExecutorState::DONE) { - advanceDependency(); + if (state == ExecutorState::DONE) { + advanceDependency(); + } + } else { + if (input.upstreamState(currentDependency()) == ExecutorState::DONE) { + advanceDependency(); + } else { + // We need to fetch more first + break; + } } } + while (!done() && input.upstreamState(currentDependency()) == ExecutorState::DONE) { + advanceDependency(); + } + if (done()) { // here currentDependency is invalid which will cause things to crash // if we ask upstream in ExecutionBlockImpl. yolo. - return {ExecutorState::DONE, Stats{}, skipped, AqlCall{}, currentDependency()}; + return {ExecutorState::DONE, Stats{}, call.getSkipCount(), AqlCall{}, + currentDependency()}; } else { - return {input.upstreamState(currentDependency()), Stats{}, skipped, - AqlCall{}, currentDependency()}; + return {input.upstreamState(currentDependency()), Stats{}, + call.getSkipCount(), AqlCall{}, currentDependency()}; } } diff --git a/arangod/Utils/OperationCursor.cpp b/arangod/Utils/OperationCursor.cpp index 0245935344e3..198d508b838a 100644 --- a/arangod/Utils/OperationCursor.cpp +++ b/arangod/Utils/OperationCursor.cpp @@ -146,7 +146,7 @@ void OperationCursor::skipAll(uint64_t& skipped) { while (_hasMore) { uint64_t skippedLocal = 0; _indexIterator->skip(toSkip, skippedLocal); - if (skipped != toSkip) { + if (skippedLocal != toSkip) { _hasMore = false; } skipped += skippedLocal; diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js index 50eca3fee3a3..31c08f58d75c 100644 --- a/tests/js/server/aql/aql-profiler.js +++ b/tests/js/server/aql/aql-profiler.js @@ -571,13 +571,13 @@ function ahuacatlProfilerTestSuite () { testNoResultsBlock1: function() { const query = 'FOR i IN 1..@rows FILTER 1 == 0 RETURN i'; - // As the descendant blocks of NoResultsBlock don't get a single getSome - // call, they don't show up in the statistics. + // Also if we have no results, we do send a drop-all to dependecies + // potentielly we have modifiaction nodes that need to be executed. 
const genNodeList = () => [ - {type: SingletonBlock, calls: 0, items: 0}, - {type: CalculationBlock, calls: 0, items: 0}, - {type: EnumerateListBlock, calls: 0, items: 0}, + {type: SingletonBlock, calls: 1, items: 0}, + {type: CalculationBlock, calls: 1, items: 0}, + {type: EnumerateListBlock, calls: 1, items: 0}, {type: NoResultsBlock, calls: 1, items: 0}, {type: ReturnBlock, calls: 1, items: 0}, ]; From c653260ca3a43e05e6460fcc5f4f3bdf59e130f1 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 3 Mar 2020 09:03:49 +0000 Subject: [PATCH 095/122] Fix memory leak in traversal (#11210) --- arangod/Aql/TraversalExecutor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arangod/Aql/TraversalExecutor.cpp b/arangod/Aql/TraversalExecutor.cpp index c111df3861a4..7b54279e653e 100644 --- a/arangod/Aql/TraversalExecutor.cpp +++ b/arangod/Aql/TraversalExecutor.cpp @@ -184,15 +184,18 @@ auto TraversalExecutor::doOutput(OutputAqlItemRow& output) -> void { // traverser now has next v, e, p values if (_infos.useVertexOutput()) { AqlValue vertex = _traverser.lastVertexToAqlValue(); + AqlValueGuard guard{vertex, true}; output.cloneValueInto(_infos.vertexRegister(), _inputRow, vertex); } if (_infos.useEdgeOutput()) { AqlValue edge = _traverser.lastEdgeToAqlValue(); + AqlValueGuard guard{edge, true}; output.cloneValueInto(_infos.edgeRegister(), _inputRow, edge); } if (_infos.usePathOutput()) { transaction::BuilderLeaser tmp(_traverser.trx()); AqlValue path = _traverser.pathToAqlValue(*tmp.builder()); + AqlValueGuard guard{path, true}; output.cloneValueInto(_infos.pathRegister(), _inputRow, path); } output.advanceRow(); From 41ce86054219eee979a520cff67e838d5db4e78d Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Tue, 3 Mar 2020 10:43:26 +0000 Subject: [PATCH 096/122] AqlSubqueryExecutionBlockImplExecuteImplementation use moveValueInto instead of cloneValueInto (#11213) * Fix memory leak in traversal * Use move instead of clone for AqlValues --- arangod/Aql/TraversalExecutor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arangod/Aql/TraversalExecutor.cpp b/arangod/Aql/TraversalExecutor.cpp index 7b54279e653e..92b6c7fc4048 100644 --- a/arangod/Aql/TraversalExecutor.cpp +++ b/arangod/Aql/TraversalExecutor.cpp @@ -185,18 +185,18 @@ auto TraversalExecutor::doOutput(OutputAqlItemRow& output) -> void { if (_infos.useVertexOutput()) { AqlValue vertex = _traverser.lastVertexToAqlValue(); AqlValueGuard guard{vertex, true}; - output.cloneValueInto(_infos.vertexRegister(), _inputRow, vertex); + output.moveValueInto(_infos.vertexRegister(), _inputRow, guard); } if (_infos.useEdgeOutput()) { AqlValue edge = _traverser.lastEdgeToAqlValue(); AqlValueGuard guard{edge, true}; - output.cloneValueInto(_infos.edgeRegister(), _inputRow, edge); + output.moveValueInto(_infos.edgeRegister(), _inputRow, guard); } if (_infos.usePathOutput()) { transaction::BuilderLeaser tmp(_traverser.trx()); AqlValue path = _traverser.pathToAqlValue(*tmp.builder()); AqlValueGuard guard{path, true}; - output.cloneValueInto(_infos.pathRegister(), _inputRow, path); + output.moveValueInto(_infos.pathRegister(), _inputRow, guard); } output.advanceRow(); } From 26242a1987ab9261ea1e64255c854ee1f0ab1c6b Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 3 Mar 2020 17:19:37 +0100 Subject: [PATCH 097/122] Feature/aql subquery execution block impl execute implementation gather executors (#11202) * Fixed range-handling for Modification Executors * DataRange handling in ModificationExecutor * Honor batch-size 
defined by UpstreamExecutor * Fixed compile issue * More fixes in modification * Remvoed log devel * Fixed profiler Test. for NoResults node we cahnge the behaviour * Activated getSome failure tests in ExecuteRestHandler * Fixed skipping in Index * Let the MultiDependencySingleROwFetcher return the correct states. * Fixed non-maintainer compilation * Attempt to fix windows compile issue * Fixed the non-maintainer compile ina different way * Added API in MultiAqlItemBlockInputRange to get Number of dependencies * Comments * Savepoint commit, does not compile, but no harm is done. Will start breaking things now * Another savepoint commit. does not compile, yet. * First draft of new Style SortingGather not yet implemented: Parallelism this needs to be handled in ExecutionBlockImpl now. * Allow waiting within old-style subquery * Fixed invalid skipRwos in unsorted gather * First draft of ParallelUnsortedGatherExecutor * Removed unused local variables * Added some Assertions in MultiAqlItemBlockInputRange * Initialize dependdencies of MultiDependencyFetcher * Fixed skipRows loop in UnsortingGatherNode * Fixed return state of GatherNode * Added an assertion before accessing a vectir unbounded --- arangod/Aql/ExecutionBlockImpl.cpp | 18 +- arangod/Aql/MultiAqlItemBlockInputRange.cpp | 17 +- arangod/Aql/MultiAqlItemBlockInputRange.h | 14 +- .../Aql/ParallelUnsortedGatherExecutor.cpp | 167 +++---- arangod/Aql/ParallelUnsortedGatherExecutor.h | 51 +- arangod/Aql/SortingGatherExecutor.cpp | 460 +++++++----------- arangod/Aql/SortingGatherExecutor.h | 116 +++-- arangod/Aql/UnsortedGatherExecutor.h | 25 +- 8 files changed, 396 insertions(+), 472 deletions(-) diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index abf241d61570..a81f1c6c3d7f 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -135,7 +135,7 @@ constexpr bool is_one_of_v = (std::is_same_v || ...); */ template constexpr bool isNewStyleExecutor = is_one_of_v< - Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, + Executor, FilterExecutor, SortedCollectExecutor, IdExecutor, ParallelUnsortedGatherExecutor, IdExecutor>, ReturnExecutor, DistinctCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, SubqueryExecutor, SubqueryExecutor, CountCollectExecutor, CalculationExecutor, CalculationExecutor, @@ -151,8 +151,8 @@ constexpr bool isNewStyleExecutor = is_one_of_v< ModificationExecutor, ModificationExecutor, UpdateReplaceModifier>, ModificationExecutor, - ModificationExecutor, UpsertModifier>, - SubqueryStartExecutor, UnsortedGatherExecutor, SubqueryEndExecutor, TraversalExecutor, + ModificationExecutor, UpsertModifier>, SubqueryStartExecutor, + UnsortedGatherExecutor, SortingGatherExecutor, SubqueryEndExecutor, TraversalExecutor, KShortestPathsExecutor, ShortestPathExecutor, EnumerateListExecutor, LimitExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1125,7 +1125,7 @@ static SkipRowsRangeVariant constexpr skipRowsType() { static_assert( useExecutor == (is_one_of_v< - Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, + Executor, FilterExecutor, ShortestPathExecutor, ReturnExecutor, KShortestPathsExecutor, ParallelUnsortedGatherExecutor, IdExecutor>, IdExecutor, HashedCollectExecutor, IndexExecutor, EnumerateCollectionExecutor, DistinctCollectExecutor, ConstrainedSortExecutor, CountCollectExecutor, SubqueryExecutor, @@ -1139,9 +1139,9 @@ static SkipRowsRangeVariant constexpr skipRowsType() { 
ModificationExecutor, ModificationExecutor, UpdateReplaceModifier>, ModificationExecutor, - ModificationExecutor, UpsertModifier>, - TraversalExecutor, EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, - SortedCollectExecutor, LimitExecutor, UnsortedGatherExecutor, SortExecutor, + ModificationExecutor, UpsertModifier>, TraversalExecutor, + EnumerateListExecutor, SubqueryStartExecutor, SubqueryEndExecutor, SortedCollectExecutor, + LimitExecutor, UnsortedGatherExecutor, SortingGatherExecutor, SortExecutor, IResearchViewExecutor, IResearchViewExecutor, IResearchViewExecutor, @@ -1250,7 +1250,7 @@ auto ExecutionBlockImpl::executeProduceRows(typename Fetcher::DataRang OutputAqlItemRow& output) -> std::tuple { if constexpr (isNewStyleExecutor) { - if constexpr (is_one_of_v) { + if constexpr (is_one_of_v) { return _executor.produceRows(input, output); } else if constexpr (is_one_of_v, SubqueryExecutor>) { // The SubqueryExecutor has it's own special handling outside. @@ -1273,7 +1273,7 @@ auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRa if constexpr (isNewStyleExecutor) { call.skippedRows = 0; if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { - if constexpr (is_one_of_v) { + if constexpr (is_one_of_v) { // If the executor has a method skipRowsRange, to skip outputs. // Every non-passthrough executor needs to implement this. auto res = _executor.skipRowsRange(inputRange, call); diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.cpp b/arangod/Aql/MultiAqlItemBlockInputRange.cpp index 556ee2c174a7..ab8ea0d32e51 100644 --- a/arangod/Aql/MultiAqlItemBlockInputRange.cpp +++ b/arangod/Aql/MultiAqlItemBlockInputRange.cpp @@ -67,12 +67,23 @@ auto MultiAqlItemBlockInputRange::hasDataRow() const noexcept -> bool { }); } +auto MultiAqlItemBlockInputRange::rangeForDependency(size_t const dependency) + -> AqlItemBlockInputRange& { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency); +} + auto MultiAqlItemBlockInputRange::peekDataRow(size_t const dependency) const -> std::pair { TRI_ASSERT(dependency < _inputs.size()); return _inputs.at(dependency).peekDataRow(); } +auto MultiAqlItemBlockInputRange::skipAll(size_t const dependency) noexcept -> std::size_t { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).skipAll(); +} + auto MultiAqlItemBlockInputRange::nextDataRow(size_t const dependency) -> std::pair { TRI_ASSERT(dependency < _inputs.size()); @@ -132,7 +143,7 @@ auto MultiAqlItemBlockInputRange::isDone() const -> bool { return res; } -size_t MultiAqlItemBlockInputRange::skipAllRemainingDataRows() { +auto MultiAqlItemBlockInputRange::skipAllRemainingDataRows() -> size_t { for (size_t i = 0; i < _inputs.size(); i++) { _inputs.at(i).skipAllRemainingDataRows(); if (_inputs.at(i).upstreamState() == ExecutorState::HASMORE) { @@ -141,3 +152,7 @@ size_t MultiAqlItemBlockInputRange::skipAllRemainingDataRows() { } return 0; } + +auto MultiAqlItemBlockInputRange::numberDependencies() const noexcept -> size_t { + return _inputs.size(); +} \ No newline at end of file diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.h b/arangod/Aql/MultiAqlItemBlockInputRange.h index e23a65f5ad7a..74aa1584c2ac 100644 --- a/arangod/Aql/MultiAqlItemBlockInputRange.h +++ b/arangod/Aql/MultiAqlItemBlockInputRange.h @@ -49,8 +49,18 @@ class MultiAqlItemBlockInputRange { bool hasDataRow() const noexcept; bool hasDataRow(size_t const dependency) const noexcept; + /** + * @brief Get a reference to the range of a given dependency + * NOTE: 
Modifing this range will modify the state of this class as well + * + * @param dependency index of the dependency + * @return AqlItemBlockInputRange& Modifyable reference to the input data stream + */ + auto rangeForDependency(size_t const dependency) -> AqlItemBlockInputRange&; + std::pair peekDataRow(size_t const dependency) const; std::pair nextDataRow(size_t const dependency); + auto skipAll(size_t const dependency) noexcept -> std::size_t; bool hasShadowRow() const noexcept; @@ -65,7 +75,9 @@ class MultiAqlItemBlockInputRange { auto setDependency(size_t const dependency, AqlItemBlockInputRange& range) -> void; - size_t skipAllRemainingDataRows(); + auto skipAllRemainingDataRows() -> size_t; + + auto numberDependencies() const noexcept -> size_t; private: ExecutorState _finalState{ExecutorState::HASMORE}; diff --git a/arangod/Aql/ParallelUnsortedGatherExecutor.cpp b/arangod/Aql/ParallelUnsortedGatherExecutor.cpp index 629d8bea2893..cc5e83a32b04 100644 --- a/arangod/Aql/ParallelUnsortedGatherExecutor.cpp +++ b/arangod/Aql/ParallelUnsortedGatherExecutor.cpp @@ -39,11 +39,15 @@ ParallelUnsortedGatherExecutorInfos::ParallelUnsortedGatherExecutorInfos( nrInOutRegisters, nrInOutRegisters, std::move(registersToClear), std::move(registersToKeep)) {} -ParallelUnsortedGatherExecutor::ParallelUnsortedGatherExecutor(Fetcher& fetcher, Infos& infos) - : _fetcher(fetcher), _numberDependencies(0), _currentDependency(0), _skipped(0) {} +ParallelUnsortedGatherExecutor::ParallelUnsortedGatherExecutor(Fetcher&, Infos& infos) {} ParallelUnsortedGatherExecutor::~ParallelUnsortedGatherExecutor() = default; +auto ParallelUnsortedGatherExecutor::upstreamCall(AqlCall const& clientCall) const + noexcept -> AqlCall { + return clientCall; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief Guarantees requiredby this this block: /// 1) For every dependency the input is sorted, according to the same strategy. 
@@ -58,117 +62,74 @@ ParallelUnsortedGatherExecutor::~ParallelUnsortedGatherExecutor() = default; /// //////////////////////////////////////////////////////////////////////////////// -std::pair ParallelUnsortedGatherExecutor::produceRows(OutputAqlItemRow& output) { - initDependencies(); - - ExecutionState state; - InputAqlItemRow inputRow = InputAqlItemRow{CreateInvalidInputRowHint{}}; - - size_t x; - for (x = 0; x < _numberDependencies; ++x) { - size_t i = (_currentDependency + x) % _numberDependencies; - - if (_upstream[i] == ExecutionState::DONE) { - continue; - } - - size_t tmp = 0; - - state = ExecutionState::HASMORE; - while (!output.isFull() && state == ExecutionState::HASMORE) { - std::tie(state, inputRow) = _fetcher.fetchRowForDependency(i, output.numRowsLeft() /*atMost*/); - if (inputRow) { - output.copyRow(inputRow); - TRI_ASSERT(output.produced()); +auto ParallelUnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& input, + OutputAqlItemRow& output) + -> std::tuple { + // Illegal dependency, on purpose to trigger asserts + size_t waitingDep = input.numberDependencies(); + for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { + while (!output.isFull()) { + auto [state, row] = input.nextDataRow(dep); + if (row) { + output.copyRow(row); output.advanceRow(); - tmp++; + } else { + // This output did not produce anything + if (state == ExecutorState::HASMORE) { + waitingDep = dep; + } + break; } } - - _upstream[i] = state; - if (output.isFull()) { - break; - } - } - _currentDependency = x; - - NoStats stats; - - // fix assert in ExecutionBlockImpl::getSomeWithoutTrace - if (output.isFull()) { - return {ExecutionState::HASMORE, stats}; - } - - size_t numWaiting = 0; - for (x = 0; x < _numberDependencies; ++x) { - if (_upstream[x] == ExecutionState::HASMORE) { - return {ExecutionState::HASMORE, stats}; - } else if (_upstream[x] == ExecutionState::WAITING) { - numWaiting++; - } } - if (numWaiting > 0) { - return {ExecutionState::WAITING, stats}; + if (input.isDone()) { + // We cannot have one that we are waiting on, if we are done. 
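// ---------------------------------------------------------------------------
// Sketch with stand-in types of the drain order used by produceRows above:
// consume each dependency's locally buffered rows in turn, and remember the
// first dependency that ran dry but is not DONE yet, so the caller knows which
// upstream to ask for more input (compare the waitingDep result).
// ---------------------------------------------------------------------------
#include <cstddef>
#include <deque>
#include <optional>
#include <vector>

namespace sketch {

struct DependencyBuffer {
  std::deque<int> rows;    // rows already fetched from this dependency
  bool upstreamDone{false};
};

struct GatherResult {
  std::vector<int> produced;
  std::optional<std::size_t> requestFrom;  // dependency to poll next, if any
};

inline GatherResult gatherOnce(std::vector<DependencyBuffer>& deps,
                               std::size_t outputCapacity) {
  GatherResult result;
  for (std::size_t dep = 0; dep < deps.size(); ++dep) {
    while (result.produced.size() < outputCapacity && !deps[dep].rows.empty()) {
      result.produced.push_back(deps[dep].rows.front());
      deps[dep].rows.pop_front();
    }
    if (deps[dep].rows.empty() && !deps[dep].upstreamDone &&
        !result.requestFrom.has_value()) {
      result.requestFrom = dep;  // this dependency can still deliver rows
    }
    if (result.produced.size() == outputCapacity) {
      break;  // output block is full, stop draining
    }
  }
  return result;
}

}  // namespace sketch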
+ TRI_ASSERT(waitingDep == input.numberDependencies()); + return {ExecutorState::DONE, NoStats{}, AqlCall{}, waitingDep}; } - - TRI_ASSERT(std::all_of(_upstream.begin(), _upstream.end(), [](auto const& s) { return s == ExecutionState::DONE; } )); - return {ExecutionState::DONE, stats}; + return {ExecutorState::HASMORE, NoStats{}, upstreamCall(output.getClientCall()), waitingDep}; } -std::tuple -ParallelUnsortedGatherExecutor::skipRows(size_t const toSkip) { - initDependencies(); - TRI_ASSERT(_skipped <= toSkip); - - ExecutionState state = ExecutionState::HASMORE; - while (_skipped < toSkip) { - - const size_t i = _currentDependency; - if (_upstream[i] == ExecutionState::DONE) { - if (std::all_of(_upstream.begin(), _upstream.end(), - [](auto s) { return s == ExecutionState::DONE; })) { - state = ExecutionState::DONE; - break; - } - _currentDependency = (i + 1) % _numberDependencies; - continue; - } - - TRI_ASSERT(_skipped <= toSkip); +auto ParallelUnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, + AqlCall& call) + -> std::tuple { + size_t waitingDep = input.numberDependencies(); + for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { + auto& range = input.rangeForDependency(dep); + while (call.needSkipMore()) { + if (!range.hasDataRow() && range.skippedInFlight() == 0) { + // Consumed this range, + // consume the next one - size_t skippedNow; - std::tie(state, skippedNow) = _fetcher.skipRowsForDependency(i, toSkip - _skipped); - _upstream[i] = state; - if (state == ExecutionState::WAITING) { - TRI_ASSERT(skippedNow == 0); - return {ExecutionState::WAITING, NoStats{}, 0}; - } - _skipped += skippedNow; - - if (_upstream[i] == ExecutionState::DONE) { - if (std::all_of(_upstream.begin(), _upstream.end(), - [](auto s) { return s == ExecutionState::DONE; })) { + // Guarantee: + // While in offsetPhase, we will only send requests to the first + // NON-DONE dependency. + if (range.upstreamState() == ExecutorState::HASMORE && + waitingDep == input.numberDependencies()) { + waitingDep = dep; + } break; } - _currentDependency = (i + 1) % _numberDependencies; - continue; - } + if (range.hasDataRow()) { + // We overfetched, skipLocally + // By gurantee we will only see data, if + // we are past the offset phase. + TRI_ASSERT(call.getOffset() == 0); + } else { + if (call.getOffset() > 0) { + call.didSkip(range.skip(call.getOffset())); + } else { + // Fullcount Case + call.didSkip(range.skipAll()); + } + } + } } - - size_t skipped = _skipped; - _skipped = 0; - - TRI_ASSERT(skipped <= toSkip); - return {state, NoStats{}, skipped}; -} - -void ParallelUnsortedGatherExecutor::initDependencies() { - if (_numberDependencies == 0) { - // We need to initialize the dependencies once, they are injected - // after the fetcher is created. - _numberDependencies = _fetcher.numberDependencies(); - TRI_ASSERT(_numberDependencies > 0); - _upstream.resize(_numberDependencies, ExecutionState::HASMORE); - TRI_ASSERT(std::all_of(_upstream.begin(), _upstream.end(), [](auto const& s) { return s == ExecutionState::HASMORE; } )); + if (input.isDone()) { + // We cannot have one that we are waiting on, if we are done. 
+ TRI_ASSERT(waitingDep == input.numberDependencies()); + return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCall{}, waitingDep}; } + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), + upstreamCall(call), waitingDep}; } diff --git a/arangod/Aql/ParallelUnsortedGatherExecutor.h b/arangod/Aql/ParallelUnsortedGatherExecutor.h index ed4a32e31333..6df2900bce22 100644 --- a/arangod/Aql/ParallelUnsortedGatherExecutor.h +++ b/arangod/Aql/ParallelUnsortedGatherExecutor.h @@ -37,10 +37,11 @@ class Methods; namespace aql { +struct AqlCall; +class MultiAqlItemBlockInputRange; class MultiDependencySingleRowFetcher; class NoStats; class OutputAqlItemRow; -struct SortRegister; class ParallelUnsortedGatherExecutorInfos : public ExecutorInfos { public: @@ -55,7 +56,6 @@ class ParallelUnsortedGatherExecutorInfos : public ExecutorInfos { class ParallelUnsortedGatherExecutor { public: - public: struct Properties { static constexpr bool preservesOrder = true; @@ -71,31 +71,36 @@ class ParallelUnsortedGatherExecutor { ~ParallelUnsortedGatherExecutor(); /** - * @brief produce the next Row of Aql Values. + * @brief Produce rows * - * @return ExecutionState, - * if something was written output.hasValue() == true + * @param input DataRange delivered by the fetcher + * @param output place to write rows to + * @return std::tuple + * ExecutorState: DONE or HASMORE (only within a subquery) + * Stats: Stats gerenated here + * AqlCall: Request to upstream + * size:t: Dependency to request */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(MultiAqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; - std::tuple skipRows(size_t atMost); - - private: - - void initDependencies(); - - private: - Fetcher& _fetcher; - // 64: default size of buffer; 8: Alignment size; computed to 4 but breaks in windows debug build. 
- ::arangodb::containers::SmallVector::allocator_type::arena_type _arena; - ::arangodb::containers::SmallVector _upstream{_arena}; + /** + * @brief Skip rows + * + * @param input DataRange delivered by the fetcher + * @param call skip request form consumer + * @return std::tuple + * ExecutorState: DONE or HASMORE (only within a subquery) + * Stats: Stats gerenated here + * size_t: Number of rows skipped + * AqlCall: Request to upstream + * size:t: Dependency to request + */ + [[nodiscard]] auto skipRowsRange(MultiAqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; - // Total Number of dependencies - size_t _numberDependencies; - - size_t _currentDependency; - - size_t _skipped; + private: + auto upstreamCall(AqlCall const& clientCall) const noexcept -> AqlCall; }; } // namespace aql diff --git a/arangod/Aql/SortingGatherExecutor.cpp b/arangod/Aql/SortingGatherExecutor.cpp index d6d366c0a08a..61c4ea8ebe80 100644 --- a/arangod/Aql/SortingGatherExecutor.cpp +++ b/arangod/Aql/SortingGatherExecutor.cpp @@ -161,15 +161,18 @@ class MinElementSorting final : public SortingGatherExecutor::SortingStrategy, } // namespace SortingGatherExecutor::ValueType::ValueType(size_t index) - : dependencyIndex{index}, row{CreateInvalidInputRowHint()}, state{ExecutionState::HASMORE} {} + : dependencyIndex{index}, row{CreateInvalidInputRowHint()}, state{ExecutorState::HASMORE} {} + +SortingGatherExecutor::ValueType::ValueType(size_t index, InputAqlItemRow prow, ExecutorState pstate) + : dependencyIndex{index}, row{prow}, state{pstate} {} SortingGatherExecutorInfos::SortingGatherExecutorInfos( std::shared_ptr> inputRegisters, std::shared_ptr> outputRegisters, RegisterId nrInputRegisters, RegisterId nrOutputRegisters, std::unordered_set registersToClear, - std::unordered_set registersToKeep, std::vector&& sortRegister, - arangodb::transaction::Methods* trx, GatherNode::SortMode sortMode, size_t limit, - GatherNode::Parallelism p) + std::unordered_set registersToKeep, + std::vector&& sortRegister, arangodb::transaction::Methods* trx, + GatherNode::SortMode sortMode, size_t limit, GatherNode::Parallelism p) : ExecutorInfos(std::move(inputRegisters), std::move(outputRegisters), nrInputRegisters, nrOutputRegisters, std::move(registersToClear), std::move(registersToKeep)), @@ -183,17 +186,11 @@ SortingGatherExecutorInfos::SortingGatherExecutorInfos(SortingGatherExecutorInfo SortingGatherExecutorInfos::~SortingGatherExecutorInfos() = default; SortingGatherExecutor::SortingGatherExecutor(Fetcher& fetcher, Infos& infos) - : _fetcher(fetcher), - _initialized(false), + : _initialized(false), _numberDependencies(0), - _dependencyToFetch(0), _inputRows(), - _nrDone(0), _limit(infos.limit()), _rowsReturned(0), - _heapCounted(false), - _rowsLeftInHeap(0), - _skipped(0), _strategy(nullptr), _fetchParallel(infos.parallelism() == GatherNode::Parallelism::Parallel) { switch (infos.sortMode()) { @@ -213,6 +210,88 @@ SortingGatherExecutor::SortingGatherExecutor(Fetcher& fetcher, Infos& infos) SortingGatherExecutor::~SortingGatherExecutor() = default; +auto SortingGatherExecutor::initialize(typename Fetcher::DataRange const& inputRange) + -> std::optional> { + if (!_initialized) { + // We cannot modify the number of dependencies, so we start + // with 0 dependencies, and will increase to whatever inputRange gives us. 
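// ---------------------------------------------------------------------------
// Sketch (stand-in types) of the invariant initialize()/requiresMoreInput()
// establish below: a sorted merge can only emit its next row once every
// non-exhausted dependency exposes a head row; otherwise it must pause and
// request more data for exactly the dependency that ran dry.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <deque>
#include <optional>
#include <vector>

namespace sketch {

struct SortedInput {
  std::deque<int> buffered;  // locally available rows, already sorted
  bool exhausted{false};     // upstream reported DONE
};

// Either "refill input #index first" or "input #index holds the smallest head
// and may be emitted next".
struct MergeStep {
  bool needMoreFrom{false};
  std::size_t index{0};
};

inline std::optional<MergeStep> nextStep(std::vector<SortedInput> const& inputs) {
  std::optional<std::size_t> best;
  for (std::size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i].buffered.empty()) {
      if (!inputs[i].exhausted) {
        return MergeStep{true, i};  // cannot decide yet, refill this input
      }
      continue;  // exhausted and empty: ignore
    }
    if (!best || inputs[i].buffered.front() < inputs[*best].buffered.front()) {
      best = i;
    }
  }
  if (!best) {
    return std::nullopt;  // every input is exhausted, the merge is done
  }
  return MergeStep{false, *best};  // emit the smallest head next
}

}  // namespace sketch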
+ TRI_ASSERT(_numberDependencies == 0 || + _numberDependencies == inputRange.numberDependencies()); + _numberDependencies = inputRange.numberDependencies(); + auto call = requiresMoreInput(inputRange); + if (call.has_value()) { + return call; + } + // If we have collected all ranges once, we can prepare the local data-structure copy + _inputRows.reserve(_numberDependencies); + for (size_t dep = 0; dep < _numberDependencies; ++dep) { + auto const [state, row] = inputRange.peekDataRow(dep); + _inputRows.emplace_back(dep, row, state); + } + _strategy->prepare(_inputRows); + _initialized = true; + _numberDependencies = inputRange.numberDependencies(); + } + return {}; +} + +auto SortingGatherExecutor::requiresMoreInput(typename Fetcher::DataRange const& inputRange) + -> std::optional> { + for (size_t dep = 0; dep < _numberDependencies; ++dep) { + auto const& [state, input] = inputRange.peekDataRow(dep); + // Update the local copy, just to be sure it is up to date + // We might do too many copies here, but most likely this + // will not be a performance bottleneck. + ValueType& localDep = _inputRows[dep]; + localDep.row = input; + localDep.state = state; + if (!input && state != ExecutorState::DONE) { + // This dependency requires input + // TODO: This call requires limits + return std::tuple{AqlCall{}, dep}; + } + } + // No call required + return {}; +} + +auto SortingGatherExecutor::isDone(typename Fetcher::DataRange const& input) const -> bool { + // TODO: Include contrained sort + return input.isDone(); +} + +auto SortingGatherExecutor::nextRow(MultiAqlItemBlockInputRange& input) -> InputAqlItemRow { + if (isDone(input)) { + // No rows, there is a chance we get into this. + // If we requested data from upstream, but all if it is done. + return InputAqlItemRow{CreateInvalidInputRowHint{}}; + } +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + bool oneWithContent = false; + for (size_t dep = 0; dep < _numberDependencies; ++dep) { + auto const& [state, row] = input.peekDataRow(dep); + if (row) { + oneWithContent = true; + } + } + TRI_ASSERT(oneWithContent); +#endif + auto nextVal = _strategy->nextValue(); + _rowsReturned++; + { + // Consume the row, and set it to next input + std::ignore = input.nextDataRow(nextVal.dependencyIndex); + auto const& [state, row] = input.peekDataRow(nextVal.dependencyIndex); + _inputRows[nextVal.dependencyIndex].state = state; + _inputRows[nextVal.dependencyIndex].row = row; + + // TODO we might do some short-cuts here to maintain a list of requests + // to send in order to improve requires input + } + + return nextVal.row; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief Guarantees requiredby this this block: /// 1) For every dependency the input is sorted, according to the same strategy. @@ -227,192 +306,105 @@ SortingGatherExecutor::~SortingGatherExecutor() = default; /// //////////////////////////////////////////////////////////////////////////////// -std::pair SortingGatherExecutor::produceRows(OutputAqlItemRow& output) { - size_t const atMost = constrainedSort() ? 
output.numRowsLeft() - : ExecutionBlock::DefaultBatchSize; - ExecutionState state; - InputAqlItemRow row{CreateInvalidInputRowHint{}}; - std::tie(state, row) = produceNextRow(atMost); - - // HASMORE => row has to be initialized - TRI_ASSERT(state != ExecutionState::HASMORE || row.isInitialized()); - // WAITING => row may not be initialized - TRI_ASSERT(state != ExecutionState::WAITING || !row.isInitialized()); - - if (row) { - // NOTE: The original gatherBlock did referencing - // inside the outputblock by identical AQL values. - // This optimization is not in use anymore. - output.copyRow(row); +auto SortingGatherExecutor::produceRows(typename Fetcher::DataRange& input, + OutputAqlItemRow& output) + -> std::tuple { + { + // First initialize + auto maybeCall = initialize(input); + if (maybeCall.has_value()) { + auto const& [request, dep] = maybeCall.value(); + return {ExecutorState::HASMORE, NoStats{}, request, dep}; + } } - return {state, NoStats{}}; -} - -std::pair SortingGatherExecutor::produceNextRow(size_t const atMost) { - TRI_ASSERT(_strategy != nullptr); - assertConstrainedDoesntOverfetch(atMost); - // We shouldn't be asked for more rows when we are allowed to skip - TRI_ASSERT(!maySkip()); - if (!_initialized) { - ExecutionState state = init(atMost); - if (state != ExecutionState::HASMORE) { - // Can be DONE(unlikely, no input) of WAITING - return {state, InputAqlItemRow{CreateInvalidInputRowHint{}}}; - } - } else { - // Activate this assert as soon as all blocks follow the done == no call api - // TRI_ASSERT(_nrDone < _numberDependencies); - if (_inputRows[_dependencyToFetch].state == ExecutionState::DONE) { - _inputRows[_dependencyToFetch].row = InputAqlItemRow{CreateInvalidInputRowHint()}; - } else { - // This is executed on every produceRows, and will replace the row that we have returned last time - std::tie(_inputRows[_dependencyToFetch].state, - _inputRows[_dependencyToFetch].row) = - _fetcher.fetchRowForDependency(_dependencyToFetch, atMost); - if (_inputRows[_dependencyToFetch].state == ExecutionState::WAITING) { - return {ExecutionState::WAITING, InputAqlItemRow{CreateInvalidInputRowHint{}}}; - } - if (!_inputRows[_dependencyToFetch].row) { - TRI_ASSERT(_inputRows[_dependencyToFetch].state == ExecutionState::DONE); - adjustNrDone(_dependencyToFetch); - } + while (!isDone(input) && !output.isFull()) { + TRI_ASSERT(!maySkip()); + auto maybeCall = requiresMoreInput(input); + if (maybeCall.has_value()) { + auto const& [request, dep] = maybeCall.value(); + return {ExecutorState::HASMORE, NoStats{}, request, dep}; } - } - if (_nrDone >= _numberDependencies) { - // We cannot return a row, because all are done - return {ExecutionState::DONE, InputAqlItemRow{CreateInvalidInputRowHint{}}}; - } -// if we get here, we have a valid row for every not done dependency. -// And we have atLeast 1 valid row left -#ifdef ARANGODB_ENABLE_MAINTAINER_MODE - bool oneWithContent = false; - for (auto const& inPair : _inputRows) { - // Waiting needs to bail out at fetch state - TRI_ASSERT(inPair.state != ExecutionState::WAITING); - // row.invalid => dependency is done - TRI_ASSERT(inPair.row || inPair.state == ExecutionState::DONE); - if (inPair.row) { - oneWithContent = true; + auto row = nextRow(input); + TRI_ASSERT(row.isInitialized() || isDone(input)); + if (row) { + output.copyRow(row); + output.advanceRow(); } } - // We have at least one row to sort. - TRI_ASSERT(oneWithContent); -#endif - // get the index of the next best value. 
- ValueType val = _strategy->nextValue(); - _dependencyToFetch = val.dependencyIndex; - // We can never pick an invalid row! - TRI_ASSERT(val.row); - ++_rowsReturned; - adjustNrDone(_dependencyToFetch); - if (_nrDone >= _numberDependencies) { - return {ExecutionState::DONE, val.row}; - } - return {ExecutionState::HASMORE, val.row}; -} -void SortingGatherExecutor::adjustNrDone(size_t const dependency) { - auto const& dep = _inputRows[dependency]; - if (dep.state == ExecutionState::DONE) { -#ifdef ARANGODB_ENABLE_MAINTAINER_MODE - TRI_ASSERT(_flaggedAsDone[dependency] == false); - _flaggedAsDone[dependency] = true; -#endif - ++_nrDone; + // Call and dependency unused, so we return a too large dependency + // in order to trigger asserts if it is used. + if (isDone(input)) { + return {ExecutorState::DONE, NoStats{}, AqlCall{}, _numberDependencies + 1}; } + return {ExecutorState::HASMORE, NoStats{}, AqlCall{}, _numberDependencies + 1}; } -void SortingGatherExecutor::initNumDepsIfNecessary() { - if (_numberDependencies == 0) { - // We need to initialize the dependencies once, they are injected - // after the fetcher is created. - _numberDependencies = _fetcher.numberDependencies(); - TRI_ASSERT(_numberDependencies > 0); - _inputRows.reserve(_numberDependencies); - for (size_t index = 0; index < _numberDependencies; ++index) { - _inputRows.emplace_back(ValueType{index}); -#ifdef ARANGODB_ENABLE_MAINTAINER_MODE - _flaggedAsDone.emplace_back(false); -#endif +auto SortingGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) + -> std::tuple { + { + // First initialize + auto maybeCall = initialize(input); + if (maybeCall.has_value()) { + auto const& [request, dep] = maybeCall.value(); + return {ExecutorState::HASMORE, NoStats{}, 0, request, dep}; } } -} - -ExecutionState SortingGatherExecutor::init(size_t const atMost) { - assertConstrainedDoesntOverfetch(atMost); - initNumDepsIfNecessary(); - size_t numWaiting = 0; - for (size_t i = 0; i < _numberDependencies; i++) { - if (_inputRows[i].state == ExecutionState::DONE || - _inputRows[i].row) { - continue; + while (!isDone(input) && call.needSkipMore()) { + auto maybeCall = requiresMoreInput(input); + if (maybeCall.has_value()) { + auto const& [request, dep] = maybeCall.value(); + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), request, dep}; } - - std::tie(_inputRows[i].state, - _inputRows[i].row) = _fetcher.fetchRowForDependency(i, atMost); - if (_inputRows[i].state == ExecutionState::WAITING) { - if (!_fetchParallel) { - return ExecutionState::WAITING; + if (call.getOffset() > 0) { + TRI_ASSERT(!maySkip()); + // We need to sort still + // And account the row in the limit + auto row = nextRow(input); + TRI_ASSERT(row.isInitialized() || isDone(input)); + if (row) { + call.didSkip(1); + } + } else { + // We are only called with fullcount. + // sorting does not matter. + // Start simply skip all from upstream. + for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { + ExecutorState state = ExecutorState::HASMORE; + InputAqlItemRow row{CreateInvalidInputRowHint{}}; + while (state == ExecutorState::HASMORE) { + std::tie(state, row) = input.nextDataRow(dep); + if (row) { + call.didSkip(1); + } else { + // We have consumed all overfetched rows. + // We may still have a skip counter within the range. 
+ call.didSkip(input.skipAll(dep)); + if (state == ExecutorState::HASMORE) { + // We need to fetch more data, but can fullCount now + AqlCall request{0, true, 0, AqlCall::LimitType::HARD}; + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), request, dep}; + } + } + } } - numWaiting++; - } else if (!_inputRows[i].row) { - TRI_ASSERT(_inputRows[i].state == ExecutionState::DONE); - adjustNrDone(i); } } - if (numWaiting > 0) { - return ExecutionState::WAITING; - } - - TRI_ASSERT(_numberDependencies > 0); - _dependencyToFetch = _numberDependencies - 1; - _initialized = true; - if (_nrDone >= _numberDependencies) { - return ExecutionState::DONE; + // Call and dependency unused, so we return a too large dependency + // in order to trigger asserts if it is used. + if (isDone(input)) { + return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCall{}, + _numberDependencies + 1}; } - _strategy->prepare(_inputRows); - return ExecutionState::HASMORE; + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), AqlCall{}, + _numberDependencies + 1}; } std::pair SortingGatherExecutor::expectedNumberOfRows(size_t const atMost) const { - assertConstrainedDoesntOverfetch(atMost); - // We shouldn't be asked for more rows when we are allowed to skip - TRI_ASSERT(!maySkip()); - ExecutionState state; - size_t expectedNumberOfRows; - std::tie(state, expectedNumberOfRows) = _fetcher.preFetchNumberOfRows(atMost); - if (state == ExecutionState::WAITING) { - return {state, 0}; - } - if (expectedNumberOfRows >= atMost) { - // We do not care, we have more than atMost anyways. - return {state, expectedNumberOfRows}; - } - // Now we need to figure out a more precise state - for (auto const& inRow : _inputRows) { - if (inRow.state == ExecutionState::HASMORE) { - // This block is not fully fetched, we do NOT know how many rows - // will be in the next batch, overestimate! - return {ExecutionState::HASMORE, atMost}; - } - if (inRow.row.isInitialized()) { - // This dependency is in owned by this Executor - expectedNumberOfRows++; - } - } - if (expectedNumberOfRows == 0) { - return {ExecutionState::DONE, 0}; - } - return {ExecutionState::HASMORE, expectedNumberOfRows}; -} - -size_t SortingGatherExecutor::rowsLeftToWrite() const noexcept { - TRI_ASSERT(constrainedSort()); - TRI_ASSERT(_limit >= _rowsReturned); - return _limit - _rowsReturned; + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } bool SortingGatherExecutor::constrainedSort() const noexcept { @@ -430,116 +422,8 @@ bool SortingGatherExecutor::maySkip() const noexcept { return constrainedSort() && _rowsReturned >= _limit; } -std::tuple SortingGatherExecutor::skipRows(size_t const atMost) { - if (!maySkip()) { - // Until our limit, we must produce rows, because we might be asked later - // to produce rows, in which case all rows have to have been skipped in - // order. - return produceAndSkipRows(atMost); - } else { - // If we've reached our limit, we will never be asked to produce rows again. - // So we can just skip without sorting. 
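// ---------------------------------------------------------------------------
// Sketch (stand-in code) of the constrained-sort budget referred to above:
// while fewer than `limit` rows have been produced, skipping still has to go
// through the sorted merge so the right rows survive; once the limit has been
// produced, order no longer matters and the rest may be skipped wholesale.
// The clamped subtraction mirrors rowsLeftToWrite().
// ---------------------------------------------------------------------------
#include <algorithm>
#include <cstddef>

namespace sketch {

struct SortBudget {
  std::size_t limit{0};     // 0 means "no limit" in this simplified sketch
  std::size_t produced{0};  // rows already emitted in sorted order

  bool constrained() const { return limit > 0; }

  // Rows that must still be produced through the sorted merge.
  std::size_t rowsLeftToWrite() const {
    return limit - std::min(limit, produced);
  }

  // Once the limit is exhausted, remaining input may be skipped unsorted.
  bool maySkipUnsorted() const { return constrained() && produced >= limit; }
};

}  // namespace sketch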
- return reallySkipRows(atMost); - } -} - -std::tuple SortingGatherExecutor::reallySkipRows( - size_t const atMost) { - // Once, count all rows that are left in the heap (and free them) - if (!_heapCounted) { - initNumDepsIfNecessary(); - - // This row was just fetched: - _inputRows[_dependencyToFetch].row = InputAqlItemRow{CreateInvalidInputRowHint{}}; - _rowsLeftInHeap = 0; - for (auto& it : _inputRows) { - if (it.row) { - ++_rowsLeftInHeap; - it.row = InputAqlItemRow{CreateInvalidInputRowHint{}}; - } - } - _heapCounted = true; - - // Now we will just skip through all dependencies, starting with the first. - _dependencyToFetch = 0; - } - - { // Skip rows we had left in the heap first - std::size_t const skip = std::min(atMost, _rowsLeftInHeap); - _rowsLeftInHeap -= skip; - _skipped += skip; - } - - while (_dependencyToFetch < _numberDependencies && _skipped < atMost) { - auto& state = _inputRows[_dependencyToFetch].state; - while (state != ExecutionState::DONE && _skipped < atMost) { - std::size_t skippedNow; - std::tie(state, skippedNow) = - _fetcher.skipRowsForDependency(_dependencyToFetch, atMost - _skipped); - if (state == ExecutionState::WAITING) { - TRI_ASSERT(skippedNow == 0); - return {state, NoStats{}, 0}; - } - _skipped += skippedNow; - } - if (state == ExecutionState::DONE) { - ++_dependencyToFetch; - } - } - - // Skip dependencies which are DONE - while (_dependencyToFetch < _numberDependencies && - _inputRows[_dependencyToFetch].state == ExecutionState::DONE) { - ++_dependencyToFetch; - } - // The current dependency must now neither be DONE, nor WAITING. - TRI_ASSERT(_dependencyToFetch >= _numberDependencies || - _inputRows[_dependencyToFetch].state == ExecutionState::HASMORE); - - ExecutionState const state = _dependencyToFetch < _numberDependencies - ? ExecutionState::HASMORE - : ExecutionState::DONE; - - TRI_ASSERT(_skipped <= atMost); - std::size_t const skipped = _skipped; - _skipped = 0; - return {state, NoStats{}, skipped}; -} - -std::tuple SortingGatherExecutor::produceAndSkipRows( - size_t const atMost) { - ExecutionState state = ExecutionState::HASMORE; - InputAqlItemRow row{CreateInvalidInputRowHint{}}; - - // We may not skip more rows in this method than we can produce! - auto const ourAtMost = constrainedSort() - ? std::min(atMost, rowsLeftToWrite()) - : atMost; - - while(state == ExecutionState::HASMORE && _skipped < ourAtMost) { - std::tie(state, row) = produceNextRow(ourAtMost - _skipped); - // HASMORE => row has to be initialized - TRI_ASSERT(state != ExecutionState::HASMORE || row.isInitialized()); - // WAITING => row may not be initialized - TRI_ASSERT(state != ExecutionState::WAITING || !row.isInitialized()); - - if (row.isInitialized()) { - ++_skipped; - } - } - - if (state == ExecutionState::WAITING) { - return {state, NoStats{}, 0}; - } - - // Note that _skipped *can* be larger than `ourAtMost`, due to WAITING, in - // which case we might get a lower `ourAtMost` on the second call than during - // the first. 
- TRI_ASSERT(_skipped <= atMost); - TRI_ASSERT(state != ExecutionState::HASMORE || _skipped > 0); - TRI_ASSERT(state != ExecutionState::WAITING || _skipped == 0); - - std::size_t const skipped = _skipped; - _skipped = 0; - return {state, NoStats{}, skipped}; +auto SortingGatherExecutor::rowsLeftToWrite() const noexcept -> size_t { + TRI_ASSERT(constrainedSort()); + TRI_ASSERT(_limit >= _rowsReturned); + return _limit - std::min(_limit, _rowsReturned); } diff --git a/arangod/Aql/SortingGatherExecutor.h b/arangod/Aql/SortingGatherExecutor.h index 38c7aca7187e..440a507847c9 100644 --- a/arangod/Aql/SortingGatherExecutor.h +++ b/arangod/Aql/SortingGatherExecutor.h @@ -36,7 +36,9 @@ class Methods; namespace aql { +struct AqlCall; class MultiDependencySingleRowFetcher; +class MultiAqlItemBlockInputRange; class NoStats; class OutputAqlItemRow; struct SortRegister; @@ -49,9 +51,8 @@ class SortingGatherExecutorInfos : public ExecutorInfos { std::unordered_set registersToClear, std::unordered_set registersToKeep, std::vector&& sortRegister, - arangodb::transaction::Methods* trx, - GatherNode::SortMode sortMode, size_t limit, - GatherNode::Parallelism p); + arangodb::transaction::Methods* trx, GatherNode::SortMode sortMode, + size_t limit, GatherNode::Parallelism p); SortingGatherExecutorInfos() = delete; SortingGatherExecutorInfos(SortingGatherExecutorInfos&&); SortingGatherExecutorInfos(SortingGatherExecutorInfos const&) = delete; @@ -62,7 +63,7 @@ class SortingGatherExecutorInfos : public ExecutorInfos { arangodb::transaction::Methods* trx() { return _trx; } GatherNode::SortMode sortMode() const noexcept { return _sortMode; } - + GatherNode::Parallelism parallelism() const noexcept { return _parallelism; } size_t limit() const noexcept { return _limit; } @@ -80,9 +81,10 @@ class SortingGatherExecutor { struct ValueType { size_t dependencyIndex; InputAqlItemRow row; - ExecutionState state; + ExecutorState state; explicit ValueType(size_t index); + ValueType(size_t, InputAqlItemRow, ExecutorState); }; //////////////////////////////////////////////////////////////////////////////// @@ -112,34 +114,43 @@ class SortingGatherExecutor { using Infos = SortingGatherExecutorInfos; using Stats = NoStats; - SortingGatherExecutor(Fetcher& fetcher, Infos& infos); + SortingGatherExecutor(Fetcher&, Infos& infos); ~SortingGatherExecutor(); /** - * @brief produce the next Row of Aql Values. 
+ * @brief Produce rows * - * @return ExecutionState, - * if something was written output.hasValue() == true + * @param input DataRange delivered by the fetcher + * @param output place to write rows to + * @return std::tuple + * ExecutorState: DONE or HASMORE (only within a subquery) + * Stats: Stats gerenated here + * AqlCall: Request to upstream + * size:t: Dependency to request */ - std::pair produceRows(OutputAqlItemRow& output); + [[nodiscard]] auto produceRows(MultiAqlItemBlockInputRange& input, OutputAqlItemRow& output) + -> std::tuple; - void adjustNrDone(size_t dependency); + /** + * @brief Skip rows + * + * @param input DataRange delivered by the fetcher + * @param call skip request form consumer + * @return std::tuple + * ExecutorState: DONE or HASMORE (only within a subquery) + * Stats: Stats gerenated here + * size_t: Number of rows skipped + * AqlCall: Request to upstream + * size:t: Dependency to request + */ + [[nodiscard]] auto skipRowsRange(MultiAqlItemBlockInputRange& input, AqlCall& call) + -> std::tuple; std::pair expectedNumberOfRows(size_t atMost) const; - std::tuple skipRows(size_t atMost); - private: - void initNumDepsIfNecessary(); - - ExecutionState init(size_t atMost); - - std::pair produceNextRow(size_t atMost); - bool constrainedSort() const noexcept; - size_t rowsLeftToWrite() const noexcept; - void assertConstrainedDoesntOverfetch(size_t atMost) const noexcept; // This is interesting in case this is a constrained sort and fullCount is @@ -148,24 +159,54 @@ class SortingGatherExecutor { // This also means that we may not produce rows anymore after that point. bool maySkip() const noexcept; - private: - Fetcher& _fetcher; + /** + * @brief Function that checks if all dependencies are either + * done, or have a row. + * The first one that does not match the condition + * will produce an upstream call to be fulfilled. + * + * @param inputRange Range of all input dependencies + * @return std::optional> optional call for the dependnecy requiring input + */ + auto requiresMoreInput(MultiAqlItemBlockInputRange const& inputRange) + -> std::optional>; + /** + * @brief Get the next row matching the sorting strategy + * + * @return InputAqlItemRow best fit row. Might be invalid if all input is done. + */ + auto nextRow(MultiAqlItemBlockInputRange& input) -> InputAqlItemRow; + + /** + * @brief Tests if this Executor is done producing + * => All inputs are fully consumed + * + * @return true we are done + * @return false we have more + */ + auto isDone(MultiAqlItemBlockInputRange const& input) const -> bool; + + /** + * @brief Initialize the Sorting strategy with the given input. + * This is known to be empty, but all prepared at this point. + * @param inputRange The input, no data included yet. + */ + auto initialize(MultiAqlItemBlockInputRange const& inputRange) + -> std::optional>; + + auto rowsLeftToWrite() const noexcept -> size_t; + + private: // Flag if we are past the initialize phase (fetched one block for every dependency). bool _initialized; // Total Number of dependencies size_t _numberDependencies; - // The Dependency we have to fetch next - size_t _dependencyToFetch; - // Input data to process std::vector _inputRows; - // Counter for DONE states - size_t _nrDone; - /// @brief If we do a constrained sort, it holds the limit > 0. Otherwise, it's 0. size_t _limit; @@ -174,26 +215,9 @@ class SortingGatherExecutor { /// dependencies. 
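Taken together, requiresMoreInput and nextRow split the merge into two questions: is there a buffered candidate (or a finished upstream) for every dependency, and which of those candidates does the sorting strategy pick next. A reduced sketch of the selection step, with ints standing in for rows and one optional per dependency as the candidate buffer (names and types are invented for this sketch, not the actual strategy interface):

#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

// One buffered candidate row per dependency; std::nullopt means "exhausted".
using Candidates = std::vector<std::optional<int>>;

// Pick the smallest candidate (a min-element strategy) and clear that slot,
// which is where the real executor would pull the next row from that dependency.
std::optional<std::size_t> pickNext(Candidates& candidates) {
  std::optional<std::size_t> best;
  for (std::size_t i = 0; i < candidates.size(); ++i) {
    if (candidates[i] && (!best || *candidates[i] < *candidates[*best])) {
      best = i;
    }
  }
  if (best) {
    candidates[*best].reset();
  }
  return best;  // nullopt <=> every dependency is done
}

int main() {
  Candidates candidates{5, std::nullopt, 3};  // dependency 1 is already exhausted
  if (auto dep = pickNext(candidates)) {
    std::cout << "next row comes from dependency " << *dep << "\n";  // dependency 2
  }
}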
size_t _rowsReturned; - /// @brief When we reached the limit, we once count the rows that are left in - /// the heap (in _rowsLeftInHeap), so we can count them for skipping. - bool _heapCounted; - - /// @brief See comment for _heapCounted first. At the first real skip, this - /// is set to the number of rows left in the heap. It will be reduced while - /// skipping. - size_t _rowsLeftInHeap; - - size_t _skipped; - /// @brief sorting strategy std::unique_ptr _strategy; -#ifdef ARANGODB_ENABLE_MAINTAINER_MODE - std::vector _flaggedAsDone; -#endif - std::tuple reallySkipRows(size_t atMost); - std::tuple produceAndSkipRows(size_t atMost); - const bool _fetchParallel; }; diff --git a/arangod/Aql/UnsortedGatherExecutor.h b/arangod/Aql/UnsortedGatherExecutor.h index 5dc6d5c8afe8..2793c31be674 100644 --- a/arangod/Aql/UnsortedGatherExecutor.h +++ b/arangod/Aql/UnsortedGatherExecutor.h @@ -83,9 +83,32 @@ class UnsortedGatherExecutor { [[nodiscard]] auto skipRows(size_t atMost) -> std::tuple; - // TODO: This should really be the DataRange of the fetcher? + /** + * @brief Produce rows + * + * @param input DataRange delivered by the fetcher + * @param output place to write rows to + * @return std::tuple + * ExecutorState: DONE or HASMORE (only within a subquery) + * Stats: Stats gerenated here + * AqlCall: Request to upstream + * size:t: Dependency to request + */ [[nodiscard]] auto produceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) -> std::tuple; + + /** + * @brief Skip rows + * + * @param input DataRange delivered by the fetcher + * @param call skip request form consumer + * @return std::tuple + * ExecutorState: DONE or HASMORE (only within a subquery) + * Stats: Stats gerenated here + * size_t: Number of rows skipped + * AqlCall: Request to upstream + * size:t: Dependency to request + */ [[nodiscard]] auto skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) -> std::tuple; From fd763cb7ddaefe9bc4d05dfe64993db1e3353d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Tue, 3 Mar 2020 17:33:33 +0100 Subject: [PATCH 098/122] Feature/aql subquery execute parallel gather (#11215) * Fixed range-handling for Modification Executors * DataRange handling in ModificationExecutor * Honor batch-size defined by UpstreamExecutor * Fixed compile issue * More fixes in modification * Remvoed log devel * Fixed profiler Test. for NoResults node we cahnge the behaviour * Activated getSome failure tests in ExecuteRestHandler * Fixed skipping in Index * Let the MultiDependencySingleROwFetcher return the correct states. * Fixed non-maintainer compilation * Attempt to fix windows compile issue * Fixed the non-maintainer compile ina different way * Added API in MultiAqlItemBlockInputRange to get Number of dependencies * Comments * Savepoint commit, does not compile, but no harm is done. Will start breaking things now * Another savepoint commit. does not compile, yet. * First draft of new Style SortingGather not yet implemented: Parallelism this needs to be handled in ExecutionBlockImpl now. 
* Allow waiting within old-style subquery * Fixed invalid skipRwos in unsorted gather * First draft of ParallelUnsortedGatherExecutor * Removed unused local variables * Added some Assertions in MultiAqlItemBlockInputRange * Initialize dependdencies of MultiDependencyFetcher * Fixed skipRows loop in UnsortingGatherNode * Fixed return state of GatherNode * Added an assertion before accessing a vectir unbounded * Fixed uninitialized member in DistributeExecutor * Fixed use before vector initialization in SortingGather * Fixed uninitialized dependencies in MultiDepRowFetcher * First step towards parallel Aql * Fixed an assertion * Fixed upstream skipping in ParallelUnsortedGather Co-authored-by: Michael Hackstein --- arangod/Aql/BlocksWithClients.cpp | 6 ++-- arangod/Aql/DistributeExecutor.cpp | 2 +- arangod/Aql/DistributeExecutor.h | 4 +-- arangod/Aql/ExecutionBlockImpl.cpp | 33 ++++++++++++------- arangod/Aql/ExecutionBlockImpl.h | 10 ++++++ .../Aql/ParallelUnsortedGatherExecutor.cpp | 20 ++++++++--- arangod/Aql/ParallelUnsortedGatherExecutor.h | 3 +- arangod/Aql/SortingGatherExecutor.cpp | 9 +++-- 8 files changed, 59 insertions(+), 28 deletions(-) diff --git a/arangod/Aql/BlocksWithClients.cpp b/arangod/Aql/BlocksWithClients.cpp index 8df29015729f..b4a98714b920 100644 --- a/arangod/Aql/BlocksWithClients.cpp +++ b/arangod/Aql/BlocksWithClients.cpp @@ -209,7 +209,7 @@ auto BlocksWithClientsImpl::executeWithoutTraceForClient(AqlCallStack std::string const& clientId) -> std::tuple { TRI_ASSERT(!clientId.empty()); - if (clientId.empty()) { + if (ADB_UNLIKELY(clientId.empty())) { // Security bailout to avoid UB THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "got empty distribution id"); @@ -217,7 +217,7 @@ auto BlocksWithClientsImpl::executeWithoutTraceForClient(AqlCallStack auto it = _clientBlockData.find(clientId); TRI_ASSERT(it != _clientBlockData.end()); - if (it == _clientBlockData.end()) { + if (ADB_UNLIKELY(it == _clientBlockData.end())) { // Security bailout to avoid UB std::string message("AQL: unknown distribution id "); message.append(clientId); @@ -303,4 +303,4 @@ std::pair BlocksWithClientsImpl::skipSomeForSh } template class ::arangodb::aql::BlocksWithClientsImpl; -template class ::arangodb::aql::BlocksWithClientsImpl; \ No newline at end of file +template class ::arangodb::aql::BlocksWithClientsImpl; diff --git a/arangod/Aql/DistributeExecutor.cpp b/arangod/Aql/DistributeExecutor.cpp index c9b13abfa452..07cd89c9f4b5 100644 --- a/arangod/Aql/DistributeExecutor.cpp +++ b/arangod/Aql/DistributeExecutor.cpp @@ -453,4 +453,4 @@ std::pair ExecutionBlockImpl>> _queue; - // This is unique_ptr to get away with everything beeing forward declared... + // This is unique_ptr to get away with everything being forward declared... std::unique_ptr _executor; - bool _executorHasMore; + bool _executorHasMore = false; }; DistributeExecutor(DistributeExecutorInfos const& infos); diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index a81f1c6c3d7f..517096c07f93 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -126,9 +126,6 @@ class TestLambdaSkipExecutor; } // namespace arangodb #endif -template -constexpr bool is_one_of_v = (std::is_same_v || ...); - /* * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction * TODO: This should be removed once all executors and fetchers are ported to the new style. 
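The old-style/new-style dispatch described in this comment hinges on compile-time checks of what an executor type provides, for example whether it has a new-style skipRowsRange member at all. One common way to build such a check is the detection idiom sketched below; this is only a generic illustration, not necessarily equivalent to the project's CREATE_HAS_MEMBER_CHECK machinery, and the two executor structs are invented.

#include <type_traits>

// Generic member-function detection in the spirit of a hasSkipRowsRange check.
template <typename T, typename = void>
struct hasSkipRowsRange : std::false_type {};

template <typename T>
struct hasSkipRowsRange<T, std::void_t<decltype(&T::skipRowsRange)>> : std::true_type {};

struct NewStyleExecutor {
  void skipRowsRange(int /*input*/, int /*call*/) {}
};
struct OldStyleExecutor {
  void skipRows(int /*atMost*/) {}
};

static_assert(hasSkipRowsRange<NewStyleExecutor>::value);
static_assert(!hasSkipRowsRange<OldStyleExecutor>::value);

// A block implementation can then branch at compile time:
template <typename Executor>
int skipDispatch(Executor& exec) {
  if constexpr (hasSkipRowsRange<Executor>::value) {
    exec.skipRowsRange(0, 0);
    return 1;  // new style
  } else {
    exec.skipRows(0);
    return 0;  // old style
  }
}

int main() {
  NewStyleExecutor n;
  OldStyleExecutor o;
  return skipDispatch(n) - skipDispatch(o) - 1;  // exits 0
}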
@@ -1230,12 +1227,23 @@ auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, size_t co // to fit all inputs, in particular the one executed below TRI_ASSERT(dependency < _dependencies.size()); _lastRange.resizeIfNecessary(ExecutorState::HASMORE, 0, _dependencies.size()); - - auto [state, skipped, range] = _rowFetcher.executeForDependency(dependency, stack); - - _lastRange.setDependency(dependency, range); - - return {state, skipped, _lastRange}; + if constexpr (!isParallelExecutor) { + auto [state, skipped, range] = _rowFetcher.executeForDependency(dependency, stack); + _lastRange.setDependency(dependency, range); + return {state, skipped, _lastRange}; + } else { + _callsInFlight.resize(_dependencyProxy.numberDependencies()); + if (!_callsInFlight[dependency].has_value()) { + _callsInFlight[dependency] = stack; + } + TRI_ASSERT(_callsInFlight[dependency].has_value()); + auto [state, skipped, range] = _rowFetcher.executeForDependency(dependency, _callsInFlight[dependency].value()); + if (state != ExecutionState::WAITING) { + _callsInFlight[dependency] = std::nullopt; + } + _lastRange.setDependency(dependency, range); + return {state, skipped, _lastRange}; + } } else { return _rowFetcher.execute(stack); } @@ -1251,6 +1259,7 @@ auto ExecutionBlockImpl::executeProduceRows(typename Fetcher::DataRang -> std::tuple { if constexpr (isNewStyleExecutor) { if constexpr (is_one_of_v) { + input.resizeIfNecessary(ExecutorState::HASMORE, 0, _dependencies.size()); return _executor.produceRows(input, output); } else if constexpr (is_one_of_v, SubqueryExecutor>) { // The SubqueryExecutor has it's own special handling outside. @@ -1646,9 +1655,9 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } case ExecState::SKIP: { #ifdef ARANGODB_ENABLE_MAINTAINER_MODE - size_t offsetBefore = clientCall.getOffset(); + auto const offsetBefore = clientCall.getOffset(); TRI_ASSERT(offsetBefore > 0); - size_t canPassFullcount = + bool const canPassFullcount = clientCall.getLimit() == 0 && clientCall.needsFullCount(); #endif LOG_QUERY("1f786", DEBUG) << printTypeInfo() << " call skipRows " << clientCall; @@ -1661,7 +1670,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (is_one_of_v>) { // NOTE: The subquery Executor will by itself call EXECUTE on it's // subquery. This can return waiting => we can get a WAITING state - // here. We can only get the waiting state for SUbquery executors. + // here. We can only get the waiting state for Subquery executors. ExecutionState subqueryState = ExecutionState::HASMORE; std::tie(subqueryState, stats, skippedLocal, call) = _executor.skipRowsRange(_lastRange, clientCall); diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 0d2d3da66d1e..c0e072289704 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -51,6 +51,10 @@ class InputAqlItemRow; class OutputAqlItemRow; class Query; class ShadowAqlItemRow; +class ParallelUnsortedGatherExecutor; + +template +constexpr bool is_one_of_v = (std::is_same_v || ...); /** * @brief This is the implementation class of AqlExecutionBlocks. 
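In the parallel branch above, each dependency keeps its own pending call: the first request for a dependency is stored, the same stored call is re-issued for as long as that dependency answers WAITING, and the slot is cleared once a real answer arrives. A small standalone model of that bookkeeping (State, Request and askUpstream are stand-ins invented for this sketch):

#include <cstddef>
#include <functional>
#include <iostream>
#include <optional>
#include <vector>

enum class State { WAITING, HASMORE, DONE };
struct Request { int id; };

class CallsInFlight {
 public:
  explicit CallsInFlight(std::size_t dependencies) : _pending(dependencies) {}

  // Re-issues the stored request while the dependency keeps returning WAITING;
  // stores a fresh one otherwise and clears it once a real answer arrives.
  State execute(std::size_t dep, Request const& fresh,
                std::function<State(Request const&)> const& askUpstream) {
    if (!_pending[dep].has_value()) {
      _pending[dep] = fresh;
    }
    State state = askUpstream(*_pending[dep]);
    if (state != State::WAITING) {
      _pending[dep].reset();
    }
    return state;
  }

 private:
  std::vector<std::optional<Request>> _pending;
};

int main() {
  CallsInFlight calls(2);
  int answersLeft = 1;  // dependency 0 answers WAITING once, then HASMORE
  auto upstream = [&](Request const& r) {
    std::cout << "asking with request " << r.id << "\n";
    return answersLeft-- > 0 ? State::WAITING : State::HASMORE;
  };
  calls.execute(0, Request{1}, upstream);  // stored, upstream says WAITING
  calls.execute(0, Request{2}, upstream);  // re-uses request 1, now HASMORE
}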
@@ -135,6 +139,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { DONE }; + static constexpr bool isParallelExecutor = + is_one_of_v; + public: /** * @brief Construct a new ExecutionBlock @@ -354,6 +361,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { bool _hasUsedDataRangeBlock; bool _executorReturnedDone = false; + + /// @brief Only needed for parallel executors; could be omitted otherwise + std::vector> _callsInFlight; }; } // namespace arangodb::aql diff --git a/arangod/Aql/ParallelUnsortedGatherExecutor.cpp b/arangod/Aql/ParallelUnsortedGatherExecutor.cpp index cc5e83a32b04..6a22758d5864 100644 --- a/arangod/Aql/ParallelUnsortedGatherExecutor.cpp +++ b/arangod/Aql/ParallelUnsortedGatherExecutor.cpp @@ -43,8 +43,19 @@ ParallelUnsortedGatherExecutor::ParallelUnsortedGatherExecutor(Fetcher&, Infos& ParallelUnsortedGatherExecutor::~ParallelUnsortedGatherExecutor() = default; -auto ParallelUnsortedGatherExecutor::upstreamCall(AqlCall const& clientCall) const +auto ParallelUnsortedGatherExecutor::upstreamCallSkip(AqlCall const& clientCall) const noexcept -> AqlCall { + // Only skip, don't ask for rows + auto upstreamCall = clientCall; + upstreamCall.softLimit = 0; + upstreamCall.hardLimit = AqlCall::Infinity{}; + upstreamCall.fullCount = false; + return upstreamCall; +} + +auto ParallelUnsortedGatherExecutor::upstreamCallProduce(AqlCall const& clientCall) const + noexcept -> AqlCall { + TRI_ASSERT(clientCall.getOffset() == 0); return clientCall; } @@ -87,7 +98,8 @@ auto ParallelUnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& in TRI_ASSERT(waitingDep == input.numberDependencies()); return {ExecutorState::DONE, NoStats{}, AqlCall{}, waitingDep}; } - return {ExecutorState::HASMORE, NoStats{}, upstreamCall(output.getClientCall()), waitingDep}; + return {ExecutorState::HASMORE, NoStats{}, + upstreamCallProduce(output.getClientCall()), waitingDep}; } auto ParallelUnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, @@ -112,7 +124,7 @@ auto ParallelUnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& } if (range.hasDataRow()) { // We overfetched, skipLocally - // By gurantee we will only see data, if + // By guarantee we will only see data, if // we are past the offset phase. 
TRI_ASSERT(call.getOffset() == 0); } else { @@ -131,5 +143,5 @@ auto ParallelUnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCall{}, waitingDep}; } return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), - upstreamCall(call), waitingDep}; + upstreamCallSkip(call), waitingDep}; } diff --git a/arangod/Aql/ParallelUnsortedGatherExecutor.h b/arangod/Aql/ParallelUnsortedGatherExecutor.h index 6df2900bce22..1e52f8114267 100644 --- a/arangod/Aql/ParallelUnsortedGatherExecutor.h +++ b/arangod/Aql/ParallelUnsortedGatherExecutor.h @@ -100,7 +100,8 @@ class ParallelUnsortedGatherExecutor { -> std::tuple; private: - auto upstreamCall(AqlCall const& clientCall) const noexcept -> AqlCall; + auto upstreamCallSkip(AqlCall const& clientCall) const noexcept -> AqlCall; + auto upstreamCallProduce(AqlCall const& clientCall) const noexcept -> AqlCall; }; } // namespace aql diff --git a/arangod/Aql/SortingGatherExecutor.cpp b/arangod/Aql/SortingGatherExecutor.cpp index 61c4ea8ebe80..4ba8087e808c 100644 --- a/arangod/Aql/SortingGatherExecutor.cpp +++ b/arangod/Aql/SortingGatherExecutor.cpp @@ -218,19 +218,18 @@ auto SortingGatherExecutor::initialize(typename Fetcher::DataRange const& inputR TRI_ASSERT(_numberDependencies == 0 || _numberDependencies == inputRange.numberDependencies()); _numberDependencies = inputRange.numberDependencies(); - auto call = requiresMoreInput(inputRange); - if (call.has_value()) { - return call; - } // If we have collected all ranges once, we can prepare the local data-structure copy _inputRows.reserve(_numberDependencies); for (size_t dep = 0; dep < _numberDependencies; ++dep) { auto const [state, row] = inputRange.peekDataRow(dep); _inputRows.emplace_back(dep, row, state); } + auto call = requiresMoreInput(inputRange); + if (call.has_value()) { + return call; + } _strategy->prepare(_inputRows); _initialized = true; - _numberDependencies = inputRange.numberDependencies(); } return {}; } From 4794ebed2160f6228b61ee528b7d6f24940544f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Tue, 10 Mar 2020 14:49:57 +0100 Subject: [PATCH 099/122] Feature/aql subquery execute parallel gather 2 (#11221) * Fixed range-handling for Modification Executors * DataRange handling in ModificationExecutor * Honor batch-size defined by UpstreamExecutor * Fixed compile issue * More fixes in modification * Remvoed log devel * Fixed profiler Test. for NoResults node we cahnge the behaviour * Activated getSome failure tests in ExecuteRestHandler * Fixed skipping in Index * Let the MultiDependencySingleROwFetcher return the correct states. * Fixed non-maintainer compilation * Attempt to fix windows compile issue * Fixed the non-maintainer compile ina different way * Added API in MultiAqlItemBlockInputRange to get Number of dependencies * Comments * Savepoint commit, does not compile, but no harm is done. Will start breaking things now * Another savepoint commit. does not compile, yet. * First draft of new Style SortingGather not yet implemented: Parallelism this needs to be handled in ExecutionBlockImpl now. 
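The upstreamCallSkip/upstreamCallProduce pair above encodes a simple rule: while the consumer still has an offset, upstream is only asked to skip (zero soft limit, unlimited hard limit, no fullCount), and only once the offset is gone is the consumer's call forwarded unchanged. A minimal sketch of that derivation with a stripped-down call type (not the real AqlCall):

#include <cassert>
#include <cstddef>
#include <variant>

struct Infinity {};
using Limit = std::variant<std::size_t, Infinity>;

struct Call {
  std::size_t offset{0};
  Limit softLimit{Infinity{}};
  Limit hardLimit{Infinity{}};
  bool fullCount{false};
};

// While skipping: keep the offset, but request no rows and no fullCount.
Call skipOnlyUpstreamCall(Call const& client) {
  Call upstream = client;
  upstream.softLimit = std::size_t{0};
  upstream.hardLimit = Infinity{};
  upstream.fullCount = false;
  return upstream;
}

// Once the offset is consumed, the client call can be forwarded unchanged.
Call produceUpstreamCall(Call const& client) {
  assert(client.offset == 0);
  return client;
}

int main() {
  Call client{10, std::size_t{100}, Infinity{}, true};
  Call skip = skipOnlyUpstreamCall(client);
  assert(skip.offset == 10 && !skip.fullCount);
  assert(std::get<std::size_t>(skip.softLimit) == 0);
  return 0;
}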
* Allow waiting within old-style subquery * Fixed invalid skipRwos in unsorted gather * First draft of ParallelUnsortedGatherExecutor * Removed unused local variables * Added some Assertions in MultiAqlItemBlockInputRange * Initialize dependdencies of MultiDependencyFetcher * Fixed skipRows loop in UnsortingGatherNode * Fixed return state of GatherNode * Added an assertion before accessing a vectir unbounded * Fixed uninitialized member in DistributeExecutor * Fixed use before vector initialization in SortingGather * Fixed uninitialized dependencies in MultiDepRowFetcher * First step towards parallel Aql * Fixed an assertion * Fixed upstream skipping in ParallelUnsortedGather * [WIP] Changed Api for MultiDepExecutors in ExecBlockImpl (not yet in the actual executors) * Moved AqlCallSet into a separate file * Changed SortingGather to use the new API * Changed ParallelUnsortedGather to use the new API * Changed UnsortedGather to use the new API * Moved AqlCall operator<< into .cpp file * Implement operator<< for AqlCallSet * Fix boolean mix-up * Fixed state machine: go to UPSTREAM when the AqlCallSet is not empty * Fixed assertion * Bugfix * SortingGather bugfixes * Added init() method to fix an assertion and cleanup * Removed unused variable * Fixed constrained sort * Fixed constrained sort #2 * Fix boolean mix-up * Remove old interface * Use call parameter for upstream request in produceRows * Remove more old interface code * Add skip methods to MultiAqlItemBlockInputRange * Skip in UnsortedGather * skip for UnsortedGather * Fix skip and upstream calls for UnsortedGather * skipRowsRange change * Remove useless comments * Moved multi-dep related code from ExeBlockImpl to MultiFetcher * Cleanup in SortingGather, implemented parallel fullCount * Try to fix a windows compile error * Simplify and extend skipRowsRange for UnsortedGatherExecutor * Made ParallelUnsortedGather actually parallel * Removed erroneous assertion * Undid erroneous change * Fixed MacOs compile. Also disabled tests for non-relevant AqlCallStacks. They will be removed * Fixed initialize Cursor for multi dependency blocks * Fixed fullCount case in parallel unsorted gather * Fixed fullCount upstream call of ParallelUnsortedGatherExecutor * Fixed fullCount in SortingGather * Windows \o/ if you cannot work properly with constexpr and static asserts, we do not let you do it! * Do not advance in Unsorted gather if there are still rows to skip * Add more comparison operators for AqlCall limits * Send clientCall limits to upstream in SortingGather * Improved fullCount in SortingGatherExectur * Disabled a cluster profile test. We now ask the RemoteNode more often if it already has data. It is a bit unclear to me if this is now better performance wise (<< i think so) or triggers undesired side effects * Helpless attempt to work around issues in stonage Visual Studio Compiler we are using. 
* Clearly adding an operator on a well defined type causes ambigousness on random basic types using the operator Co-authored-by: Michael Hackstein Co-authored-by: Markus Pfeiffer --- arangod/Aql/AqlCall.cpp | 17 + arangod/Aql/AqlCall.h | 40 +-- arangod/Aql/AqlCallSet.cpp | 54 +++ arangod/Aql/AqlCallSet.h | 53 +++ arangod/Aql/AqlCallStack.cpp | 6 + arangod/Aql/AqlCallStack.h | 3 + arangod/Aql/ClusterNodes.cpp | 6 + arangod/Aql/ClusterNodes.h | 5 +- arangod/Aql/ExecutionBlockImpl.cpp | 243 ++++++++------ arangod/Aql/ExecutionBlockImpl.h | 42 ++- arangod/Aql/IdExecutor.h | 4 - arangod/Aql/MultiAqlItemBlockInputRange.cpp | 42 ++- arangod/Aql/MultiAqlItemBlockInputRange.h | 19 +- .../Aql/MultiDependencySingleRowFetcher.cpp | 97 +++++- arangod/Aql/MultiDependencySingleRowFetcher.h | 21 +- .../Aql/ParallelUnsortedGatherExecutor.cpp | 57 ++-- arangod/Aql/ParallelUnsortedGatherExecutor.h | 5 +- arangod/Aql/SortingGatherExecutor.cpp | 313 ++++++++++++------ arangod/Aql/SortingGatherExecutor.h | 45 ++- arangod/Aql/UnsortedGatherExecutor.cpp | 139 ++------ arangod/Aql/UnsortedGatherExecutor.h | 28 +- arangod/CMakeLists.txt | 3 +- tests/Aql/ExecutionBlockImplTest.cpp | 9 +- ...timizer-rule-parallelize-gather-cluster.js | 2 +- tests/js/server/aql/aql-profiler-cluster.js | 16 +- 25 files changed, 803 insertions(+), 466 deletions(-) create mode 100644 arangod/Aql/AqlCallSet.cpp create mode 100644 arangod/Aql/AqlCallSet.h diff --git a/arangod/Aql/AqlCall.cpp b/arangod/Aql/AqlCall.cpp index 1621a38271e7..0a1d065380bd 100644 --- a/arangod/Aql/AqlCall.cpp +++ b/arangod/Aql/AqlCall.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -279,3 +280,19 @@ auto AqlCall::toString() const -> std::string { stream << *this; return stream.str(); } + +auto aql::operator<<(std::ostream& out, AqlCall::Limit const& limit) -> std::ostream& { + return std::visit(overload{[&out](size_t const& i) -> std::ostream& { + return out << i; + }, + [&out](AqlCall::Infinity const&) -> std::ostream& { + return out << "unlimited"; + }}, + limit); +} + +auto aql::operator<<(std::ostream& out, AqlCall const& call) -> std::ostream& { + return out << "{ skip: " << call.getOffset() << ", softLimit: " << call.softLimit + << ", hardLimit: " << call.hardLimit + << ", fullCount: " << std::boolalpha << call.fullCount << " }"; +} diff --git a/arangod/Aql/AqlCall.h b/arangod/Aql/AqlCall.h index b310dfee3175..af14123495eb 100644 --- a/arangod/Aql/AqlCall.h +++ b/arangod/Aql/AqlCall.h @@ -28,6 +28,7 @@ #include "Cluster/ResultT.h" #include +#include #include namespace arangodb::velocypack { @@ -50,14 +51,14 @@ struct AqlCall { AqlCall() = default; // Replacements for struct initialization - explicit AqlCall(size_t offset, Limit softLimit = Infinity{}, - Limit hardLimit = Infinity{}, bool fullCount = false) + explicit constexpr AqlCall(size_t offset, Limit softLimit = Infinity{}, + Limit hardLimit = Infinity{}, bool fullCount = false) : offset{offset}, softLimit{softLimit}, hardLimit{hardLimit}, fullCount{fullCount} {} enum class LimitType { SOFT, HARD }; - AqlCall(size_t offset, bool fullCount, Infinity) + constexpr AqlCall(size_t offset, bool fullCount, Infinity) : offset{offset}, softLimit{Infinity{}}, hardLimit{Infinity{}}, fullCount{fullCount} {} - AqlCall(size_t offset, bool fullCount, size_t limit, LimitType limitType) + constexpr AqlCall(size_t offset, bool fullCount, size_t limit, LimitType limitType) : offset{offset}, softLimit{limitType == LimitType::SOFT ? 
Limit{limit} : Limit{Infinity{}}}, hardLimit{limitType == LimitType::HARD ? Limit{limit} : Limit{Infinity{}}}, @@ -158,6 +159,7 @@ struct AqlCall { return skippedRows; } + // TODO this is the same as shouldSkip(), remove one of them. [[nodiscard]] bool needSkipMore() const noexcept { return (0 < getOffset()) || (getLimit() == 0 && needsFullCount()); } @@ -174,6 +176,8 @@ struct AqlCall { std::visit(minus, hardLimit); } + bool hasLimit() const { return hasHardLimit() || hasSoftLimit(); } + bool hasHardLimit() const { return !std::holds_alternative(hardLimit); } @@ -184,6 +188,7 @@ struct AqlCall { bool needsFullCount() const { return fullCount; } + // TODO this is the same as needSkipMore(), remove one of them. bool shouldSkip() const { return getOffset() > 0 || (getLimit() == 0 && needsFullCount()); } @@ -199,6 +204,15 @@ constexpr bool operator<(AqlCall::Limit const& a, AqlCall::Limit const& b) { return std::get(a) < std::get(b); } +constexpr bool operator<(AqlCall::Limit const& a, size_t b) { + if (std::holds_alternative(a)) { + return false; + } + return std::get(a) < b; +} + +constexpr bool operator<(size_t a, AqlCall::Limit const& b) { return !(b < a); } + constexpr AqlCall::Limit operator+(AqlCall::Limit const& a, size_t n) { return std::visit(overload{[n](size_t const& i) -> AqlCall::Limit { return i + n; @@ -243,22 +257,10 @@ constexpr bool operator==(AqlCall const& left, AqlCall const& right) { left.skippedRows == right.skippedRows; } -inline std::ostream& operator<<(std::ostream& out, - const arangodb::aql::AqlCall::Limit& limit) { - return std::visit(arangodb::overload{[&out](size_t const& i) -> std::ostream& { - return out << i; - }, - [&out](arangodb::aql::AqlCall::Infinity const&) -> std::ostream& { - return out << "unlimited"; - }}, - limit); -} +auto operator<<(std::ostream& out, const arangodb::aql::AqlCall::Limit& limit) + -> std::ostream&; -inline std::ostream& operator<<(std::ostream& out, const arangodb::aql::AqlCall& call) { - return out << "{ skip: " << call.getOffset() << ", softLimit: " << call.softLimit - << ", hardLimit: " << call.hardLimit - << ", fullCount: " << std::boolalpha << call.fullCount << " }"; -} +auto operator<<(std::ostream& out, const arangodb::aql::AqlCall& call) -> std::ostream&; } // namespace arangodb::aql diff --git a/arangod/Aql/AqlCallSet.cpp b/arangod/Aql/AqlCallSet.cpp new file mode 100644 index 000000000000..87ace64d871b --- /dev/null +++ b/arangod/Aql/AqlCallSet.cpp @@ -0,0 +1,54 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
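All of the Limit operators introduced here follow the same pattern: either the variant holds the unlimited alternative, or the numeric alternative is visited. A self-contained sketch of that pattern, including the kind of overload helper the stream operator relies on (simplified for illustration, not copied from the ArangoDB sources):

#include <cstddef>
#include <iostream>
#include <variant>

struct Infinity {};
using Limit = std::variant<std::size_t, Infinity>;

// The usual C++17 "overload" helper: inherit several lambdas at once.
template <class... Ts>
struct overload : Ts... {
  using Ts::operator()...;
};
template <class... Ts>
overload(Ts...) -> overload<Ts...>;

std::ostream& operator<<(std::ostream& out, Limit const& limit) {
  return std::visit(overload{[&out](std::size_t i) -> std::ostream& { return out << i; },
                             [&out](Infinity) -> std::ostream& { return out << "unlimited"; }},
                    limit);
}

// "unlimited" is never smaller than a finite count.
bool operator<(Limit const& a, std::size_t b) {
  if (std::holds_alternative<Infinity>(a)) {
    return false;
  }
  return std::get<std::size_t>(a) < b;
}

int main() {
  Limit soft = std::size_t{32};
  Limit hard = Infinity{};
  std::cout << "soft: " << soft << ", hard: " << hard << "\n";  // soft: 32, hard: unlimited
  std::cout << std::boolalpha << (hard < 100) << "\n";          // false
}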
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#include "AqlCallSet.h" + +using namespace arangodb; +using namespace arangodb::aql; + +auto aql::operator<<(std::ostream& out, AqlCallSet::DepCallPair const& callPair) -> std::ostream& { + return out << callPair.dependency << " => " << callPair.call; +} + +auto aql::operator<<(std::ostream& out, AqlCallSet const& callSet) -> std::ostream& { + out << "["; + auto first = true; + for (auto const& it : callSet.calls) { + if (first) { + out << " "; + first = false; + } else { + out << ", "; + } + out << it; + } + out << " ]"; + return out; +} + +auto AqlCallSet::empty() const noexcept -> bool { + return calls.empty(); +} + +auto AqlCallSet::size() const noexcept -> size_t { + return calls.size(); +} diff --git a/arangod/Aql/AqlCallSet.h b/arangod/Aql/AqlCallSet.h new file mode 100644 index 000000000000..abc8ee8de08d --- /dev/null +++ b/arangod/Aql/AqlCallSet.h @@ -0,0 +1,53 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Tobias Gödderz +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AQLCALLSET_H +#define ARANGOD_AQL_AQLCALLSET_H + +#include "Aql/AqlCall.h" + +#include +#include + +namespace arangodb::aql { + +// Partial map dep -> call. May be empty. +// IMPORTANT: Are expected to be saved in increasing order (regarding dependency) +struct AqlCallSet { + struct DepCallPair { + std::size_t dependency{}; + AqlCall call; + }; + std::vector calls; + + [[nodiscard]] auto empty() const noexcept -> bool; + + [[nodiscard]] auto size() const noexcept -> size_t; +}; + +auto operator<<(std::ostream& out, AqlCallSet::DepCallPair const& callPair) + -> std::ostream&; +auto operator<<(std::ostream&, AqlCallSet const&) -> std::ostream&; + +} // namespace arangodb::aql + +#endif // ARANGOD_AQL_AQLCALLSET_H diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 68c5cd9c5b8d..911ea0c86fd2 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -87,6 +87,12 @@ void AqlCallStack::pushCall(AqlCall&& call) { _operations.push(call); } +void AqlCallStack::pushCall(AqlCall const& call) { + // TODO is this correct on subqueries? + TRI_ASSERT(isRelevant()); + _operations.push(call); +} + void AqlCallStack::stackUpMissingCalls() { while (!isRelevant()) { // For every depth, we add an additional default call. diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index f67e505ddffc..4aa03285f5d8 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -63,6 +63,9 @@ class AqlCallStack { // Put another call on top of the stack. 
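AqlCallSet, as declared above, is deliberately nothing more than an ordered list of (dependency, call) pairs: a multi-dependency executor returns one entry per dependency it still needs input from, and an empty set means no upstream request is necessary. A toy illustration of filling and consuming such a set (simplified types, not the real classes):

#include <cstddef>
#include <iostream>
#include <vector>

struct Call {
  std::size_t offset{0};
};

// Partial, ordered map dependency -> call, in the spirit of AqlCallSet.
struct CallSet {
  struct DepCallPair {
    std::size_t dependency{};
    Call call;
  };
  std::vector<DepCallPair> calls;

  bool empty() const noexcept { return calls.empty(); }
};

int main() {
  // Pretend dependencies 1 and 3 ran out of buffered rows; ask only those,
  // in increasing dependency order.
  CallSet set;
  set.calls.push_back({1, Call{0}});
  set.calls.push_back({3, Call{5}});

  if (set.empty()) {
    std::cout << "no upstream request needed\n";
  } else {
    for (auto const& [dependency, call] : set.calls) {
      std::cout << "ask dependency " << dependency << " with offset " << call.offset << "\n";
    }
  }
}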
void pushCall(AqlCall&& call); + // Put another call on top of the stack. + void pushCall(AqlCall const& call); + // fill up all missing calls within this stack s.t. we reach depth == 0 // This needs to be called if an executor requires to be fully executed, even if skipped, // even if the subquery it is located in is skipped. diff --git a/arangod/Aql/ClusterNodes.cpp b/arangod/Aql/ClusterNodes.cpp index 4e393a10caa6..feb4ca124d97 100644 --- a/arangod/Aql/ClusterNodes.cpp +++ b/arangod/Aql/ClusterNodes.cpp @@ -635,6 +635,12 @@ void GatherNode::setParallelism(GatherNode::Parallelism value) { _parallelism = value; } +GatherNode::SortMode GatherNode::evaluateSortMode(size_t numberOfShards, + size_t shardsRequiredForHeapMerge) noexcept { + return numberOfShards >= shardsRequiredForHeapMerge ? SortMode::Heap + : SortMode::MinElement; +} + SingleRemoteOperationNode::SingleRemoteOperationNode( ExecutionPlan* plan, size_t id, NodeType mode, bool replaceIndexNode, std::string const& key, Collection const* collection, diff --git a/arangod/Aql/ClusterNodes.h b/arangod/Aql/ClusterNodes.h index 5eefc5395cb9..d0206ac0fd21 100644 --- a/arangod/Aql/ClusterNodes.h +++ b/arangod/Aql/ClusterNodes.h @@ -315,10 +315,7 @@ class GatherNode final : public ExecutionNode { /// @returns sort mode for the specified number of shards static SortMode evaluateSortMode(size_t numberOfShards, - size_t shardsRequiredForHeapMerge = 5) noexcept { - return numberOfShards >= shardsRequiredForHeapMerge ? SortMode::Heap - : SortMode::MinElement; - } + size_t shardsRequiredForHeapMerge = 5) noexcept; /// @brief constructor with an id GatherNode(ExecutionPlan* plan, size_t id, SortMode sortMode, diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 517096c07f93..8e71f0dc0192 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -117,13 +117,11 @@ CREATE_HAS_MEMBER_CHECK(skipRowsRange, hasSkipRowsRange); #ifdef ARANGODB_USE_GOOGLE_TESTS // Forward declaration of Test Executors. // only used as long as isNewStyleExecutor is required. 
-namespace arangodb { -namespace aql { +namespace arangodb::aql { class TestLambdaExecutor; class TestLambdaSkipExecutor; -} // namespace aql -} // namespace arangodb +} // namespace arangodb::aql #endif /* @@ -196,7 +194,6 @@ ExecutionBlockImpl::ExecutionBlockImpl(ExecutionEngine* engine, _execState{ExecState::CHECKCALL}, _upstreamRequest{}, _clientRequest{}, - _requestedDependency{}, _hasUsedDataRangeBlock{false} { // already insert ourselves into the statistics results if (_profile >= PROFILE_LEVEL_BLOCKS) { @@ -606,13 +603,19 @@ template std::pair ExecutionBlockImpl::initializeCursor(InputAqlItemRow const& input) { // reinitialize the DependencyProxy _dependencyProxy.reset(); - _lastRange = DataRange(ExecutorState::HASMORE); _hasUsedDataRangeBlock = false; - + initOnce(); // destroy and re-create the Fetcher _rowFetcher.~Fetcher(); new (&_rowFetcher) Fetcher(_dependencyProxy); + if constexpr (isMultiDepExecutor) { + _lastRange.reset(); + _rowFetcher.init(); + } else { + _lastRange = DataRange(ExecutorState::HASMORE); + } + TRI_ASSERT(_skipped == 0); _skipped = 0; TRI_ASSERT(_state == InternalState::DONE || _state == InternalState::FETCH_DATA); @@ -653,6 +656,7 @@ std::tuple ExecutionBlockImpl ExecutionBlockImpl::r "Executors should implement the method " "fetchBlockForPassthrough() iff " "Properties::allowsBlockPassthrough is true"); + static_assert( + Executor::Properties::inputSizeRestrictsOutputSize == + hasExpectedNumberOfRows::value, + "Executors should implement the method expectedNumberOfRows() iff " + "Properties::inputSizeRestrictsOutputSize is true"); } - static_assert( - Executor::Properties::inputSizeRestrictsOutputSize == - hasExpectedNumberOfRows::value, - "Executors should implement the method expectedNumberOfRows() iff " - "Properties::inputSizeRestrictsOutputSize is true"); constexpr RequestWrappedBlockVariant variant = isNewStyleExecutor @@ -1217,35 +1221,45 @@ static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwa } template -auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, size_t const dependency) +auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, AqlCallType const& aqlCall) -> std::tuple { - // Silence compiler about unused dependency - (void)dependency; if constexpr (isNewStyleExecutor) { - if constexpr (is_one_of_v) { - // TODO: This is a hack to guarantee we have enough space in our range - // to fit all inputs, in particular the one executed below - TRI_ASSERT(dependency < _dependencies.size()); - _lastRange.resizeIfNecessary(ExecutorState::HASMORE, 0, _dependencies.size()); - if constexpr (!isParallelExecutor) { - auto [state, skipped, range] = _rowFetcher.executeForDependency(dependency, stack); - _lastRange.setDependency(dependency, range); - return {state, skipped, _lastRange}; - } else { - _callsInFlight.resize(_dependencyProxy.numberDependencies()); - if (!_callsInFlight[dependency].has_value()) { - _callsInFlight[dependency] = stack; - } - TRI_ASSERT(_callsInFlight[dependency].has_value()); - auto [state, skipped, range] = _rowFetcher.executeForDependency(dependency, _callsInFlight[dependency].value()); - if (state != ExecutionState::WAITING) { - _callsInFlight[dependency] = std::nullopt; - } + // TODO The logic in the MultiDependencySingleRowFetcher branch should be + // moved into the MultiDependencySingleRowFetcher. 
+ static_assert(isMultiDepExecutor == + std::is_same_v); + if constexpr (std::is_same_v) { + // Note the aqlCall is an AqlCallSet in this case: + static_assert(std::is_same_v>); + TRI_ASSERT(_lastRange.numberDependencies() == _dependencies.size()); + auto const& [state, skipped, ranges] = _rowFetcher.execute(stack, aqlCall); + for (auto const& [dependency, range] : ranges) { _lastRange.setDependency(dependency, range); - return {state, skipped, _lastRange}; } + return {state, skipped, _lastRange}; } else { - return _rowFetcher.execute(stack); + // If we are SubqueryStart, we remove the top element of the stack + // which belongs to the subquery enclosed by this + // SubqueryStart and the partnered SubqueryEnd by *not* + // pushing the upstream request. + if constexpr (!std::is_same_v) { + auto callCopy = _upstreamRequest; + stack.pushCall(std::move(callCopy)); + } + + auto const result = _rowFetcher.execute(stack); + + if constexpr (!std::is_same_v) { + // As the stack is copied into the fetcher, we need to pop off our call + // again. If we use other datastructures or moving we may hand over + // ownership of the stack here instead and no popCall is necessary. + stack.popCall(); + } else { + // Do not pop the call, we did not put it on. + // However we need it for accounting later. + } + + return result; } } else { TRI_ASSERT(false); @@ -1256,10 +1270,10 @@ auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, size_t co template auto ExecutionBlockImpl::executeProduceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) - -> std::tuple { + -> std::tuple { if constexpr (isNewStyleExecutor) { - if constexpr (is_one_of_v) { - input.resizeIfNecessary(ExecutorState::HASMORE, 0, _dependencies.size()); + if constexpr (isMultiDepExecutor) { + TRI_ASSERT(input.numberDependencies() == _dependencies.size()); return _executor.produceRows(input, output); } else if constexpr (is_one_of_v, SubqueryExecutor>) { // The SubqueryExecutor has it's own special handling outside. @@ -1268,21 +1282,22 @@ auto ExecutionBlockImpl::executeProduceRows(typename Fetcher::DataRang THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); } else { auto [state, stats, call] = _executor.produceRows(input, output); - return {state, stats, call, 0}; + return {state, stats, call}; } } else { - return {ExecutorState::DONE, typename Executor::Stats{}, AqlCall{}, 0}; + return {ExecutorState::DONE, typename Executor::Stats{}, AqlCall{}}; } } template auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRange& inputRange, AqlCall& call) - -> std::tuple { + -> std::tuple { if constexpr (isNewStyleExecutor) { call.skippedRows = 0; if constexpr (skipRowsType() == SkipRowsRangeVariant::EXECUTOR) { - if constexpr (is_one_of_v) { + if constexpr (isMultiDepExecutor) { + TRI_ASSERT(inputRange.numberDependencies() == _dependencies.size()); // If the executor has a method skipRowsRange, to skip outputs. // Every non-passthrough executor needs to implement this. 
auto res = _executor.skipRowsRange(inputRange, call); @@ -1297,7 +1312,7 @@ auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRa auto [state, stats, skipped, localCall] = _executor.skipRowsRange(inputRange, call); _executorReturnedDone = state == ExecutorState::DONE; - return {state, stats, skipped, localCall, 0}; + return {state, stats, skipped, localCall}; } } else if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { // If we know that every input row produces exactly one output row (this @@ -1308,18 +1323,17 @@ auto ExecutionBlockImpl::executeSkipRowsRange(typename Fetcher::DataRa static_assert( std::is_same_v, "Executors with custom statistics must implement skipRowsRange."); - return {inputRange.upstreamState(), NoStats{}, 0, call, 0}; + return {inputRange.upstreamState(), NoStats{}, 0, call}; } else { static_assert(dependent_false::value, "This value of SkipRowsRangeVariant is not supported"); - return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call, 0); + TRI_ASSERT(false); } } else { TRI_ASSERT(false); - return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call, 0); } - // Compiler is unhappy without this. - return std::make_tuple(ExecutorState::DONE, typename Executor::Stats{}, 0, call, 0); + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); } template <> @@ -1474,7 +1488,7 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { template auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRange& inputRange, AqlCall& clientCall) - -> std::tuple { + -> std::tuple { TRI_ASSERT(isNewStyleExecutor); if constexpr (std::is_same_v) { if (clientCall.needsFullCount() && clientCall.getOffset() == 0 && @@ -1484,7 +1498,7 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang return executeSkipRowsRange(_lastRange, clientCall); } // Do not fastForward anything, the Subquery start will handle it by itself - return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}, 0}; + return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}}; } auto type = fastForwardType(clientCall, _executor); @@ -1492,9 +1506,7 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang case FastForwardVariant::FULLCOUNT: case FastForwardVariant::EXECUTOR: { LOG_QUERY("cb135", DEBUG) << printTypeInfo() << " apply full count."; - auto [state, stats, skippedLocal, call, dependency] = - executeSkipRowsRange(_lastRange, clientCall); - _requestedDependency = dependency; + auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); if (type == FastForwardVariant::EXECUTOR) { // We do not report the skip @@ -1508,22 +1520,40 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang inputRange.skipAllRemainingDataRows(); } - return {state, stats, skippedLocal, call, dependency}; + return {state, stats, skippedLocal, call}; } case FastForwardVariant::FETCHER: { LOG_QUERY("fa327", DEBUG) << printTypeInfo() << " bypass unused rows."; - _requestedDependency = inputRange.skipAllRemainingDataRows(); - AqlCall call{}; - call.hardLimit = 0; + auto const dependency = inputRange.skipAllRemainingDataRows(); + auto constexpr fastForwardCall = AqlCall{0, false, 0, AqlCall::LimitType::HARD}; + auto const call = std::invoke([&]() -> AqlCallType { + if constexpr (std::is_same_v) { + return fastForwardCall; + } else { +#ifndef _WIN32 + // For some reason our Windows compiler complains about this static assert + // in the cases that should be in the above constexpr 
path. + // So simply not compile it in. + static_assert(std::is_same_v); +#endif + auto call = AqlCallSet{}; + call.calls.emplace_back(typename AqlCallSet::DepCallPair{dependency, fastForwardCall}); + return call; + } + }); // TODO We have to ask all dependencies to go forward to the next shadow row - if constexpr (std::is_same_v) { - return {inputRange.upstreamState(_requestedDependency), - typename Executor::Stats{}, 0, call, _requestedDependency}; - } else { - return {inputRange.upstreamState(), typename Executor::Stats{}, 0, call, - _requestedDependency}; - } + auto const state = std::invoke( + [&](auto) { + if constexpr (std::is_same_v) { + return inputRange.upstreamState(dependency); + } else { + return inputRange.upstreamState(); + } + }, + 0); + + return {state, typename Executor::Stats{}, 0, call}; } } // Unreachable @@ -1585,7 +1615,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { size_t skippedLocal = 0; typename Fetcher::DataRange bypassedRange{ExecutorState::HASMORE}; std::tie(_upstreamState, skippedLocal, bypassedRange) = - executeFetcher(stack, _requestedDependency); + executeFetcher(stack, _upstreamRequest); return {_upstreamState, skippedLocal, bypassedRange.getBlock()}; } @@ -1665,8 +1695,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { ExecutorState state = ExecutorState::HASMORE; typename Executor::Stats stats; size_t skippedLocal = 0; - AqlCall call{}; - size_t dependency = 0; + AqlCallType call{}; if constexpr (is_one_of_v>) { // NOTE: The subquery Executor will by itself call EXECUTE on it's // subquery. This can return waiting => we can get a WAITING state @@ -1684,11 +1713,10 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } } else { // Execute skipSome - std::tie(state, stats, skippedLocal, call, dependency) = + std::tie(state, stats, skippedLocal, call) = executeSkipRowsRange(_lastRange, clientCall); } - _requestedDependency = dependency; #ifdef ARANGODB_ENABLE_MAINTAINER_MODE // Assertion: We did skip 'skippedLocal' documents here. // This means that they have to be removed from @@ -1750,8 +1778,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { TRI_ASSERT(!_executorReturnedDone); ExecutorState state = ExecutorState::HASMORE; typename Executor::Stats stats; - AqlCall call{}; - size_t dependency = 0; + auto call = AqlCallType{}; if constexpr (is_one_of_v, SubqueryExecutor>) { // NOTE: The subquery Executor will by itself call EXECUTE on it's // subquery. This can return waiting => we can get a WAITING state @@ -1768,10 +1795,8 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } } else { // Execute getSome - std::tie(state, stats, call, dependency) = - executeProduceRows(_lastRange, *_outputItemRow); + std::tie(state, stats, call) = executeProduceRows(_lastRange, *_outputItemRow); } - _requestedDependency = dependency; _executorReturnedDone = state == ExecutorState::DONE; _engine->_stats += stats; localExecutorState = state; @@ -1790,7 +1815,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // In all other branches only if the client Still needs more data. 
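Both initializations above use the same idiom: an immediately invoked lambda lets a const local be produced from an if constexpr branch, and the dummy auto parameter in the second lambda presumably keeps the discarded branch dependent, which matches the compiler workaround mentioned in the surrounding comments. A generic sketch of the idiom, independent of the AQL types:

#include <functional>
#include <iostream>
#include <string>

template <bool Multi>
auto describeRequest() {
  // Initialize a const result whose value depends on a constexpr branch.
  auto const request = std::invoke([&]() {
    if constexpr (Multi) {
      return std::string{"call set for several dependencies"};
    } else {
      return std::string{"single call"};
    }
  });

  // Dummy `auto` parameter: both branches stay dependent on the generic lambda,
  // which keeps a compiler that eagerly checks discarded branches happy.
  auto const tag = std::invoke(
      [&](auto) {
        if constexpr (Multi) {
          return 'M';
        } else {
          return 'S';
        }
      },
      0);

  return request + " [" + tag + "]";
}

int main() {
  std::cout << describeRequest<true>() << "\n";   // call set for several dependencies [M]
  std::cout << describeRequest<false>() << "\n";  // single call [S]
}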
_execState = ExecState::DONE; break; - } else if (clientCall.getLimit() > 0 && !lastRangeHasDataRow()) { + } else if (clientCall.getLimit() > 0 && executorNeedsCall(call)) { TRI_ASSERT(_upstreamState != ExecutionState::DONE); // We need to request more _upstreamRequest = call; @@ -1804,10 +1829,9 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { case ExecState::FASTFORWARD: { LOG_QUERY("96e2c", DEBUG) << printTypeInfo() << " all produced, fast forward to end up (sub-)query."; - auto [state, stats, skippedLocal, call, dependency] = + auto [state, stats, skippedLocal, call] = executeFastForward(_lastRange, clientCall); - _requestedDependency = dependency; _skipped += skippedLocal; _engine->_stats += stats; localExecutorState = state; @@ -1831,39 +1855,22 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // If this triggers the executors produceRows function has returned // HASMORE even if it knew that upstream has no further rows. TRI_ASSERT(_upstreamState != ExecutionState::DONE); - // We need to make sure _lastRange is all used - TRI_ASSERT(!lastRangeHasDataRow()); + // We need to make sure _lastRange is all used for single-dependency + // executors. + TRI_ASSERT(isMultiDepExecutor || !lastRangeHasDataRow()); TRI_ASSERT(!_lastRange.hasShadowRow()); size_t skippedLocal = 0; #ifdef ARANGODB_ENABLE_MAINTAINER_MODE - size_t subqueryLevelBefore = stack.subqueryLevel(); + auto subqueryLevelBefore = stack.subqueryLevel(); #endif - // If we are SubqueryStart, we remove the top element of the stack - // which belongs to the subquery enclosed by this - // SubqueryStart and the partnered SubqueryEnd by *not* - // pushing the upstream request. - if constexpr (!std::is_same_v) { - auto callCopy = _upstreamRequest; - stack.pushCall(std::move(callCopy)); - } - std::tie(_upstreamState, skippedLocal, _lastRange) = - executeFetcher(stack, _requestedDependency); - - if constexpr (std::is_same_v) { - // Do not pop the call, we did not put it on. - // However we need it for accounting later. - } else { - // As the stack is copied into the fetcher, we need to pop off our call again. - // If we use other datastructures or moving we may hand over ownership of the stack here - // instead and no popCall is necessary. - stack.popCall(); - } + executeFetcher(stack, _upstreamRequest); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE TRI_ASSERT(subqueryLevelBefore == stack.subqueryLevel()); #endif + if (_upstreamState == ExecutionState::WAITING) { // We need to persist the old call before we return. // We might have some local accounting to this call. @@ -2019,9 +2026,8 @@ auto ExecutionBlockImpl::outputIsFull() const noexcept -> bool { _outputItemRow->allRowsUsed(); } -// TODO: remove again template -auto ExecutionBlockImpl::lastRangeHasDataRow() const -> bool { +auto ExecutionBlockImpl::lastRangeHasDataRow() const noexcept -> bool { return _lastRange.hasDataRow(); } @@ -2032,6 +2038,37 @@ RegisterId ExecutionBlockImpl +void ExecutionBlockImpl::init() { + TRI_ASSERT(!_initialized); + if constexpr (isMultiDepExecutor) { + _lastRange.resizeOnce(ExecutorState::HASMORE, 0, _dependencies.size()); + _rowFetcher.init(); + } +} + +template +void ExecutionBlockImpl::initOnce() { + if (!_initialized) { + init(); + _initialized = true; + } +} +template +auto ExecutionBlockImpl::executorNeedsCall(AqlCallType& call) const + noexcept -> bool { + if constexpr (isMultiDepExecutor) { + // call is an AqlCallSet. We need to call upstream if it's not empty. 
+ return !call.empty(); + } else { + // call is an AqlCall, unconditionally. The current convention is + // to call upstream when there is no input left. + // This could be made unnecessary by returning an optional AqlCall + // for single-dependency executors. + return !lastRangeHasDataRow(); + } +}; + template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; template class ::arangodb::aql::ExecutionBlockImpl>; diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index c0e072289704..8e1009c5b518 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -27,6 +27,7 @@ #define ARANGOD_AQL_EXECUTION_BLOCK_IMPL_H 1 #include "Aql/AqlCall.h" +#include "Aql/AqlCallSet.h" #include "Aql/ConstFetcher.h" #include "Aql/DependencyProxy.h" #include "Aql/ExecutionBlock.h" @@ -51,11 +52,15 @@ class InputAqlItemRow; class OutputAqlItemRow; class Query; class ShadowAqlItemRow; -class ParallelUnsortedGatherExecutor; +class MultiDependencySingleRowFetcher; template constexpr bool is_one_of_v = (std::is_same_v || ...); +template +static constexpr bool isMultiDepExecutor = + std::is_same_v; + /** * @brief This is the implementation class of AqlExecutionBlocks. * It is responsible to create AqlItemRows for subsequent @@ -139,8 +144,11 @@ class ExecutionBlockImpl final : public ExecutionBlock { DONE }; - static constexpr bool isParallelExecutor = - is_one_of_v; + // Where Executors with a single dependency return an AqlCall, Executors with + // multiple dependencies return a partial map depIndex -> AqlCall. + // It may be empty. If the cardinality is greater than one, the calls will be + // executed in parallel. + using AqlCallType = std::conditional_t, AqlCallSet, AqlCall>; public: /** @@ -157,6 +165,13 @@ class ExecutionBlockImpl final : public ExecutionBlock { ~ExecutionBlockImpl() override; + /// @brief Must be called exactly once after the plan is instantiated (i.e., + /// all blocks are created and dependencies are injected), but before + /// the first execute() call. + /// Is currently called conditionally in execute() itself, but should + /// better be called in instantiateFromPlan and similar methods. + void init(); + /** * @brief Produce atMost many output rows, or less. * May return waiting if I/O has to be performed @@ -241,17 +256,17 @@ class ExecutionBlockImpl final : public ExecutionBlock { std::tuple executeWithoutTrace(AqlCallStack stack); std::tuple executeFetcher( - AqlCallStack& stack, size_t const dependency); + AqlCallStack& stack, AqlCallType const& aqlCall); - std::tuple executeProduceRows( + std::tuple executeProduceRows( typename Fetcher::DataRange& input, OutputAqlItemRow& output); // execute a skipRowsRange call auto executeSkipRowsRange(typename Fetcher::DataRange& inputRange, AqlCall& call) - -> std::tuple; + -> std::tuple; auto executeFastForward(typename Fetcher::DataRange& inputRange, AqlCall& clientCall) - -> std::tuple; + -> std::tuple; /** * @brief Inner getSome() part, without the tracing calls. 
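The AqlCallType alias above is the compile-time switch that keeps the rest of the block generic: executors on a single dependency exchange one AqlCall with upstream, while executors on the MultiDependencySingleRowFetcher hand back a whole AqlCallSet, purely based on the fetcher type. A reduced sketch of the selection machinery with toy stand-in types (only the trait and alias shapes mirror the code above):

#include <type_traits>

// Fold-expression trait: is T one of the listed types?
template <class T, class... Es>
constexpr bool is_one_of_v = (std::is_same_v<T, Es> || ...);

// Toy stand-ins for the fetchers and call types.
struct SingleRowFetcher {};
struct MultiDependencySingleRowFetcher {};
struct AqlCall {};
struct AqlCallSet {};

template <class Executor>
constexpr bool isMultiDepExecutor =
    std::is_same_v<typename Executor::Fetcher, MultiDependencySingleRowFetcher>;

// The per-executor upstream request type follows from the fetcher.
template <class Executor>
using AqlCallType =
    std::conditional_t<isMultiDepExecutor<Executor>, AqlCallSet, AqlCall>;

struct SortExecutor {
  using Fetcher = SingleRowFetcher;
};
struct GatherExecutor {
  using Fetcher = MultiDependencySingleRowFetcher;
};

static_assert(is_one_of_v<int, char, int, long>);
static_assert(!is_one_of_v<float, char, int, long>);
static_assert(std::is_same_v<AqlCallType<SortExecutor>, AqlCall>);
static_assert(std::is_same_v<AqlCallType<GatherExecutor>, AqlCallSet>);

int main() { return 0; }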
@@ -310,10 +325,14 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] auto outputIsFull() const noexcept -> bool; - [[nodiscard]] auto lastRangeHasDataRow() const -> bool; + [[nodiscard]] auto lastRangeHasDataRow() const noexcept -> bool; void resetExecutor(); + void initOnce(); + + [[nodiscard]] auto executorNeedsCall(AqlCallType& call) const noexcept -> bool; + private: /** * @brief Used to allow the row Fetcher to access selected methods of this @@ -348,12 +367,10 @@ class ExecutionBlockImpl final : public ExecutionBlock { ExecState _execState; - AqlCall _upstreamRequest; + AqlCallType _upstreamRequest; AqlCall _clientRequest; - size_t _requestedDependency; - // Only used in passthrough variant. // We track if we have reference the range's block // into an output block. @@ -362,8 +379,7 @@ class ExecutionBlockImpl final : public ExecutionBlock { bool _executorReturnedDone = false; - /// @brief Only needed for parallel executors; could be omitted otherwise - std::vector> _callsInFlight; + bool _initialized = false; }; } // namespace arangodb::aql diff --git a/arangod/Aql/IdExecutor.h b/arangod/Aql/IdExecutor.h index dbab3c746ebc..2b45ac5430e5 100644 --- a/arangod/Aql/IdExecutor.h +++ b/arangod/Aql/IdExecutor.h @@ -88,10 +88,6 @@ class IdExecutorInfos : public ExecutorInfos { bool const _isResponsibleForInitializeCursor; }; -// forward declaration -template -class IdExecutor; - template // cppcheck-suppress noConstructor class IdExecutor { diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.cpp b/arangod/Aql/MultiAqlItemBlockInputRange.cpp index ab8ea0d32e51..1cd6e114618b 100644 --- a/arangod/Aql/MultiAqlItemBlockInputRange.cpp +++ b/arangod/Aql/MultiAqlItemBlockInputRange.cpp @@ -39,14 +39,13 @@ MultiAqlItemBlockInputRange::MultiAqlItemBlockInputRange(ExecutorState state, TRI_ASSERT(nrInputRanges > 0); } -auto MultiAqlItemBlockInputRange::resizeIfNecessary(ExecutorState state, size_t skipped, - size_t nrInputRanges) -> void { +auto MultiAqlItemBlockInputRange::resizeOnce(ExecutorState state, size_t skipped, + size_t nrInputRanges) -> void { + // Is expected to be called exactly once to set the number of dependencies. // We never want to reduce the number of dependencies. 
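  // Illustration (not part of the patch): for a gather block with three
  // upstream dependencies, ExecutionBlockImpl<...>::init() calls
  //   _lastRange.resizeOnce(ExecutorState::HASMORE, 0, _dependencies.size());
  // exactly once for multi-dependency executors, i.e. effectively
  // resizeOnce(HASMORE, /*skipped*/ 0, /*nrInputRanges*/ 3), before the first
  // execute(); a later call with a smaller count would trip the assertions below.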
TRI_ASSERT(_inputs.size() <= nrInputRanges); TRI_ASSERT(nrInputRanges > 0); - if (_inputs.size() < nrInputRanges) { - _inputs.resize(nrInputRanges, AqlItemBlockInputRange{state, skipped}); - } + _inputs.resize(nrInputRanges, AqlItemBlockInputRange{state, skipped}); } auto MultiAqlItemBlockInputRange::upstreamState(size_t const dependency) const @@ -84,6 +83,12 @@ auto MultiAqlItemBlockInputRange::skipAll(size_t const dependency) noexcept -> s return _inputs.at(dependency).skipAll(); } +auto MultiAqlItemBlockInputRange::skippedInFlight(size_t const dependency) const + noexcept -> std::size_t { + TRI_ASSERT(dependency < _inputs.size()); + return _inputs.at(dependency).skippedInFlight(); +} + auto MultiAqlItemBlockInputRange::nextDataRow(size_t const dependency) -> std::pair { TRI_ASSERT(dependency < _inputs.size()); @@ -129,7 +134,7 @@ auto MultiAqlItemBlockInputRange::getBlock(size_t const dependency) const } auto MultiAqlItemBlockInputRange::setDependency(size_t const dependency, - AqlItemBlockInputRange& range) -> void { + AqlItemBlockInputRange const& range) -> void { TRI_ASSERT(dependency < _inputs.size()); _inputs.at(dependency) = range; } @@ -143,9 +148,13 @@ auto MultiAqlItemBlockInputRange::isDone() const -> bool { return res; } +auto MultiAqlItemBlockInputRange::state() const -> ExecutorState { + return isDone() ? ExecutorState::DONE : ExecutorState::HASMORE; +} + auto MultiAqlItemBlockInputRange::skipAllRemainingDataRows() -> size_t { for (size_t i = 0; i < _inputs.size(); i++) { - _inputs.at(i).skipAllRemainingDataRows(); + std::ignore = _inputs.at(i).skipAllRemainingDataRows(); if (_inputs.at(i).upstreamState() == ExecutorState::HASMORE) { return i; } @@ -153,6 +162,23 @@ auto MultiAqlItemBlockInputRange::skipAllRemainingDataRows() -> size_t { return 0; } +// Subtract up to count rows from the local _skipped state +auto MultiAqlItemBlockInputRange::skipForDependency(size_t const dependency, + size_t count) -> size_t { + return _inputs.at(dependency).skip(count); +} + +// Skipp all that is available +auto MultiAqlItemBlockInputRange::skipAllForDependency(size_t const dependency) -> size_t { + return _inputs.at(dependency).skipAll(); +} + auto MultiAqlItemBlockInputRange::numberDependencies() const noexcept -> size_t { return _inputs.size(); -} \ No newline at end of file +} + +auto MultiAqlItemBlockInputRange::reset() -> void { + for (size_t i = 0; i < _inputs.size(); ++i) { + _inputs[i] = AqlItemBlockInputRange(ExecutorState::HASMORE); + } +} diff --git a/arangod/Aql/MultiAqlItemBlockInputRange.h b/arangod/Aql/MultiAqlItemBlockInputRange.h index 74aa1584c2ac..e02ee639231a 100644 --- a/arangod/Aql/MultiAqlItemBlockInputRange.h +++ b/arangod/Aql/MultiAqlItemBlockInputRange.h @@ -62,26 +62,35 @@ class MultiAqlItemBlockInputRange { std::pair nextDataRow(size_t const dependency); auto skipAll(size_t const dependency) noexcept -> std::size_t; + [[nodiscard]] auto skippedInFlight(size_t dependency) const noexcept -> std::size_t; + bool hasShadowRow() const noexcept; arangodb::aql::ShadowAqlItemRow peekShadowRow() const; std::pair nextShadowRow(); auto isDone() const -> bool; + auto state() const -> ExecutorState; - auto resizeIfNecessary(ExecutorState state, size_t skipped, size_t nrInputRanges) -> void; + auto resizeOnce(ExecutorState state, size_t skipped, size_t nrInputRanges) -> void; - auto getBlock(size_t const dependency = 0) const noexcept -> SharedAqlItemBlockPtr; + [[nodiscard]] auto getBlock(size_t dependency = 0) const noexcept -> SharedAqlItemBlockPtr; - auto 
setDependency(size_t const dependency, AqlItemBlockInputRange& range) -> void; + auto setDependency(size_t dependency, AqlItemBlockInputRange const& range) -> void; + // This discards all remaining data rows auto skipAllRemainingDataRows() -> size_t; + // Subtract up to count rows from the local _skipped state + auto skipForDependency(size_t const dependency, size_t count) -> size_t; + // Skipp all that is available + auto skipAllForDependency(size_t const dependency) -> size_t; + auto numberDependencies() const noexcept -> size_t; - private: - ExecutorState _finalState{ExecutorState::HASMORE}; + auto reset() -> void; + private: std::vector _inputs; }; diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.cpp b/arangod/Aql/MultiDependencySingleRowFetcher.cpp index aac3b3b866ac..b275f0c19567 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.cpp +++ b/arangod/Aql/MultiDependencySingleRowFetcher.cpp @@ -176,12 +176,15 @@ void MultiDependencySingleRowFetcher::initDependencies() { } size_t MultiDependencySingleRowFetcher::numberDependencies() { - if (_dependencyInfos.empty()) { - initDependencies(); - } return _dependencyInfos.size(); } +void MultiDependencySingleRowFetcher::init() { + TRI_ASSERT(_dependencyInfos.empty()); + initDependencies(); + _callsInFlight.resize(numberDependencies()); +} + std::pair MultiDependencySingleRowFetcher::preFetchNumberOfRows(size_t atMost) { ExecutionState state = ExecutionState::DONE; size_t available = 0; @@ -368,9 +371,6 @@ auto MultiDependencySingleRowFetcher::useStack(AqlCallStack const& stack) -> voi auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependency, AqlCallStack& stack) -> std::tuple { - if (_dependencyStates.empty()) { - initDependencies(); - } auto [state, skipped, block] = _dependencyProxy->executeForDependency(dependency, stack); if (state == ExecutionState::WAITING) { @@ -380,14 +380,7 @@ auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependen state == ExecutionState::DONE ? ExecutorState::DONE : ExecutorState::HASMORE; _dependencyStates.at(dependency) = state; - if (std::any_of(std::begin(_dependencyStates), std::end(_dependencyStates), - [](ExecutionState const s) { - return s == ExecutionState::HASMORE; - })) { - state = ExecutionState::HASMORE; - } else { - state = ExecutionState::DONE; - } + if (block == nullptr) { return {state, skipped, AqlItemBlockInputRange{execState, skipped}}; } @@ -395,3 +388,79 @@ auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependen auto [start, end] = block->getRelevantRange(); return {state, skipped, AqlItemBlockInputRange{execState, skipped, block, start}}; } + +auto MultiDependencySingleRowFetcher::execute(AqlCallStack const& stack, + AqlCallSet const& aqlCallSet) + -> std::tuple>> { + TRI_ASSERT(_callsInFlight.size() == numberDependencies()); + + auto ranges = std::vector>{}; + ranges.reserve(aqlCallSet.size()); + + auto depCallIdx = size_t{0}; + auto allAskedDepsAreWaiting = true; + auto askedAtLeastOneDep = false; + auto skippedTotal = size_t{0}; + // Iterate in parallel over `_callsInFlight` and `aqlCall.calls`. + // _callsInFlight[i] corresponds to aqlCalls.calls[k] iff + // aqlCalls.calls[k].dependency = i. + // So there is not always a matching entry in aqlCall.calls. 
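  // Worked example (hypothetical values, for illustration only): with four
  // dependencies, _callsInFlight has four slots, while the caller may pass
  //   aqlCallSet.calls == { {dependency 1, call C1}, {dependency 3, call C3} }.
  // The loop below then visits dependencies 0..3, stores C1 and C3 as calls in
  // flight (unless a call is already pending for that dependency, which must
  // not be overwritten), and gives dependencies 0 and 2 no new call; a call
  // already in flight for them is still checked for a result.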
+ for (auto dependency = size_t{0}; dependency < _callsInFlight.size(); ++dependency) { + auto& maybeCallInFlight = _callsInFlight[dependency]; + + // See if there is an entry for `dependency` in `aqlCall.calls` + if (depCallIdx < aqlCallSet.calls.size() && + aqlCallSet.calls[depCallIdx].dependency == dependency) { + // If there is a call in flight, we *must not* change the call, + // no matter what we got. Otherwise, we save the current call. + if (!maybeCallInFlight.has_value()) { + auto depStack = stack; + depStack.pushCall(aqlCallSet.calls[depCallIdx].call); + maybeCallInFlight = depStack; + } + ++depCallIdx; + if (depCallIdx < aqlCallSet.calls.size()) { + TRI_ASSERT(aqlCallSet.calls[depCallIdx - 1].dependency < + aqlCallSet.calls[depCallIdx].dependency); + } + } + + if (maybeCallInFlight.has_value()) { + // We either need to make a new call, or check whether we got a result + // for a call in flight. + auto& callInFlight = maybeCallInFlight.value(); + auto [state, skipped, range] = executeForDependency(dependency, callInFlight); + askedAtLeastOneDep = true; + if (state != ExecutionState::WAITING) { + // Got a result, call is no longer in flight + maybeCallInFlight = std::nullopt; + allAskedDepsAreWaiting = false; + } else { + TRI_ASSERT(skipped == 0); + } + skippedTotal += skipped; + ranges.emplace_back(dependency, range); + } + } + + auto const state = std::invoke([&]() { + if (askedAtLeastOneDep && allAskedDepsAreWaiting) { + return ExecutionState::WAITING; + } else { + return upstreamState(); + } + }); + + return {state, skippedTotal, ranges}; +} + +auto MultiDependencySingleRowFetcher::upstreamState() const -> ExecutionState { + if (std::any_of(std::begin(_dependencyStates), std::end(_dependencyStates), + [](ExecutionState const s) { + return s == ExecutionState::HASMORE; + })) { + return ExecutionState::HASMORE; + } else { + return ExecutionState::DONE; + } +} diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.h b/arangod/Aql/MultiDependencySingleRowFetcher.h index a2c2ebdd911e..277ebf628997 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.h +++ b/arangod/Aql/MultiDependencySingleRowFetcher.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_MULTI_DEPENDENCY_SINGLE_ROW_FETCHER_H #define ARANGOD_AQL_MULTI_DEPENDENCY_SINGLE_ROW_FETCHER_H +#include "Aql/AqlCallSet.h" #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" @@ -32,8 +33,7 @@ #include -namespace arangodb { -namespace aql { +namespace arangodb::aql { class AqlItemBlock; template @@ -89,6 +89,8 @@ class MultiDependencySingleRowFetcher { explicit MultiDependencySingleRowFetcher(DependencyProxy& executionBlock); TEST_VIRTUAL ~MultiDependencySingleRowFetcher() = default; + void init(); + protected: // only for testing! Does not initialize _dependencyProxy! 
MultiDependencySingleRowFetcher(); @@ -134,9 +136,14 @@ class MultiDependencySingleRowFetcher { //@deprecated auto useStack(AqlCallStack const& stack) -> void; - auto executeForDependency(size_t const dependency, AqlCallStack& stack) + [[nodiscard]] auto execute(AqlCallStack const&, AqlCallSet const&) + -> std::tuple>>; + + [[nodiscard]] auto executeForDependency(size_t dependency, AqlCallStack& stack) -> std::tuple; + [[nodiscard]] auto upstreamState() const -> ExecutionState; + private: DependencyProxy* _dependencyProxy; @@ -146,6 +153,11 @@ class MultiDependencySingleRowFetcher { std::vector _dependencyInfos; std::vector _dependencyStates; + /// @brief Only needed for parallel executors; could be omitted otherwise + /// It's size is >0 after init() is called, and this is currently used + /// in initOnce() to make sure that init() is called exactly once. + std::vector> _callsInFlight; + private: /** * @brief Delegates to ExecutionBlock::fetchBlock() @@ -177,7 +189,6 @@ class MultiDependencySingleRowFetcher { bool fetchBlockIfNecessary(const size_t dependency, const size_t atMost); }; -} // namespace aql -} // namespace arangodb +} // namespace arangodb::aql #endif // ARANGOD_AQL_SINGLE_ROW_FETCHER_H diff --git a/arangod/Aql/ParallelUnsortedGatherExecutor.cpp b/arangod/Aql/ParallelUnsortedGatherExecutor.cpp index 6a22758d5864..457d6daa61c1 100644 --- a/arangod/Aql/ParallelUnsortedGatherExecutor.cpp +++ b/arangod/Aql/ParallelUnsortedGatherExecutor.cpp @@ -45,12 +45,21 @@ ParallelUnsortedGatherExecutor::~ParallelUnsortedGatherExecutor() = default; auto ParallelUnsortedGatherExecutor::upstreamCallSkip(AqlCall const& clientCall) const noexcept -> AqlCall { + TRI_ASSERT(clientCall.needSkipMore()); + // Only skip, don't ask for rows - auto upstreamCall = clientCall; - upstreamCall.softLimit = 0; - upstreamCall.hardLimit = AqlCall::Infinity{}; - upstreamCall.fullCount = false; - return upstreamCall; + if (clientCall.getOffset() > 0) { + auto upstreamCall = clientCall; + upstreamCall.softLimit = 0; + upstreamCall.hardLimit = AqlCall::Infinity{}; + upstreamCall.fullCount = false; + return upstreamCall; + } + TRI_ASSERT(clientCall.getLimit() == 0 && clientCall.hasHardLimit()); + + // This can onyl be fullCount or fastForward call. + // Send it upstream. + return clientCall; } auto ParallelUnsortedGatherExecutor::upstreamCallProduce(AqlCall const& clientCall) const @@ -75,9 +84,12 @@ auto ParallelUnsortedGatherExecutor::upstreamCallProduce(AqlCall const& clientCa auto ParallelUnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) - -> std::tuple { - // Illegal dependency, on purpose to trigger asserts - size_t waitingDep = input.numberDependencies(); + -> std::tuple { + auto const& clientCall = output.getClientCall(); + TRI_ASSERT(clientCall.getOffset() == 0); + + auto callSet = AqlCallSet{}; + for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { while (!output.isFull()) { auto [state, row] = input.nextDataRow(dep); @@ -87,24 +99,24 @@ auto ParallelUnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& in } else { // This output did not produce anything if (state == ExecutorState::HASMORE) { - waitingDep = dep; + callSet.calls.emplace_back( + AqlCallSet::DepCallPair{dep, upstreamCallProduce(clientCall)}); } break; } } } - if (input.isDone()) { - // We cannot have one that we are waiting on, if we are done. 
- TRI_ASSERT(waitingDep == input.numberDependencies()); - return {ExecutorState::DONE, NoStats{}, AqlCall{}, waitingDep}; - } - return {ExecutorState::HASMORE, NoStats{}, - upstreamCallProduce(output.getClientCall()), waitingDep}; + + // We cannot have one that we are waiting on, if we are done. + TRI_ASSERT(!input.isDone() || callSet.empty()); + + return {input.state(), NoStats{}, callSet}; } auto ParallelUnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) - -> std::tuple { + -> std::tuple { + // TODO skipping is currently not parallelized, but should be size_t waitingDep = input.numberDependencies(); for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { auto& range = input.rangeForDependency(dep); @@ -140,8 +152,13 @@ auto ParallelUnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& if (input.isDone()) { // We cannot have one that we are waiting on, if we are done. TRI_ASSERT(waitingDep == input.numberDependencies()); - return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCall{}, waitingDep}; + return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCallSet{}}; + } + auto callSet = AqlCallSet{}; + if (call.needSkipMore()) { + // We are not done with skipping. + // Prepare next call. + callSet.calls.emplace_back(AqlCallSet::DepCallPair{waitingDep, upstreamCallSkip(call)}); } - return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), - upstreamCallSkip(call), waitingDep}; + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), callSet}; } diff --git a/arangod/Aql/ParallelUnsortedGatherExecutor.h b/arangod/Aql/ParallelUnsortedGatherExecutor.h index 1e52f8114267..f053f2f308f9 100644 --- a/arangod/Aql/ParallelUnsortedGatherExecutor.h +++ b/arangod/Aql/ParallelUnsortedGatherExecutor.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_PARALLEL_UNSORTED_GATHER_EXECUTOR_H #define ARANGOD_AQL_PARALLEL_UNSORTED_GATHER_EXECUTOR_H +#include "Aql/AqlCallSet.h" #include "Aql/ClusterNodes.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" @@ -82,7 +83,7 @@ class ParallelUnsortedGatherExecutor { * size:t: Dependency to request */ [[nodiscard]] auto produceRows(MultiAqlItemBlockInputRange& input, OutputAqlItemRow& output) - -> std::tuple; + -> std::tuple; /** * @brief Skip rows @@ -97,7 +98,7 @@ class ParallelUnsortedGatherExecutor { * size:t: Dependency to request */ [[nodiscard]] auto skipRowsRange(MultiAqlItemBlockInputRange& input, AqlCall& call) - -> std::tuple; + -> std::tuple; private: auto upstreamCallSkip(AqlCall const& clientCall) const noexcept -> AqlCall; diff --git a/arangod/Aql/SortingGatherExecutor.cpp b/arangod/Aql/SortingGatherExecutor.cpp index 4ba8087e808c..d3fa50ccdcf0 100644 --- a/arangod/Aql/SortingGatherExecutor.cpp +++ b/arangod/Aql/SortingGatherExecutor.cpp @@ -28,6 +28,8 @@ #include "Aql/Stats.h" #include "Transaction/Methods.h" +#include + using namespace arangodb; using namespace arangodb::aql; @@ -124,9 +126,12 @@ class HeapSorting final : public SortingGatherExecutor::SortingStrategy, private virtual void reset() noexcept override { _heap.clear(); } - bool operator()(SortingGatherExecutor::ValueType const& lhs, - SortingGatherExecutor::ValueType const& rhs) const { - return OurLessThan::operator()(rhs, lhs); + // The STL heap (regarding push_heap, pop_heap, make_heap) is a max heap, but + // we want a min heap! + bool operator()(SortingGatherExecutor::ValueType const& left, + SortingGatherExecutor::ValueType const& right) const { + // Note that right and left are swapped! 
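    // Standalone illustration (not part of the patch): the STL heap functions
    // build a max-heap with respect to the comparator they are given, so
    // swapping the arguments of a less-than comparison yields a min-heap:
    //   std::vector<int> v{3, 1, 2};
    //   auto byGreater = [](int lhs, int rhs) { return rhs < lhs; };  // swapped "<"
    //   std::make_heap(v.begin(), v.end(), byGreater);  // v.front() == 1 (smallest)
    //   std::pop_heap(v.begin(), v.end(), byGreater);   // moves 1 to v.back()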
+ return OurLessThan::operator()(right, left); } private: @@ -138,7 +143,7 @@ class HeapSorting final : public SortingGatherExecutor::SortingStrategy, private /// @brief "MinElement" sorting strategy //////////////////////////////////////////////////////////////////////////////// class MinElementSorting final : public SortingGatherExecutor::SortingStrategy, - public OurLessThan { + private OurLessThan { public: MinElementSorting(arangodb::transaction::Methods* trx, std::vector& sortRegisters) noexcept @@ -155,6 +160,8 @@ class MinElementSorting final : public SortingGatherExecutor::SortingStrategy, virtual void reset() noexcept override { _blockPos = nullptr; } + using OurLessThan::operator(); + private: std::vector const* _blockPos; }; @@ -164,7 +171,7 @@ SortingGatherExecutor::ValueType::ValueType(size_t index) : dependencyIndex{index}, row{CreateInvalidInputRowHint()}, state{ExecutorState::HASMORE} {} SortingGatherExecutor::ValueType::ValueType(size_t index, InputAqlItemRow prow, ExecutorState pstate) - : dependencyIndex{index}, row{prow}, state{pstate} {} + : dependencyIndex{index}, row{std::move(prow)}, state{pstate} {} SortingGatherExecutorInfos::SortingGatherExecutorInfos( std::shared_ptr> inputRegisters, @@ -186,8 +193,7 @@ SortingGatherExecutorInfos::SortingGatherExecutorInfos(SortingGatherExecutorInfo SortingGatherExecutorInfos::~SortingGatherExecutorInfos() = default; SortingGatherExecutor::SortingGatherExecutor(Fetcher& fetcher, Infos& infos) - : _initialized(false), - _numberDependencies(0), + : _numberDependencies(0), _inputRows(), _limit(infos.limit()), _rowsReturned(0), @@ -210,8 +216,8 @@ SortingGatherExecutor::SortingGatherExecutor(Fetcher& fetcher, Infos& infos) SortingGatherExecutor::~SortingGatherExecutor() = default; -auto SortingGatherExecutor::initialize(typename Fetcher::DataRange const& inputRange) - -> std::optional> { +auto SortingGatherExecutor::initialize(typename Fetcher::DataRange const& inputRange, + AqlCall const& clientCall) -> AqlCallSet { if (!_initialized) { // We cannot modify the number of dependencies, so we start // with 0 dependencies, and will increase to whatever inputRange gives us. 
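// Standalone sketch (not part of the patch) of the idiom used for
// MinElementSorting above: private inheritance hides the comparator base
// class, and a using-declaration re-exposes just its call operator.
struct SketchLess {
  bool operator()(int lhs, int rhs) const { return lhs < rhs; }
};
struct SketchStrategy : private SketchLess {
  using SketchLess::operator();  // expose operator() while hiding the rest
};
// SketchStrategy{}(1, 2) == true, but outside callers cannot treat a
// SketchStrategy as a SketchLess because the inheritance is private.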
@@ -220,13 +226,27 @@ auto SortingGatherExecutor::initialize(typename Fetcher::DataRange const& inputR _numberDependencies = inputRange.numberDependencies(); // If we have collected all ranges once, we can prepare the local data-structure copy _inputRows.reserve(_numberDependencies); + if (_inputRows.empty()) { + for (size_t dep = 0; dep < _numberDependencies; ++dep) { + _inputRows.emplace_back(dep); + } + } + + auto callSet = AqlCallSet{}; for (size_t dep = 0; dep < _numberDependencies; ++dep) { auto const [state, row] = inputRange.peekDataRow(dep); - _inputRows.emplace_back(dep, row, state); + _inputRows[dep] = {dep, row, state}; + if (!row && state != ExecutorState::DONE) { + // This dependency requires input + callSet.calls.emplace_back( + AqlCallSet::DepCallPair{dep, calculateUpstreamCall(clientCall)}); + if (!_fetchParallel) { + break; + } + } } - auto call = requiresMoreInput(inputRange); - if (call.has_value()) { - return call; + if (!callSet.empty()) { + return callSet; } _strategy->prepare(_inputRows); _initialized = true; @@ -234,37 +254,39 @@ auto SortingGatherExecutor::initialize(typename Fetcher::DataRange const& inputR return {}; } -auto SortingGatherExecutor::requiresMoreInput(typename Fetcher::DataRange const& inputRange) - -> std::optional> { - for (size_t dep = 0; dep < _numberDependencies; ++dep) { - auto const& [state, input] = inputRange.peekDataRow(dep); - // Update the local copy, just to be sure it is up to date - // We might do too many copies here, but most likely this - // will not be a performance bottleneck. - ValueType& localDep = _inputRows[dep]; - localDep.row = input; - localDep.state = state; - if (!input && state != ExecutorState::DONE) { - // This dependency requires input +auto SortingGatherExecutor::requiresMoreInput(typename Fetcher::DataRange const& inputRange, + AqlCall const& clientCall) -> AqlCallSet { + auto callSet = AqlCallSet{}; + + if (_depToUpdate.has_value()) { + auto const dependency = _depToUpdate.value(); + auto const& [state, row] = inputRange.peekDataRow(dependency); + auto const needMoreInput = !row && state != ExecutorState::DONE; + if (needMoreInput) { + // Still waiting for input // TODO: This call requires limits - return std::tuple{AqlCall{}, dep}; + callSet.calls.emplace_back( + AqlCallSet::DepCallPair{dependency, calculateUpstreamCall(clientCall)}); + } else { + // We got an answer, save it + ValueType& localDep = _inputRows[dependency]; + localDep.row = row; + localDep.state = state; + // We don't need to update this dep anymore + _depToUpdate = std::nullopt; } } - // No call required - return {}; -} -auto SortingGatherExecutor::isDone(typename Fetcher::DataRange const& input) const -> bool { - // TODO: Include contrained sort - return input.isDone(); + return callSet; } auto SortingGatherExecutor::nextRow(MultiAqlItemBlockInputRange& input) -> InputAqlItemRow { - if (isDone(input)) { + if (input.isDone()) { // No rows, there is a chance we get into this. // If we requested data from upstream, but all if it is done. 
return InputAqlItemRow{CreateInvalidInputRowHint{}}; } + TRI_ASSERT(!_depToUpdate.has_value()); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE bool oneWithContent = false; for (size_t dep = 0; dep < _numberDependencies; ++dep) { @@ -276,16 +298,20 @@ auto SortingGatherExecutor::nextRow(MultiAqlItemBlockInputRange& input) -> Input TRI_ASSERT(oneWithContent); #endif auto nextVal = _strategy->nextValue(); + TRI_ASSERT(nextVal.row); _rowsReturned++; { // Consume the row, and set it to next input - std::ignore = input.nextDataRow(nextVal.dependencyIndex); - auto const& [state, row] = input.peekDataRow(nextVal.dependencyIndex); - _inputRows[nextVal.dependencyIndex].state = state; - _inputRows[nextVal.dependencyIndex].row = row; - - // TODO we might do some short-cuts here to maintain a list of requests - // to send in order to improve requires input + auto const dependency = nextVal.dependencyIndex; + std::ignore = input.nextDataRow(dependency); + auto const& [state, row] = input.peekDataRow(dependency); + _inputRows[dependency].state = state; + _inputRows[dependency].row = row; + + auto const needMoreInput = !row && state != ExecutorState::DONE; + if (needMoreInput) { + _depToUpdate = dependency; + } } return nextVal.row; @@ -307,103 +333,128 @@ auto SortingGatherExecutor::nextRow(MultiAqlItemBlockInputRange& input) -> Input auto SortingGatherExecutor::produceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) - -> std::tuple { + -> std::tuple { { // First initialize - auto maybeCall = initialize(input); - if (maybeCall.has_value()) { - auto const& [request, dep] = maybeCall.value(); - return {ExecutorState::HASMORE, NoStats{}, request, dep}; + auto const callSet = initialize(input, output.getClientCall()); + if (!callSet.empty()) { + return {ExecutorState::HASMORE, NoStats{}, callSet}; } } - while (!isDone(input) && !output.isFull()) { - TRI_ASSERT(!maySkip()); - auto maybeCall = requiresMoreInput(input); - if (maybeCall.has_value()) { - auto const& [request, dep] = maybeCall.value(); - return {ExecutorState::HASMORE, NoStats{}, request, dep}; + { + auto const callSet = requiresMoreInput(input, output.getClientCall()); + if (!callSet.empty()) { + return {ExecutorState::HASMORE, NoStats{}, callSet}; } + } + + // produceRows should not be called again when the limit is reached; + // the downstream limit should see to that. + TRI_ASSERT(!limitReached()); + + while (!input.isDone() && !output.isFull() && !limitReached()) { + TRI_ASSERT(!maySkip()); auto row = nextRow(input); - TRI_ASSERT(row.isInitialized() || isDone(input)); if (row) { output.copyRow(row); output.advanceRow(); } + auto const callSet = requiresMoreInput(input, output.getClientCall()); + if (!callSet.empty()) { + return {ExecutorState::HASMORE, NoStats{}, callSet}; + } } - // Call and dependency unused, so we return a too large dependency - // in order to trigger asserts if it is used. 
- if (isDone(input)) { - return {ExecutorState::DONE, NoStats{}, AqlCall{}, _numberDependencies + 1}; - } - return {ExecutorState::HASMORE, NoStats{}, AqlCall{}, _numberDependencies + 1}; + auto const state = std::invoke([&]() { + if (input.isDone()) { + return ExecutorState::DONE; + } else if (limitReached() && !output.getClientCall().needsFullCount()) { + return ExecutorState::DONE; + } else { + return ExecutorState::HASMORE; + } + }); + + return {state, NoStats{}, AqlCallSet{}}; } auto SortingGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) - -> std::tuple { + -> std::tuple { { // First initialize - auto maybeCall = initialize(input); - if (maybeCall.has_value()) { - auto const& [request, dep] = maybeCall.value(); - return {ExecutorState::HASMORE, NoStats{}, 0, request, dep}; + auto const callSet = initialize(input, call); + if (!callSet.empty()) { + return {ExecutorState::HASMORE, NoStats{}, 0, callSet}; } } - while (!isDone(input) && call.needSkipMore()) { - auto maybeCall = requiresMoreInput(input); - if (maybeCall.has_value()) { - auto const& [request, dep] = maybeCall.value(); - return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), request, dep}; + { + auto const callSet = requiresMoreInput(input, call); + if (!callSet.empty()) { + return {ExecutorState::HASMORE, NoStats{}, 0, callSet}; + } + } + + // skip offset + while (!input.isDone() && call.getOffset() > 0) { + // During offset phase we have the guarntee + // that the rows we need to skip have been fetched + // We will fetch rows as data from upstream for + // all rows we need to skip here. + TRI_ASSERT(!maySkip()); + // We need to sort still + // And account the row in the limit + auto row = nextRow(input); + TRI_ASSERT(row.isInitialized() || input.isDone()); + if (row) { + call.didSkip(1); + } + auto const callSet = requiresMoreInput(input, call); + if (!callSet.empty()) { + return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), callSet}; } - if (call.getOffset() > 0) { - TRI_ASSERT(!maySkip()); - // We need to sort still - // And account the row in the limit - auto row = nextRow(input); - TRI_ASSERT(row.isInitialized() || isDone(input)); - if (row) { + } + + TRI_ASSERT(input.isDone() || call.getOffset() == 0); + + auto callSet = AqlCallSet{}; + if (call.needSkipMore() && call.getOffset() == 0) { + // We can only skip more if the offset is reached. + // Otherwise we would have looped more above + TRI_ASSERT(call.getOffset() == 0); + TRI_ASSERT(call.hasHardLimit()); + + // We are only called with fullcount. + // or if the input is done. + // sorting does not matter. + // Start simply skip all from upstream. + for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { + auto& range = input.rangeForDependency(dep); + while (range.hasDataRow()) { + // Consume the row and count it as skipped + std::ignore = input.nextDataRow(dep); call.didSkip(1); } - } else { - // We are only called with fullcount. - // sorting does not matter. - // Start simply skip all from upstream. - for (size_t dep = 0; dep < input.numberDependencies(); ++dep) { - ExecutorState state = ExecutorState::HASMORE; - InputAqlItemRow row{CreateInvalidInputRowHint{}}; - while (state == ExecutorState::HASMORE) { - std::tie(state, row) = input.nextDataRow(dep); - if (row) { - call.didSkip(1); - } else { - // We have consumed all overfetched rows. - // We may still have a skip counter within the range. 
- call.didSkip(input.skipAll(dep)); - if (state == ExecutorState::HASMORE) { - // We need to fetch more data, but can fullCount now - AqlCall request{0, true, 0, AqlCall::LimitType::HARD}; - return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), request, dep}; - } - } + // Skip all rows in flight + call.didSkip(range.skipAll()); + + if (range.upstreamState() == ExecutorState::HASMORE) { + TRI_ASSERT(!input.hasDataRow(dep)); + TRI_ASSERT(input.skippedInFlight(dep) == 0); + // We need to fetch more data, but can fullCount now + AqlCall request{0, true, 0, AqlCall::LimitType::HARD}; + callSet.calls.emplace_back(AqlCallSet::DepCallPair{dep, request}); + if (!_fetchParallel) { + break; } } } } - // Call and dependency unused, so we return a too large dependency - // in order to trigger asserts if it is used. - if (isDone(input)) { - return {ExecutorState::DONE, NoStats{}, call.getSkipCount(), AqlCall{}, - _numberDependencies + 1}; - } - return {ExecutorState::HASMORE, NoStats{}, call.getSkipCount(), AqlCall{}, - _numberDependencies + 1}; -} + TRI_ASSERT(!input.isDone() || callSet.empty()); -std::pair SortingGatherExecutor::expectedNumberOfRows(size_t const atMost) const { - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + return {input.state(), NoStats{}, call.getSkipCount(), callSet}; } bool SortingGatherExecutor::constrainedSort() const noexcept { @@ -426,3 +477,51 @@ auto SortingGatherExecutor::rowsLeftToWrite() const noexcept -> size_t { TRI_ASSERT(_limit >= _rowsReturned); return _limit - std::min(_limit, _rowsReturned); } + +auto SortingGatherExecutor::limitReached() const noexcept -> bool { + return constrainedSort() && rowsLeftToWrite() == 0; +} + +[[nodiscard]] auto SortingGatherExecutor::calculateUpstreamCall(AqlCall const& clientCall) const + noexcept -> AqlCall { + auto upstreamCall = AqlCall{}; + if (constrainedSort()) { + if (clientCall.hasSoftLimit()) { + // We do not know if we are going to be asked again to do a fullcount + // So we can only request a softLimit bounded by our internal limit to upstream + + upstreamCall.softLimit = clientCall.offset + clientCall.softLimit; + if (rowsLeftToWrite() < upstreamCall.softLimit) { + // Do not overfetch + // NOTE: We cannnot use std::min as the numbers have different types ;( + upstreamCall.softLimit = rowsLeftToWrite(); + } + + // We need at least 1 to now violate API. It seems we have nothing to + // produce and are called with a softLimit. + TRI_ASSERT(0 < upstreamCall.softLimit); + } else { + if (rowsLeftToWrite() < upstreamCall.hardLimit) { + // Do not overfetch + // NOTE: We cannnot use std::min as the numbers have different types ;( + upstreamCall.hardLimit = rowsLeftToWrite(); + } + // In case the client needs a fullCount we do it as well, for all rows + // after the above limits + upstreamCall.fullCount = clientCall.fullCount; + TRI_ASSERT(0 < upstreamCall.hardLimit || upstreamCall.needsFullCount()); + } + } else { + // Increase the clientCall limit by it's offset and forward. + upstreamCall.softLimit = clientCall.softLimit + clientCall.offset; + upstreamCall.hardLimit = clientCall.hardLimit + clientCall.offset; + // In case the client needs a fullCount we do it as well, for all rows + // after the above limits + upstreamCall.fullCount = clientCall.fullCount; + } + + // We do never send a skip to upstream here. 
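  // Worked examples for the limit arithmetic above (illustration only,
  // hypothetical numbers):
  //  * unconstrained sort, client call {offset: 10, softLimit: 100}
  //      -> upstream call {offset: 0, softLimit: 110}; the 10 offset rows are
  //         consumed by this executor itself, since it has to sort them anyway.
  //  * constrained sort with limit 50 and 20 rows already returned
  //    (rowsLeftToWrite() == 30), client call {offset: 0, softLimit: 100}
  //      -> offset + softLimit = 100 is capped to 30, so the upstream call is
  //         {offset: 0, softLimit: 30} to avoid overfetching.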
+ // We need to look at every relevant line ourselves + TRI_ASSERT(upstreamCall.offset == 0); + return upstreamCall; +} diff --git a/arangod/Aql/SortingGatherExecutor.h b/arangod/Aql/SortingGatherExecutor.h index 440a507847c9..96f179bee753 100644 --- a/arangod/Aql/SortingGatherExecutor.h +++ b/arangod/Aql/SortingGatherExecutor.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_SORTING_GATHER_EXECUTOR_H #define ARANGOD_AQL_SORTING_GATHER_EXECUTOR_H +#include "Aql/AqlCallSet.h" #include "Aql/ClusterNodes.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" @@ -94,7 +95,7 @@ class SortingGatherExecutor { virtual ~SortingStrategy() = default; /// @brief returns next value - virtual ValueType nextValue() = 0; + [[nodiscard]] virtual auto nextValue() -> ValueType = 0; /// @brief prepare strategy fetching values virtual void prepare(std::vector& /*blockPos*/) {} @@ -129,7 +130,7 @@ class SortingGatherExecutor { * size:t: Dependency to request */ [[nodiscard]] auto produceRows(MultiAqlItemBlockInputRange& input, OutputAqlItemRow& output) - -> std::tuple; + -> std::tuple; /** * @brief Skip rows @@ -144,12 +145,10 @@ class SortingGatherExecutor { * size:t: Dependency to request */ [[nodiscard]] auto skipRowsRange(MultiAqlItemBlockInputRange& input, AqlCall& call) - -> std::tuple; - - std::pair expectedNumberOfRows(size_t atMost) const; + -> std::tuple; private: - bool constrainedSort() const noexcept; + [[nodiscard]] auto constrainedSort() const noexcept -> bool; void assertConstrainedDoesntOverfetch(size_t atMost) const noexcept; @@ -157,7 +156,7 @@ class SortingGatherExecutor { // enabled. Then, after the limit is reached, we may pass skipSome through // to our dependencies, and not sort any more. // This also means that we may not produce rows anymore after that point. - bool maySkip() const noexcept; + [[nodiscard]] auto maySkip() const noexcept -> bool; /** * @brief Function that checks if all dependencies are either @@ -168,43 +167,39 @@ class SortingGatherExecutor { * @param inputRange Range of all input dependencies * @return std::optional> optional call for the dependnecy requiring input */ - auto requiresMoreInput(MultiAqlItemBlockInputRange const& inputRange) - -> std::optional>; + [[nodiscard]] auto requiresMoreInput(MultiAqlItemBlockInputRange const& inputRange, + AqlCall const& clientCall) -> AqlCallSet; /** * @brief Get the next row matching the sorting strategy * * @return InputAqlItemRow best fit row. Might be invalid if all input is done. */ - auto nextRow(MultiAqlItemBlockInputRange& input) -> InputAqlItemRow; - - /** - * @brief Tests if this Executor is done producing - * => All inputs are fully consumed - * - * @return true we are done - * @return false we have more - */ - auto isDone(MultiAqlItemBlockInputRange const& input) const -> bool; + [[nodiscard]] auto nextRow(MultiAqlItemBlockInputRange& input) -> InputAqlItemRow; /** * @brief Initialize the Sorting strategy with the given input. * This is known to be empty, but all prepared at this point. * @param inputRange The input, no data included yet. 
*/ - auto initialize(MultiAqlItemBlockInputRange const& inputRange) - -> std::optional>; + [[nodiscard]] auto initialize(MultiAqlItemBlockInputRange const& inputRange, + AqlCall const& clientCall) -> AqlCallSet; + + [[nodiscard]] auto rowsLeftToWrite() const noexcept -> size_t; - auto rowsLeftToWrite() const noexcept -> size_t; + [[nodiscard]] auto limitReached() const noexcept -> bool; + + [[nodiscard]] auto calculateUpstreamCall(AqlCall const& clientCall) const + noexcept -> AqlCall; private: // Flag if we are past the initialize phase (fetched one block for every dependency). - bool _initialized; + bool _initialized = false; // Total Number of dependencies size_t _numberDependencies; - // Input data to process + // Input data to process, indexed by dependency, referenced by the SortingStrategy std::vector _inputRows; /// @brief If we do a constrained sort, it holds the limit > 0. Otherwise, it's 0. @@ -219,6 +214,8 @@ class SortingGatherExecutor { std::unique_ptr _strategy; const bool _fetchParallel; + + std::optional _depToUpdate = std::nullopt; }; } // namespace aql diff --git a/arangod/Aql/UnsortedGatherExecutor.cpp b/arangod/Aql/UnsortedGatherExecutor.cpp index ef2babd051f2..3b2273f7742a 100644 --- a/arangod/Aql/UnsortedGatherExecutor.cpp +++ b/arangod/Aql/UnsortedGatherExecutor.cpp @@ -41,135 +41,89 @@ struct Dependency { }; UnsortedGatherExecutor::UnsortedGatherExecutor(Fetcher& fetcher, Infos& infos) - : _fetcher(fetcher) {} + : _fetcher{fetcher} {} UnsortedGatherExecutor::~UnsortedGatherExecutor() = default; auto UnsortedGatherExecutor::produceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) - -> std::tuple { + -> std::tuple { while (!output.isFull() && !done()) { if (input.hasDataRow(currentDependency())) { auto [state, inputRow] = input.nextDataRow(currentDependency()); output.copyRow(inputRow); TRI_ASSERT(output.produced()); output.advanceRow(); - - if (state == ExecutorState::DONE) { - advanceDependency(); - } } else { if (input.upstreamState(currentDependency()) == ExecutorState::DONE) { + TRI_ASSERT(input.rangeForDependency(currentDependency()).skippedInFlight() == 0); advanceDependency(); } else { - return {input.upstreamState(currentDependency()), Stats{}, AqlCall{}, - currentDependency()}; + auto callSet = AqlCallSet{}; + callSet.calls.emplace_back( + AqlCallSet::DepCallPair{currentDependency(), output.getClientCall()}); + return {input.upstreamState(currentDependency()), Stats{}, callSet}; } } } while (!done() && input.upstreamState(currentDependency()) == ExecutorState::DONE) { + auto range = input.rangeForDependency(currentDependency()); + if (range.upstreamState() == ExecutorState::HASMORE || range.skippedInFlight() > 0) { + // skippedInFlight > 0 -> output.isFull() + TRI_ASSERT(range.skippedInFlight() == 0 || output.isFull()); + break; + } + TRI_ASSERT(input.rangeForDependency(currentDependency()).skippedInFlight() == 0); advanceDependency(); } if (done()) { - // here currentDependency is invalid which will cause things to crash - // if we ask upstream in ExecutionBlockImpl. yolo. 
TRI_ASSERT(!input.hasDataRow()); - return {ExecutorState::DONE, Stats{}, AqlCall{}, currentDependency()}; + return {ExecutorState::DONE, Stats{}, AqlCallSet{}}; } else { - return {input.upstreamState(currentDependency()), Stats{}, AqlCall{}, - currentDependency()}; + auto callSet = AqlCallSet{}; + callSet.calls.emplace_back( + AqlCallSet::DepCallPair{currentDependency(), output.getClientCall()}); + return {input.upstreamState(currentDependency()), Stats{}, callSet}; } } auto UnsortedGatherExecutor::skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) - -> std::tuple { - while (call.needSkipMore() && !done()) { - if (input.hasDataRow(currentDependency())) { - auto [state, inputRow] = input.nextDataRow(currentDependency()); - call.didSkip(1); + -> std::tuple { + auto skipped = size_t{0}; - if (state == ExecutorState::DONE) { - advanceDependency(); - } - } else { - if (input.upstreamState(currentDependency()) == ExecutorState::DONE) { - advanceDependency(); - } else { - // We need to fetch more first - break; - } - } + if (call.getOffset() > 0) { + skipped = input.skipForDependency(currentDependency(), call.getOffset()); + } else { + skipped = input.skipAllForDependency(currentDependency()); } + call.didSkip(skipped); + // Skip over dependencies that are DONE, they cannot skip more while (!done() && input.upstreamState(currentDependency()) == ExecutorState::DONE) { advanceDependency(); } + // Here we are either done, or currentDependency() still could produce more if (done()) { - // here currentDependency is invalid which will cause things to crash - // if we ask upstream in ExecutionBlockImpl. yolo. - return {ExecutorState::DONE, Stats{}, call.getSkipCount(), AqlCall{}, - currentDependency()}; + return {ExecutorState::DONE, Stats{}, skipped, AqlCallSet{}}; } else { - return {input.upstreamState(currentDependency()), Stats{}, - call.getSkipCount(), AqlCall{}, currentDependency()}; - } -} - -auto UnsortedGatherExecutor::produceRows(OutputAqlItemRow& output) - -> std::pair { - while (!output.isFull() && !done()) { - // Note that fetchNextRow may return DONE (because the current dependency is - // DONE), and also return an unitialized row in that case, but we are not - // DONE completely - that's what `done()` is for. - auto [state, inputRow] = fetchNextRow(output.numRowsLeft()); - if (state == ExecutionState::WAITING) { - return {state, {}}; - } - // HASMORE => inputRow.isInitialized() - TRI_ASSERT(state == ExecutionState::DONE || inputRow.isInitialized()); - if (inputRow.isInitialized()) { - output.copyRow(inputRow); - TRI_ASSERT(output.produced()); - output.advanceRow(); + // If we're not done skipping, we can just request the current clientcall + // from upstream + auto callSet = AqlCallSet{}; + if (call.needSkipMore()) { + callSet.calls.emplace_back(AqlCallSet::DepCallPair{currentDependency(), call}); } + return {ExecutorState::HASMORE, Stats{}, skipped, callSet}; } - - auto state = done() ? 
ExecutionState::DONE : ExecutionState::HASMORE; - return {state, {}}; -} - -auto UnsortedGatherExecutor::fetcher() const noexcept -> const Fetcher& { - return _fetcher; } -auto UnsortedGatherExecutor::fetcher() noexcept -> Fetcher& { return _fetcher; } - auto UnsortedGatherExecutor::numDependencies() const noexcept(noexcept(_fetcher.numberDependencies())) -> size_t { return _fetcher.numberDependencies(); } -auto UnsortedGatherExecutor::fetchNextRow(size_t atMost) - -> std::pair { - auto res = fetcher().fetchRowForDependency(currentDependency(), atMost); - if (res.first == ExecutionState::DONE) { - advanceDependency(); - } - return res; -} - -auto UnsortedGatherExecutor::skipNextRows(size_t atMost) - -> std::pair { - auto res = fetcher().skipRowsForDependency(currentDependency(), atMost); - if (res.first == ExecutionState::DONE) { - advanceDependency(); - } - return res; -} - auto UnsortedGatherExecutor::done() const noexcept -> bool { return _currentDependency >= numDependencies(); } @@ -182,26 +136,3 @@ auto UnsortedGatherExecutor::advanceDependency() noexcept -> void { TRI_ASSERT(_currentDependency < numDependencies()); ++_currentDependency; } - -auto UnsortedGatherExecutor::skipRows(size_t const atMost) - -> std::tuple { - auto const rowsLeftToSkip = [&atMost, &skipped = this->_skipped]() { - TRI_ASSERT(atMost >= skipped); - return atMost - skipped; - }; - while (rowsLeftToSkip() > 0 && !done()) { - // Note that skipNextRow may return DONE (because the current dependency is - // DONE), and also return an unitialized row in that case, but we are not - // DONE completely - that's what `done()` is for. - auto [state, skipped] = skipNextRows(rowsLeftToSkip()); - _skipped += skipped; - if (state == ExecutionState::WAITING) { - return {state, {}, 0}; - } - } - - auto state = done() ? ExecutionState::DONE : ExecutionState::HASMORE; - auto skipped = size_t{0}; - std::swap(skipped, _skipped); - return {state, {}, skipped}; -} diff --git a/arangod/Aql/UnsortedGatherExecutor.h b/arangod/Aql/UnsortedGatherExecutor.h index 2793c31be674..86e28b70272c 100644 --- a/arangod/Aql/UnsortedGatherExecutor.h +++ b/arangod/Aql/UnsortedGatherExecutor.h @@ -23,6 +23,7 @@ #ifndef ARANGOD_AQL_UNSORTEDGATHEREXECUTOR_H #define ARANGOD_AQL_UNSORTEDGATHEREXECUTOR_H +#include "Aql/AqlCallSet.h" #include "Aql/ExecutionState.h" #include "Aql/ExecutorInfos.h" #include "Aql/MultiDependencySingleRowFetcher.h" @@ -72,17 +73,6 @@ class UnsortedGatherExecutor { UnsortedGatherExecutor(Fetcher& fetcher, Infos&); ~UnsortedGatherExecutor(); - /** - * @brief produce the next Row of Aql Values. 
- * - * @return ExecutionState, - * if something was written output.hasValue() == true - */ - [[nodiscard]] auto produceRows(OutputAqlItemRow& output) - -> std::pair; - - [[nodiscard]] auto skipRows(size_t atMost) -> std::tuple; - /** * @brief Produce rows * @@ -91,11 +81,10 @@ class UnsortedGatherExecutor { * @return std::tuple * ExecutorState: DONE or HASMORE (only within a subquery) * Stats: Stats gerenated here - * AqlCall: Request to upstream - * size:t: Dependency to request + * AqlCallSet: Request to upstream */ [[nodiscard]] auto produceRows(typename Fetcher::DataRange& input, OutputAqlItemRow& output) - -> std::tuple; + -> std::tuple; /** * @brief Skip rows @@ -106,28 +95,21 @@ class UnsortedGatherExecutor { * ExecutorState: DONE or HASMORE (only within a subquery) * Stats: Stats gerenated here * size_t: Number of rows skipped - * AqlCall: Request to upstream - * size:t: Dependency to request + * AqlCallSet: Request to upstream */ [[nodiscard]] auto skipRowsRange(typename Fetcher::DataRange& input, AqlCall& call) - -> std::tuple; + -> std::tuple; private: [[nodiscard]] auto numDependencies() const noexcept(noexcept(static_cast(nullptr)->numberDependencies())) -> size_t; - [[nodiscard]] auto fetcher() const noexcept -> Fetcher const&; - [[nodiscard]] auto fetcher() noexcept -> Fetcher&; [[nodiscard]] auto done() const noexcept -> bool; [[nodiscard]] auto currentDependency() const noexcept -> size_t; - [[nodiscard]] auto fetchNextRow(size_t atMost) - -> std::pair; - [[nodiscard]] auto skipNextRows(size_t atMost) -> std::pair; auto advanceDependency() noexcept -> void; private: Fetcher& _fetcher; size_t _currentDependency{0}; - size_t _skipped{0}; }; } // namespace arangodb::aql diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 785416e6a60a..c5ae8e10dc53 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -219,12 +219,13 @@ set(LIB_ARANGO_AQL_SOURCES Aql/Aggregator.cpp Aql/AllRowsFetcher.cpp Aql/AqlCall.cpp + Aql/AqlCallSet.cpp Aql/AqlCallStack.cpp Aql/AqlExecuteResult.cpp Aql/AqlFunctionFeature.cpp Aql/AqlItemBlock.cpp - Aql/AqlItemBlockInputRange.cpp Aql/AqlItemBlockInputMatrix.cpp + Aql/AqlItemBlockInputRange.cpp Aql/AqlItemBlockManager.cpp Aql/AqlItemBlockUtils.cpp Aql/AqlItemMatrix.cpp diff --git a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index ea6133c55117..5199c766d248 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -1794,7 +1794,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_using_two) { // We use two pass-through producers, that simply copy over input and assert an calls. // On top of them we have a 1000 line producer. // We expect the result to be identical to the 1000 line producer only. -TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_passthrough) { +TEST_P(ExecutionBlockImplExecuteIntegrationTest, DISABLED_test_call_forwarding_passthrough) { auto singleton = createSingleton(); auto builder = std::make_shared(); @@ -1837,7 +1837,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_passthroug // does skipping. // On top of them we have a 1000 line producer. // We expect the result to be identical to the 1000 line producer only. 
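// Note (illustrative, not part of the patch): prefixing a googletest test name
// with DISABLED_, as done here and in the hunks below, makes the framework skip
// the test at runtime and report it as disabled; it can still be forced to run
// with --gtest_also_run_disabled_tests.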
-TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_call_forwarding_implement_skip) { +TEST_P(ExecutionBlockImplExecuteIntegrationTest, DISABLED_test_call_forwarding_implement_skip) { auto singleton = createSingleton(); auto builder = std::make_shared(); @@ -2500,7 +2500,8 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, empty_subquery) { // Test forward outer queries. // The executors should not be called if there is no relevant call on the Stack // Block shall be returned unmodified. -TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_outer_subquery_forwarding_passthrough) { +TEST_P(ExecutionBlockImplExecuteIntegrationTest, + DISABLED_test_outer_subquery_forwarding_passthrough) { std::deque blockDeque; auto builder = std::make_shared(); { @@ -2560,7 +2561,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_outer_subquery_forwarding_ // Test forward outer queries. // The executors should not be called if there is no relevant call on the Stack // Block shall be returned unmodified. -TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_outer_subquery_forwarding) { +TEST_P(ExecutionBlockImplExecuteIntegrationTest, DISABLED_test_outer_subquery_forwarding) { std::deque blockDeque; auto builder = std::make_shared(); { diff --git a/tests/js/server/aql/aql-optimizer-rule-parallelize-gather-cluster.js b/tests/js/server/aql/aql-optimizer-rule-parallelize-gather-cluster.js index fc2d38c7b0bf..d6a5c18346e1 100644 --- a/tests/js/server/aql/aql-optimizer-rule-parallelize-gather-cluster.js +++ b/tests/js/server/aql/aql-optimizer-rule-parallelize-gather-cluster.js @@ -175,7 +175,7 @@ function optimizerRuleTestSuite () { queries.forEach(function(query) { let result = AQL_EXPLAIN(query[0]); assertNotEqual(-1, result.plan.rules.indexOf(ruleName), query); - + result = AQL_EXECUTE(query[0]).json; assertEqual(query[1], result.length); diff --git a/tests/js/server/aql/aql-profiler-cluster.js b/tests/js/server/aql/aql-profiler-cluster.js index 762cd436bcd6..9542f9ebab7a 100644 --- a/tests/js/server/aql/aql-profiler-cluster.js +++ b/tests/js/server/aql/aql-profiler-cluster.js @@ -215,6 +215,10 @@ function ahuacatlProfilerTestSuite () { /// @brief test RemoteBlock and SortingGatherBlock //////////////////////////////////////////////////////////////////////////////// +/* + * Note: disabled this test for now. Using parallel Gather this kind of get's out of hand. + * The RemoteNode is asked rather often although it does not have data yet. + * I checked that the upstream Blocks are not called too often. testRemoteAndSortingGatherBlock : function () { const query = `FOR doc IN ${cn} SORT doc.i RETURN doc`; // Number of local getSome calls that do not return WAITING. @@ -233,15 +237,17 @@ function ahuacatlProfilerTestSuite () { { type : EnumerateCollectionBlock, calls : dbServerBatches(rowsPerShard), items : totalItems(rowsPerShard) }, { type : CalculationBlock, calls : dbServerBatches(rowsPerShard), items : totalItems(rowsPerShard) }, { type : SortBlock, calls : dbServerOptimalBatches(rowsPerShard), items : totalItems(rowsPerShard) }, - // Twice the number due to WAITING - { type : RemoteBlock, calls : 2 * dbServerOptimalBatches(rowsPerServer), items : totalItems(rowsPerShard) }, + // Twice the number due to WAITING, also we will call the upstream even if we do not have data yet (we do not know, if we have data we can continue.) 
+ { type : RemoteBlock, calls : [2 * dbServerOptimalBatches(rowsPerServer), 2 * coordinatorBatches(rowsPerServer)], items : totalItems(rowsPerShard) }, // We get dbServerBatches(rowsPerShard) times WAITING, plus the non-waiting getSome calls. - { type : SortingGatherBlock, calls : coordinatorBatches(rowsPerServer), items : totalItems(rowsPerShard) }, - { type : ReturnBlock, calls : coordinatorBatches(rowsPerServer), items : totalItems(rowsPerShard) } + // In a very lucky case we may get away with 1 call less, that is if the DBServers are fist enough to deliver all data in + // the same roundtrip interval + { type : SortingGatherBlock, calls : [coordinatorBatches(rowsPerServer) - 1, coordinatorBatches(rowsPerServer)], items : totalItems(rowsPerShard) }, + { type : ReturnBlock, calls : [coordinatorBatches(rowsPerServer) - 1, coordinatorBatches(rowsPerServer)], items : totalItems(rowsPerShard) } ]; profHelper.runClusterChecks({col, exampleDocumentsByShard, query, genNodeList}); }, - +*/ }; } From 25de4debf99984b1f51f63b5689782d040996276 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 12 Mar 2020 14:53:48 +0100 Subject: [PATCH 100/122] Feature/aql subquery execution block impl execute implementation bypass skip (#11203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fixed range-handling for Modification Executors * DataRange handling in ModificationExecutor * Honor batch-size defined by UpstreamExecutor * Fixed compile issue * More fixes in modification * Remvoed log devel * Fixed profiler Test. for NoResults node we cahnge the behaviour * Activated getSome failure tests in ExecuteRestHandler * Fixed skipping in Index * Let the MultiDependencySingleROwFetcher return the correct states. * Fixed non-maintainer compilation * Attempt to fix windows compile issue * Fixed the non-maintainer compile ina different way * Added API in MultiAqlItemBlockInputRange to get Number of dependencies * Comments * Savepoint commit, does not compile, but no harm is done. Will start breaking things now * Another savepoint commit. does not compile, yet. * First draft of new Style SortingGather not yet implemented: Parallelism this needs to be handled in ExecutionBlockImpl now. * Allow waiting within old-style subquery * Fixed invalid skipRwos in unsorted gather * First draft of ParallelUnsortedGatherExecutor * Removed unused local variables * Added some Assertions in MultiAqlItemBlockInputRange * Initialize dependdencies of MultiDependencyFetcher * Fixed skipRows loop in UnsortingGatherNode * Added an implementation for a SkipResult, in order to simplify exchange of it. * Moved production API -> SkipResult * Made tests compile with new SkipResult * Added a test using skip and limit on subqueries * Prepared to use subqueries in SkipResult * Let subqueries modify the SkipResult subquery stack * Fixed return state of GatherNode * Activate all spliced subquery tests \o/ * Let SubqueryEnd honor the client request * Added a Maintainer only test for the stack, if it is 36 compatible * Added first part of side-effect executors. They now send upstream a fetch-all stack. Need to fix downstream reporting, and call the executor with hardLimit 0 * Add a fake FASTFORWARD call into a subquery-skipped ModificationExecutor. * Added helper shadow row function for SideEffect executors * Let the Modification Executor also produce data, even if no FullCount is used. * Revert "Let the Modification Executor also produce data, even if no FullCount is used." 
This reverts commit b1c6af5674de445f9e3d6343feb0e3cad422588c. * Revert "Revert "Let the Modification Executor also produce data, even if no FullCount is used."" This reverts commit ac94ae303c28b4ab3c0a138fc0aad1281fa5cca6. * Implemented proper fastForwarding and skipReporting in ExecutorsWithSideEffects. * Removed unreachable code, somehow the G++ in our tests tries to comile it and fails now. * noexcept -> throw is nono good. Thank you compiler for helping me here \o/ * Implment copy on SkipResult * Adapted SubqueryStartTest to allow easy testing for Skipping on outer levels. * Fixed koenig lookup of SkipResult ostream operator * Removed special case of SubqueryStartExecutor and include it on the handling for SideEffect Executors * Sorry needed to make the _operations vector mutual because of 3.6 compatibility * Attempt to fix windows compile issue * Fixed behvaiour of SubqueryEndExecutor * Another windows attempt * Fixed modify test, which would actually iterate over too many documents if the LIMIT does not LIMIT the executed modification operations anymore. * Fixed tests that assert on existence of SubqueryNode, now there will be SubqueryStartNode! * Consider a hardLimitFastForward inside the Callstack like a needToSkipSubquery. But do not report the skipping of it. * Fixed all tests that are related to subqueries, which now need to assert spliced queries. * Fixed jslint * Fixed the callstack that has been seperated from the clientCall. In some places it was not handled correctly. * Fixed skip result forwarding in Scatter/Gather * Fixed assertion if the ConstFetcher gets a block with subquery level skip injected * Moved merging of SubquerySkips in MultiDependencies into the Fetcher * Removed dead code and fixed overproduction of Rows in Subquery Executor * Fixed bypassing of skip in SideEffect executors if they trigger waiting at some point * Refactored old SubqueryExecutor, there has been some issue with WAITING within the cluster. * Removed debug logging in test * Fixed empty subquery executor * Added an assertion in the AqlResult that no invalid block tries to be serialized * Added clientId into profile tracing. 
Fixed return of invalid blocks in blocks with client * Removed invalid AqlExecuteResult from Test * Update tests/Aql/SubqueryStartExecutorTest.cpp Co-Authored-By: Tobias Gödderz * Fixed comment, thanks to reviewer Co-authored-by: Tobias Gödderz --- arangod/Aql/AllRowsFetcher.cpp | 11 +- arangod/Aql/AllRowsFetcher.h | 3 +- arangod/Aql/AqlCallStack.cpp | 108 +++-- arangod/Aql/AqlCallStack.h | 60 ++- arangod/Aql/AqlExecuteResult.cpp | 56 ++- arangod/Aql/AqlExecuteResult.h | 10 +- arangod/Aql/BlocksWithClients.cpp | 56 ++- arangod/Aql/BlocksWithClients.h | 9 +- arangod/Aql/ConstFetcher.cpp | 19 +- arangod/Aql/ConstFetcher.h | 3 +- arangod/Aql/DependencyProxy.cpp | 38 +- arangod/Aql/DependencyProxy.h | 5 +- arangod/Aql/DistributeExecutor.cpp | 35 +- arangod/Aql/DistributeExecutor.h | 19 +- arangod/Aql/ExecutionBlock.cpp | 20 +- arangod/Aql/ExecutionBlock.h | 13 +- arangod/Aql/ExecutionBlockImpl.cpp | 457 +++++++++++++----- arangod/Aql/ExecutionBlockImpl.h | 34 +- arangod/Aql/ExecutionEngine.cpp | 17 +- arangod/Aql/ExecutionEngine.h | 5 +- arangod/Aql/ModificationExecutor.cpp | 2 - .../Aql/MultiDependencySingleRowFetcher.cpp | 49 +- arangod/Aql/MultiDependencySingleRowFetcher.h | 7 +- arangod/Aql/OptimizerRules.cpp | 32 +- arangod/Aql/RemoteExecutor.cpp | 42 +- arangod/Aql/RemoteExecutor.h | 18 +- arangod/Aql/RestAqlHandler.cpp | 2 +- arangod/Aql/ScatterExecutor.cpp | 26 +- arangod/Aql/ScatterExecutor.h | 11 +- arangod/Aql/SingleRowFetcher.cpp | 16 +- arangod/Aql/SingleRowFetcher.h | 3 +- arangod/Aql/SkipResult.cpp | 184 +++++++ arangod/Aql/SkipResult.h | 89 ++++ arangod/Aql/SortingGatherExecutor.h | 2 + arangod/Aql/SubqueryExecutor.cpp | 243 ++++------ arangod/Aql/SubqueryExecutor.h | 15 + arangod/CMakeLists.txt | 1 + tests/Aql/DependencyProxyMock.cpp | 7 +- tests/Aql/DependencyProxyMock.h | 8 +- tests/Aql/ExecutionBlockImplTest.cpp | 88 ++-- tests/Aql/ExecutionBlockImplTestInstances.cpp | 4 +- tests/Aql/ExecutorTestHelper.h | 33 +- tests/Aql/HashedCollectExecutorTest.cpp | 4 +- tests/Aql/IdExecutorTest.cpp | 49 +- tests/Aql/RemoteExecutorTest.cpp | 53 +- tests/Aql/ScatterExecutorTest.cpp | 46 +- tests/Aql/SingleRowFetcherTest.cpp | 4 +- tests/Aql/SkipResultTest.cpp | 267 ++++++++++ tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp | 92 ++-- tests/Aql/SplicedSubqueryIntegrationTest.cpp | 49 +- tests/Aql/SubqueryStartExecutorTest.cpp | 165 ++++++- tests/Aql/WaitingExecutionBlockMock.cpp | 27 +- tests/Aql/WaitingExecutionBlockMock.h | 7 +- tests/CMakeLists.txt | 1 + tests/js/server/aql/aql-optimizer-indexes.js | 255 ++++++---- ...l-optimizer-rule-move-calculations-down.js | 2 +- ...o-document-materialization-arangosearch.js | 4 +- tests/js/server/aql/aql-subquery.js | 6 +- 58 files changed, 2047 insertions(+), 844 deletions(-) create mode 100644 arangod/Aql/SkipResult.cpp create mode 100644 arangod/Aql/SkipResult.h create mode 100644 tests/Aql/SkipResultTest.cpp diff --git a/arangod/Aql/AllRowsFetcher.cpp b/arangod/Aql/AllRowsFetcher.cpp index 27702b4f5995..98096c152342 100644 --- a/arangod/Aql/AllRowsFetcher.cpp +++ b/arangod/Aql/AllRowsFetcher.cpp @@ -60,7 +60,7 @@ std::pair AllRowsFetcher::fetchAllRows() { return {ExecutionState::DONE, nullptr}; } -std::tuple AllRowsFetcher::execute(AqlCallStack& stack) { +std::tuple AllRowsFetcher::execute(AqlCallStack& stack) { if (!stack.isRelevant()) { auto [state, skipped, block] = _dependencyProxy->execute(stack); return {state, skipped, AqlItemBlockInputMatrix{block}}; @@ -79,13 +79,14 @@ std::tuple AllRowsFetcher::exec 
TRI_ASSERT(!_aqlItemMatrix->stoppedOnShadowRow()); while (true) { auto [state, skipped, block] = _dependencyProxy->execute(stack); - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.getSkipCount() == 0); // we will either build a complete fetched AqlItemBlockInputMatrix or return an empty one if (state == ExecutionState::WAITING) { + TRI_ASSERT(skipped.nothingSkipped()); TRI_ASSERT(block == nullptr); // On waiting we have nothing to return - return {state, 0, AqlItemBlockInputMatrix{ExecutorState::HASMORE}}; + return {state, SkipResult{}, AqlItemBlockInputMatrix{ExecutorState::HASMORE}}; } TRI_ASSERT(block != nullptr || state == ExecutionState::DONE); @@ -97,10 +98,10 @@ std::tuple AllRowsFetcher::exec // If we find a ShadowRow or ExecutionState == Done, we're done fetching. if (_aqlItemMatrix->stoppedOnShadowRow() || state == ExecutionState::DONE) { if (state == ExecutionState::HASMORE) { - return {state, 0, + return {state, skipped, AqlItemBlockInputMatrix{ExecutorState::HASMORE, _aqlItemMatrix.get()}}; } - return {state, 0, + return {state, skipped, AqlItemBlockInputMatrix{ExecutorState::DONE, _aqlItemMatrix.get()}}; } } diff --git a/arangod/Aql/AllRowsFetcher.h b/arangod/Aql/AllRowsFetcher.h index f285715c7e11..71c336ebbb88 100644 --- a/arangod/Aql/AllRowsFetcher.h +++ b/arangod/Aql/AllRowsFetcher.h @@ -44,6 +44,7 @@ enum class ExecutionState; template class DependencyProxy; class ShadowAqlItemRow; +class SkipResult; /** * @brief Interface for all AqlExecutors that do need all @@ -110,7 +111,7 @@ class AllRowsFetcher { * size_t => Amount of documents skipped * DataRange => Resulting data */ - std::tuple execute(AqlCallStack& stack); + std::tuple execute(AqlCallStack& stack); /** * @brief Fetch one new AqlItemRow from upstream. diff --git a/arangod/Aql/AqlCallStack.cpp b/arangod/Aql/AqlCallStack.cpp index 911ea0c86fd2..77ffed71ad4f 100644 --- a/arangod/Aql/AqlCallStack.cpp +++ b/arangod/Aql/AqlCallStack.cpp @@ -42,7 +42,7 @@ AqlCallStack::AqlCallStack(AqlCallStack const& other, AqlCall call) // We can only use this constructor on relevant levels // Alothers need to use passThrough constructor TRI_ASSERT(other._depth == 0); - _operations.push(std::move(call)); + _operations.emplace_back(std::move(call)); _compatibilityMode3_6 = other._compatibilityMode3_6; } @@ -51,7 +51,7 @@ AqlCallStack::AqlCallStack(AqlCallStack const& other) _depth(other._depth), _compatibilityMode3_6(other._compatibilityMode3_6) {} -AqlCallStack::AqlCallStack(std::stack&& operations) +AqlCallStack::AqlCallStack(std::vector&& operations) : _operations(std::move(operations)) {} bool AqlCallStack::isRelevant() const { return _depth == 0; } @@ -68,40 +68,41 @@ AqlCall AqlCallStack::popCall() { // to the upwards subquery. // => Simply put another fetchAll Call on the stack. // This code is to be removed in the next version after 3.7 - _operations.push(AqlCall{}); + _operations.emplace_back(AqlCall{}); } - auto call = _operations.top(); - _operations.pop(); + auto call = _operations.back(); + _operations.pop_back(); return call; } AqlCall const& AqlCallStack::peek() const { TRI_ASSERT(isRelevant()); + TRI_ASSERT(_compatibilityMode3_6 || !_operations.empty()); + if (is36Compatible() && _operations.empty()) { + // This is only for compatibility with 3.6 + // there we do not have the stack beeing passed-through + // in AQL, we only have a single call. + // We can only get into this state in the abscence of + // LIMIT => we always do an unlimted softLimit call + // to the upwards subquery. 
+ // => Simply put another fetchAll Call on the stack. + // This code is to be removed in the next version after 3.7 + _operations.emplace_back(AqlCall{}); + } TRI_ASSERT(!_operations.empty()); - return _operations.top(); + return _operations.back(); } void AqlCallStack::pushCall(AqlCall&& call) { // TODO is this correct on subqueries? TRI_ASSERT(isRelevant()); - _operations.push(call); + _operations.emplace_back(std::move(call)); } void AqlCallStack::pushCall(AqlCall const& call) { // TODO is this correct on subqueries? TRI_ASSERT(isRelevant()); - _operations.push(call); -} - -void AqlCallStack::stackUpMissingCalls() { - while (!isRelevant()) { - // For every depth, we add an additional default call. - // The default is to produce unlimited many results, - // using DefaultBatchSize each. - _operations.emplace(AqlCall{}); - _depth--; - } - TRI_ASSERT(isRelevant()); + _operations.emplace_back(call); } void AqlCallStack::pop() { @@ -133,8 +134,9 @@ auto AqlCallStack::fromVelocyPack(velocypack::Slice const slice) -> ResultT{}; + auto stack = std::vector{}; auto i = std::size_t{0}; + stack.reserve(slice.length()); for (auto const entry : VPackArrayIterator(slice)) { auto maybeAqlCall = AqlCall::fromVelocyPack(entry); @@ -146,7 +148,7 @@ auto AqlCallStack::fromVelocyPack(velocypack::Slice const slice) -> ResultT ResultT{}; - reverseStack.reserve(_operations.size()); - { - auto ops = _operations; - while (!ops.empty()) { - reverseStack.emplace_back(ops.top()); - ops.pop(); - } - } - builder.openArray(); - for (auto it = reverseStack.rbegin(); it != reverseStack.rend(); ++it) { - auto const& call = *it; + for (auto const& call : _operations) { call.toVelocyPack(builder); } builder.close(); @@ -178,19 +169,50 @@ void AqlCallStack::toVelocyPack(velocypack::Builder& builder) const { auto AqlCallStack::toString() const -> std::string { auto result = std::string{}; result += "["; - auto ops = _operations; - if (!ops.empty()) { - auto op = ops.top(); - ops.pop(); + bool isFirst = true; + for (auto const& op : _operations) { + if (!isFirst) { + result += ","; + } + isFirst = false; result += " "; result += op.toString(); - while (!ops.empty()) { - op = ops.top(); - ops.pop(); - result += ", "; - result += op.toString(); - } } result += " ]"; return result; } + +auto AqlCallStack::createEquivalentFetchAllShadowRowsStack() const -> AqlCallStack { + AqlCallStack res{*this}; + std::replace_if( + res._operations.begin(), res._operations.end(), + [](auto const&) -> bool { return true; }, AqlCall{}); + return res; +} + +auto AqlCallStack::needToSkipSubquery() const noexcept -> bool { + return std::any_of(_operations.begin(), _operations.end(), [](AqlCall const& call) -> bool { + return call.needSkipMore() || call.hardLimit == 0; + }); +} + +auto AqlCallStack::shadowRowDepthToSkip() const -> size_t { + TRI_ASSERT(needToSkipSubquery()); + for (size_t i = 0; i < _operations.size(); ++i) { + auto& call = _operations.at(i); + if (call.needSkipMore() || call.hardLimit == 0) { + return _operations.size() - i - 1; + } + } + // unreachable + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL_AQL); +} + +auto AqlCallStack::modifyCallAtDepth(size_t depth) -> AqlCall& { + // depth 0 is back of vector + TRI_ASSERT(_operations.size() > depth); + // Take the depth-most from top of the vector. 
+ auto& call = *(_operations.rbegin() + depth); + return call; +} diff --git a/arangod/Aql/AqlCallStack.h b/arangod/Aql/AqlCallStack.h index 4aa03285f5d8..a457578223bf 100644 --- a/arangod/Aql/AqlCallStack.h +++ b/arangod/Aql/AqlCallStack.h @@ -66,14 +66,6 @@ class AqlCallStack { // Put another call on top of the stack. void pushCall(AqlCall const& call); - // fill up all missing calls within this stack s.t. we reach depth == 0 - // This needs to be called if an executor requires to be fully executed, even if skipped, - // even if the subquery it is located in is skipped. - // The default operations added here will correspond to produce all Rows, unlimitted. - // e.g. every Modification Executor needs to call this functionality, as modifictions need to be - // performed even if skipped. - void stackUpMissingCalls(); - // Pops one subquery level. // if this isRelevent it pops the top-most call from the stack. // if this is not revelent it reduces the depth by 1. @@ -95,12 +87,58 @@ class AqlCallStack { void toVelocyPack(velocypack::Builder& builder) const; + auto is36Compatible() const noexcept -> bool { return _compatibilityMode3_6; } + + /** + * @brief Create an equivalent call stack that does a full-produce + * of all Subquery levels. This is required for blocks + * that are not allowed to be bpassed. + * The top-most call remains unmodified, as the Executor might + * require some soft limit on it. + * + * @return AqlCallStack a stack of equivalent size, that does not skip + * on any lower subquery. + */ + auto createEquivalentFetchAllShadowRowsStack() const -> AqlCallStack; + + /** + * @brief Check if we are in a subquery that is in-fact required to + * be skipped. This is relevant for executors that have created + * an equivalentFetchAllShadowRows stack, in order to decide if + * the need to produce output or if they are skipped. + * + * @return true + * @return false + */ + auto needToSkipSubquery() const noexcept -> bool; + + /** + * @brief This is only valid if needToSkipSubquery is true. + * It will resolve to the heighest subquery level + * (outermost) that needs to be skipped. + * + * + * @return size_t Depth of the subquery that asks to be skipped. + */ + auto shadowRowDepthToSkip() const -> size_t; + + /** + * @brief Get a reference to the call at the given shadowRowDepth + * + * @param depth ShadowRow depth we need to work on + * @return AqlCall& reference to the call, can be modified. + */ + auto modifyCallAtDepth(size_t depth) -> AqlCall&; + private: - explicit AqlCallStack(std::stack&& operations); + explicit AqlCallStack(std::vector&& operations); private: - // The list of operations, stacked by depth (e.g. bottom element is from main query) - std::stack _operations; + // The list of operations, stacked by depth (e.g. bottom element is from main + // query) NOTE: This is only mutable on 3.6 compatibility mode. We need to + // inject an additional call in any const operation here just to pretend we + // are not empty. Can be removed after 3.7. + mutable std::vector _operations; // The depth of subqueries that have not issued calls into operations, // as they have been skipped. 
diff --git a/arangod/Aql/AqlExecuteResult.cpp b/arangod/Aql/AqlExecuteResult.cpp index db10deeaab96..be7d49d1cb98 100644 --- a/arangod/Aql/AqlExecuteResult.cpp +++ b/arangod/Aql/AqlExecuteResult.cpp @@ -44,11 +44,25 @@ auto getStringView(velocypack::Slice slice) -> std::string_view { } } // namespace +AqlExecuteResult::AqlExecuteResult(ExecutionState state, SkipResult skipped, + SharedAqlItemBlockPtr&& block) + : _state(state), _skipped(skipped), _block(std::move(block)) { + // Make sure we only produce a valid response + // The block should have checked as well. + // We must return skipped and/or data when reporting HASMORE + + // noskip && no data => state != HASMORE + // <=> skipped || data || state != HASMORE + TRI_ASSERT(!_skipped.nothingSkipped() || + (_block != nullptr && _block->numEntries() > 0) || + _state != ExecutionState::HASMORE); +} + auto AqlExecuteResult::state() const noexcept -> ExecutionState { return _state; } -auto AqlExecuteResult::skipped() const noexcept -> std::size_t { +auto AqlExecuteResult::skipped() const noexcept -> SkipResult { return _skipped; } @@ -75,7 +89,8 @@ void AqlExecuteResult::toVelocyPack(velocypack::Builder& builder, builder.openObject(); builder.add(StaticStrings::AqlRemoteState, stateToValue(state())); - builder.add(StaticStrings::AqlRemoteSkipped, Value(skipped())); + builder.add(Value(StaticStrings::AqlRemoteSkipped)); + skipped().toVelocyPack(builder); if (block() != nullptr) { ObjectBuilder guard(&builder, StaticStrings::AqlRemoteBlock); block()->toVelocyPack(options, builder); @@ -101,7 +116,7 @@ auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, expectedPropertiesFound.emplace(StaticStrings::AqlRemoteBlock, false); auto state = ExecutionState::HASMORE; - auto skipped = std::size_t{}; + auto skipped = SkipResult{}; auto block = SharedAqlItemBlockPtr{}; auto const readState = [](velocypack::Slice slice) -> ResultT { @@ -127,24 +142,6 @@ auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, } }; - auto const readSkipped = [](velocypack::Slice slice) -> ResultT { - if (!slice.isInteger()) { - auto message = std::string{ - "When deserializating AqlExecuteResult: When reading skipped: " - "Unexpected type "}; - message += slice.typeName(); - return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); - } - try { - return slice.getNumber(); - } catch (velocypack::Exception const& ex) { - auto message = std::string{ - "When deserializating AqlExecuteResult: When reading skipped: "}; - message += ex.what(); - return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); - } - }; - auto const readBlock = [&itemBlockManager](velocypack::Slice slice) -> ResultT { if (slice.isNull()) { return SharedAqlItemBlockPtr{nullptr}; @@ -165,9 +162,9 @@ auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, if (auto propIt = expectedPropertiesFound.find(key); ADB_LIKELY(propIt != expectedPropertiesFound.end())) { if (ADB_UNLIKELY(propIt->second)) { - return Result( - TRI_ERROR_TYPE_ERROR, - "When deserializating AqlExecuteResult: Encountered duplicate key"); + return Result(TRI_ERROR_TYPE_ERROR, + "When deserializating AqlExecuteResult: " + "Encountered duplicate key"); } propIt->second = true; } @@ -179,7 +176,7 @@ auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, } state = maybeState.get(); } else if (key == StaticStrings::AqlRemoteSkipped) { - auto maybeSkipped = readSkipped(it.value); + auto maybeSkipped = SkipResult::fromVelocyPack(it.value); if (maybeSkipped.fail()) { return 
std::move(maybeSkipped).result(); } @@ -192,11 +189,12 @@ auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, block = maybeBlock.get(); } else { LOG_TOPIC("cc6f4", WARN, Logger::AQL) - << "When deserializating AqlExecuteResult: Encountered unexpected " + << "When deserializating AqlExecuteResult: Encountered " + "unexpected " "key " << keySlice.toJson(); - // If you run into this assertion during rolling upgrades after adding a - // new attribute, remove it in the older version. + // If you run into this assertion during rolling upgrades after + // adding a new attribute, remove it in the older version. TRI_ASSERT(false); } } @@ -214,6 +212,6 @@ auto AqlExecuteResult::fromVelocyPack(velocypack::Slice const slice, } auto AqlExecuteResult::asTuple() const noexcept - -> std::tuple { + -> std::tuple { return {state(), skipped(), block()}; } diff --git a/arangod/Aql/AqlExecuteResult.h b/arangod/Aql/AqlExecuteResult.h index 56eb448c83db..409e598f9835 100644 --- a/arangod/Aql/AqlExecuteResult.h +++ b/arangod/Aql/AqlExecuteResult.h @@ -25,6 +25,7 @@ #include "Aql/ExecutionState.h" #include "Aql/SharedAqlItemBlockPtr.h" +#include "Aql/SkipResult.h" #include @@ -42,23 +43,22 @@ namespace arangodb::aql { class AqlExecuteResult { public: - AqlExecuteResult(ExecutionState state, std::size_t skipped, SharedAqlItemBlockPtr&& block) - : _state(state), _skipped(skipped), _block(std::move(block)) {} + AqlExecuteResult(ExecutionState state, SkipResult skipped, SharedAqlItemBlockPtr&& block); void toVelocyPack(velocypack::Builder&, velocypack::Options const*); static auto fromVelocyPack(velocypack::Slice, AqlItemBlockManager&) -> ResultT; [[nodiscard]] auto state() const noexcept -> ExecutionState; - [[nodiscard]] auto skipped() const noexcept -> std::size_t; + [[nodiscard]] auto skipped() const noexcept -> SkipResult; [[nodiscard]] auto block() const noexcept -> SharedAqlItemBlockPtr const&; [[nodiscard]] auto asTuple() const noexcept - -> std::tuple; + -> std::tuple; private: ExecutionState _state = ExecutionState::HASMORE; - std::size_t _skipped = 0; + SkipResult _skipped{}; SharedAqlItemBlockPtr _block = nullptr; }; diff --git a/arangod/Aql/BlocksWithClients.cpp b/arangod/Aql/BlocksWithClients.cpp index b4a98714b920..89ebd3ca26cb 100644 --- a/arangod/Aql/BlocksWithClients.cpp +++ b/arangod/Aql/BlocksWithClients.cpp @@ -35,6 +35,7 @@ #include "Aql/InputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/ScatterExecutor.h" +#include "Aql/SkipResult.h" #include "Basics/Exceptions.h" #include "Basics/StaticStrings.h" #include "Basics/StringBuffer.h" @@ -188,7 +189,8 @@ std::pair BlocksWithClientsImpl::skipSome(size } template -std::tuple BlocksWithClientsImpl::execute(AqlCallStack stack) { +std::tuple +BlocksWithClientsImpl::execute(AqlCallStack stack) { // This will not be implemented here! 
TRI_ASSERT(false); THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); @@ -197,17 +199,17 @@ std::tuple BlocksWithClientsImpl< template auto BlocksWithClientsImpl::executeForClient(AqlCallStack stack, std::string const& clientId) - -> std::tuple { - // traceExecuteBegin(stack); + -> std::tuple { + traceExecuteBegin(stack, clientId); auto res = executeWithoutTraceForClient(stack, clientId); - // traceExecuteEnd(res); + traceExecuteEnd(res, clientId); return res; } template auto BlocksWithClientsImpl::executeWithoutTraceForClient(AqlCallStack stack, std::string const& clientId) - -> std::tuple { + -> std::tuple { TRI_ASSERT(!clientId.empty()); if (ADB_UNLIKELY(clientId.empty())) { // Security bailout to avoid UB @@ -230,21 +232,32 @@ auto BlocksWithClientsImpl::executeWithoutTraceForClient(AqlCallStack // We do not have anymore data locally. // Need to fetch more from upstream auto& dataContainer = it->second; - - while (!dataContainer.hasDataFor(call)) { - if (_upstreamState == ExecutionState::DONE) { - // We are done, with everything, we will not be able to fetch any more rows - return {_upstreamState, 0, nullptr}; + while (true) { + while (!dataContainer.hasDataFor(call)) { + if (_upstreamState == ExecutionState::DONE) { + // We are done, with everything, we will not be able to fetch any more rows + return {_upstreamState, SkipResult{}, nullptr}; + } + + auto state = fetchMore(stack); + if (state == ExecutionState::WAITING) { + return {state, SkipResult{}, nullptr}; + } + _upstreamState = state; } - - auto state = fetchMore(stack); - if (state == ExecutionState::WAITING) { - return {state, 0, nullptr}; + { + // If we get here we have data and can return it. + // However the call might force us to drop everything (e.g. hardLimit == + // 0) So we need to refetch data eventually. + stack.pushCall(call); + auto [state, skipped, result] = dataContainer.execute(stack, _upstreamState); + if (state == ExecutionState::DONE || !skipped.nothingSkipped() || result != nullptr) { + // We have a valid result. + return {state, skipped, result}; + } + stack.popCall(); } - _upstreamState = state; } - // If we get here we have data and can return it. - return dataContainer.execute(call, _upstreamState); } template @@ -262,10 +275,9 @@ auto BlocksWithClientsImpl::fetchMore(AqlCallStack stack) -> Execution TRI_ASSERT(_dependencies.size() == 1); auto [state, skipped, block] = _dependencies[0]->execute(stack); - // We can never ever forward skip! // We could need the row in a different block, and once skipped // we cannot get it back. 
- TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.getSkipCount() == 0); TRI_IF_FAILURE("ExecutionBlock::getBlock") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); @@ -274,7 +286,7 @@ auto BlocksWithClientsImpl::fetchMore(AqlCallStack stack) -> Execution // Waiting -> no block TRI_ASSERT(state != ExecutionState::WAITING || block == nullptr); if (block != nullptr) { - _executor.distributeBlock(block, _clientBlockData); + _executor.distributeBlock(block, skipped, _clientBlockData); } return state; @@ -287,7 +299,7 @@ std::pair BlocksWithClientsImpl size_t atMost, std::string const& shardId) { AqlCallStack stack(AqlCall::SimulateGetSome(atMost), true); auto [state, skipped, block] = executeForClient(stack, shardId); - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); return {state, block}; } @@ -299,7 +311,7 @@ std::pair BlocksWithClientsImpl::skipSomeForSh AqlCallStack stack(AqlCall::SimulateSkipSome(atMost), true); auto [state, skipped, block] = executeForClient(stack, shardId); TRI_ASSERT(block == nullptr); - return {state, skipped}; + return {state, skipped.getSkipCount()}; } template class ::arangodb::aql::BlocksWithClientsImpl; diff --git a/arangod/Aql/BlocksWithClients.h b/arangod/Aql/BlocksWithClients.h index ead4589bf64f..75e68a37e306 100644 --- a/arangod/Aql/BlocksWithClients.h +++ b/arangod/Aql/BlocksWithClients.h @@ -48,6 +48,7 @@ class AqlItemBlock; struct Collection; class ExecutionEngine; class ExecutionNode; +class SkipResult; class ClientsExecutorInfos { public: @@ -87,7 +88,7 @@ class BlocksWithClients { * @return std::tuple */ virtual auto executeForClient(AqlCallStack stack, std::string const& clientId) - -> std::tuple = 0; + -> std::tuple = 0; }; /** @@ -130,7 +131,7 @@ class BlocksWithClientsImpl : public ExecutionBlock, public BlocksWithClients { std::pair skipSome(size_t atMost) final; /// @brief execute: shouldn't be used, use executeForClient - std::tuple execute(AqlCallStack stack) override; + std::tuple execute(AqlCallStack stack) override; /** * @brief Execute for client. @@ -141,7 +142,7 @@ class BlocksWithClientsImpl : public ExecutionBlock, public BlocksWithClients { * @return std::tuple */ auto executeForClient(AqlCallStack stack, std::string const& clientId) - -> std::tuple override; + -> std::tuple override; private: /** @@ -152,7 +153,7 @@ class BlocksWithClientsImpl : public ExecutionBlock, public BlocksWithClients { * @return std::tuple */ auto executeWithoutTraceForClient(AqlCallStack stack, std::string const& clientId) - -> std::tuple; + -> std::tuple; /** * @brief Load more data from upstream and distribute it into _clientBlockData diff --git a/arangod/Aql/ConstFetcher.cpp b/arangod/Aql/ConstFetcher.cpp index 14ba394b584e..42e858d1f9e8 100644 --- a/arangod/Aql/ConstFetcher.cpp +++ b/arangod/Aql/ConstFetcher.cpp @@ -25,6 +25,7 @@ #include "Aql/AqlCallStack.h" #include "Aql/DependencyProxy.h" #include "Aql/ShadowAqlItemRow.h" +#include "Aql/SkipResult.h" #include "Basics/Exceptions.h" #include "Basics/voc-errors.h" @@ -37,7 +38,7 @@ ConstFetcher::ConstFetcher(DependencyProxy& executionBlock) : _currentBlock{nullptr}, _rowIndex(0) {} auto ConstFetcher::execute(AqlCallStack& stack) - -> std::tuple { + -> std::tuple { // Note this fetcher can only be executed on top level (it is the singleton, or test) TRI_ASSERT(stack.isRelevant()); // We only peek the call here, as we do not take over ownership. 
@@ -45,7 +46,7 @@ auto ConstFetcher::execute(AqlCallStack& stack) auto call = stack.peek(); if (_blockForPassThrough == nullptr) { // we are done, nothing to move arround here. - return {ExecutionState::DONE, 0, AqlItemBlockInputRange{ExecutorState::DONE}}; + return {ExecutionState::DONE, SkipResult{}, AqlItemBlockInputRange{ExecutorState::DONE}}; } std::vector> sliceIndexes; sliceIndexes.emplace_back(_rowIndex, _blockForPassThrough->size()); @@ -152,7 +153,10 @@ auto ConstFetcher::execute(AqlCallStack& stack) SharedAqlItemBlockPtr resultBlock = _blockForPassThrough; _blockForPassThrough.reset(nullptr); _rowIndex = 0; - return {ExecutionState::DONE, call.getSkipCount(), + SkipResult skipped{}; + skipped.didSkip(call.getSkipCount()); + + return {ExecutionState::DONE, skipped, DataRange{ExecutorState::DONE, call.getSkipCount(), resultBlock, 0}}; } @@ -176,7 +180,9 @@ auto ConstFetcher::execute(AqlCallStack& stack) // No data to be returned // Block is dropped. resultBlock = nullptr; - return {ExecutionState::DONE, call.getSkipCount(), + SkipResult skipped{}; + skipped.didSkip(call.getSkipCount()); + return {ExecutionState::DONE, skipped, DataRange{ExecutorState::DONE, call.getSkipCount()}}; } @@ -187,8 +193,9 @@ auto ConstFetcher::execute(AqlCallStack& stack) _blockForPassThrough == nullptr ? ExecutorState::DONE : ExecutorState::HASMORE; resultBlock = resultBlock->slice(sliceIndexes); - return {resState, call.getSkipCount(), - DataRange{rangeState, call.getSkipCount(), resultBlock, 0}}; + SkipResult skipped{}; + skipped.didSkip(call.getSkipCount()); + return {resState, skipped, DataRange{rangeState, call.getSkipCount(), resultBlock, 0}}; } void ConstFetcher::injectBlock(SharedAqlItemBlockPtr block) { diff --git a/arangod/Aql/ConstFetcher.h b/arangod/Aql/ConstFetcher.h index 70fe73fa4c12..33e0215bee70 100644 --- a/arangod/Aql/ConstFetcher.h +++ b/arangod/Aql/ConstFetcher.h @@ -37,6 +37,7 @@ class AqlItemBlock; template class DependencyProxy; class ShadowAqlItemRow; +class SkipResult; /** * @brief Interface for all AqlExecutors that do only need one @@ -71,7 +72,7 @@ class ConstFetcher { * size_t => Amount of documents skipped * DataRange => Resulting data */ - auto execute(AqlCallStack& stack) -> std::tuple; + auto execute(AqlCallStack& stack) -> std::tuple; /** * @brief Fetch one new AqlItemRow from upstream. 
diff --git a/arangod/Aql/DependencyProxy.cpp b/arangod/Aql/DependencyProxy.cpp index cdbc955fac6c..0c9b8535987d 100644 --- a/arangod/Aql/DependencyProxy.cpp +++ b/arangod/Aql/DependencyProxy.cpp @@ -32,10 +32,10 @@ using namespace arangodb; using namespace arangodb::aql; template -std::tuple +std::tuple DependencyProxy::execute(AqlCallStack& stack) { ExecutionState state = ExecutionState::HASMORE; - size_t skipped = 0; + SkipResult skipped; SharedAqlItemBlockPtr block = nullptr; // Note: upstreamBlock will return next dependency // if we need to loop here @@ -47,16 +47,16 @@ DependencyProxy::execute(AqlCallStack& stack) { break; } } - } while (state != ExecutionState::WAITING && skipped == 0 && block == nullptr); + } while (state != ExecutionState::WAITING && skipped.nothingSkipped() && block == nullptr); return {state, skipped, block}; } template -std::tuple DependencyProxy::executeForDependency( - size_t dependency, AqlCallStack& stack) { - // TODO: assert dependency in range +std::tuple +DependencyProxy::executeForDependency(size_t dependency, + AqlCallStack& stack) { ExecutionState state = ExecutionState::HASMORE; - size_t skipped = 0; + SkipResult skipped; SharedAqlItemBlockPtr block = nullptr; if (!_distributeId.empty()) { @@ -81,7 +81,7 @@ std::tuple DependencyProxy::prefetchBlock(size_t atMost) { AqlCallStack stack = _injectedStack; stack.pushCall(AqlCall::SimulateGetSome(atMost)); // Also temporary, will not be used here. - size_t skipped = 0; + SkipResult skipped; do { // Note: upstreamBlock will return next dependency // if we need to loop here @@ -115,7 +115,7 @@ ExecutionState DependencyProxy::prefetchBlock(size_t atMost) { } // Cannot do skipping here // Temporary! - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); if (state == ExecutionState::WAITING) { TRI_ASSERT(block == nullptr); @@ -190,7 +190,7 @@ DependencyProxy::fetchBlockForDependency(size_t dependency, si AqlCallStack stack = _injectedStack; stack.pushCall(AqlCall::SimulateGetSome(atMost)); // Also temporary, will not be used here. - size_t skipped = 0; + SkipResult skipped{}; if (_distributeId.empty()) { std::tie(state, skipped, block) = upstream.execute(stack); @@ -199,7 +199,7 @@ DependencyProxy::fetchBlockForDependency(size_t dependency, si std::tie(state, skipped, block) = upstreamWithClient->executeForClient(stack, _distributeId); } - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); TRI_IF_FAILURE("ExecutionBlock::getBlock") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -244,7 +244,7 @@ std::pair DependencyProxy::skipSomeFor SharedAqlItemBlockPtr block; while (state == ExecutionState::HASMORE && _skipped < atMost) { - size_t skippedNow; + SkipResult skippedNow; TRI_ASSERT(_skipped <= atMost); { // Make sure we call with the correct offset @@ -255,14 +255,14 @@ std::pair DependencyProxy::skipSomeFor } std::tie(state, skippedNow, block) = upstream.execute(stack); if (state == ExecutionState::WAITING) { - TRI_ASSERT(skippedNow == 0); + TRI_ASSERT(skippedNow.nothingSkipped()); return {state, 0}; } // Temporary. 
// If we return a block here it will be lost TRI_ASSERT(block == nullptr); - _skipped += skippedNow; + _skipped += skippedNow.getSkipCount(); TRI_ASSERT(_skipped <= atMost); } TRI_ASSERT(state != ExecutionState::WAITING); @@ -290,7 +290,7 @@ std::pair DependencyProxy::skipSome(si SharedAqlItemBlockPtr block; while (_skipped < toSkip) { - size_t skippedNow; + SkipResult skippedNow; // Note: upstreamBlock will return next dependency // if we need to loop here TRI_ASSERT(_skipped <= toSkip); @@ -309,10 +309,10 @@ std::pair DependencyProxy::skipSome(si upstreamWithClient->executeForClient(stack, _distributeId); } - TRI_ASSERT(skippedNow <= toSkip - _skipped); + TRI_ASSERT(skippedNow.getSkipCount() <= toSkip - _skipped); if (state == ExecutionState::WAITING) { - TRI_ASSERT(skippedNow == 0); + TRI_ASSERT(skippedNow.nothingSkipped()); return {state, 0}; } @@ -320,7 +320,7 @@ std::pair DependencyProxy::skipSome(si // If we return a block here it will be lost TRI_ASSERT(block == nullptr); - _skipped += skippedNow; + _skipped += skippedNow.getSkipCount(); // When the current dependency is done, advance. if (state == ExecutionState::DONE) { diff --git a/arangod/Aql/DependencyProxy.h b/arangod/Aql/DependencyProxy.h index 108e6bad1ac9..d28dda04a312 100644 --- a/arangod/Aql/DependencyProxy.h +++ b/arangod/Aql/DependencyProxy.h @@ -37,6 +37,7 @@ namespace arangodb::aql { class ExecutionBlock; class AqlItemBlockManager; +class SkipResult; /** * @brief Thin interface to access the methods of ExecutionBlock that are @@ -74,9 +75,9 @@ class DependencyProxy { TEST_VIRTUAL ~DependencyProxy() = default; // TODO Implement and document properly! - TEST_VIRTUAL std::tuple execute(AqlCallStack& stack); + TEST_VIRTUAL std::tuple execute(AqlCallStack& stack); - TEST_VIRTUAL std::tuple executeForDependency( + TEST_VIRTUAL std::tuple executeForDependency( size_t dependency, AqlCallStack& stack); // This is only TEST_VIRTUAL, so we ignore this lint warning: diff --git a/arangod/Aql/DistributeExecutor.cpp b/arangod/Aql/DistributeExecutor.cpp index 07cd89c9f4b5..a528e68c3fc6 100644 --- a/arangod/Aql/DistributeExecutor.cpp +++ b/arangod/Aql/DistributeExecutor.cpp @@ -31,6 +31,7 @@ #include "Aql/Query.h" #include "Aql/RegisterPlan.h" #include "Aql/ShadowAqlItemRow.h" +#include "Aql/SkipResult.h" #include "Basics/StaticStrings.h" #include "VocBase/LogicalCollection.h" @@ -152,6 +153,12 @@ auto DistributeExecutor::ClientBlockData::addBlock(SharedAqlItemBlockPtr block, _queue.emplace_back(block, std::move(usedIndexes)); } +auto DistributeExecutor::ClientBlockData::addSkipResult(SkipResult const& skipResult) -> void { + TRI_ASSERT(_skipped.subqueryDepth() == 1 || + _skipped.subqueryDepth() == skipResult.subqueryDepth()); + _skipped.merge(skipResult, false); +} + auto DistributeExecutor::ClientBlockData::hasDataFor(AqlCall const& call) -> bool { return _executorHasMore || !_queue.empty(); } @@ -166,7 +173,8 @@ auto DistributeExecutor::ClientBlockData::hasDataFor(AqlCall const& call) -> boo * * @return SharedAqlItemBlockPtr a joind block from the queue. */ -auto DistributeExecutor::ClientBlockData::popJoinedBlock() -> SharedAqlItemBlockPtr { +auto DistributeExecutor::ClientBlockData::popJoinedBlock() + -> std::tuple { // There are some optimizations available in this implementation. // Namely we could apply good logic to cut the blocks at shadow rows // in order to allow the IDexecutor to hand them out en-block. 
@@ -208,14 +216,16 @@ auto DistributeExecutor::ClientBlockData::popJoinedBlock() -> SharedAqlItemBlock // Drop block form queue. _queue.pop_front(); } - return newBlock; + SkipResult skip = _skipped; + _skipped.reset(); + return {newBlock, skip}; } -auto DistributeExecutor::ClientBlockData::execute(AqlCall call, ExecutionState upstreamState) - -> std::tuple { +auto DistributeExecutor::ClientBlockData::execute(AqlCallStack callStack, ExecutionState upstreamState) + -> std::tuple { TRI_ASSERT(_executor != nullptr); // Make sure we actually have data before you call execute - TRI_ASSERT(hasDataFor(call)); + TRI_ASSERT(hasDataFor(callStack.peek())); if (!_executorHasMore) { // This cast is guaranteed, we create this a couple lines above and only // this executor is used here. @@ -224,16 +234,15 @@ auto DistributeExecutor::ClientBlockData::execute(AqlCall call, ExecutionState u auto casted = static_cast>*>(_executor.get()); TRI_ASSERT(casted != nullptr); - auto block = popJoinedBlock(); + auto [block, skipped] = popJoinedBlock(); // We will at least get one block, otherwise the hasDataFor would // be required to return false! TRI_ASSERT(block != nullptr); - casted->injectConstantBlock(block); + casted->injectConstantBlock(block, skipped); _executorHasMore = true; } - AqlCallStack stack{call}; - auto [state, skipped, result] = _executor->execute(stack); + auto [state, skipped, result] = _executor->execute(callStack); // We have all data locally cannot wait here. TRI_ASSERT(state != ExecutionState::WAITING); @@ -257,7 +266,7 @@ auto DistributeExecutor::ClientBlockData::execute(AqlCall call, ExecutionState u DistributeExecutor::DistributeExecutor(DistributeExecutorInfos const& infos) : _infos(infos){}; -auto DistributeExecutor::distributeBlock(SharedAqlItemBlockPtr block, +auto DistributeExecutor::distributeBlock(SharedAqlItemBlockPtr block, SkipResult skipped, std::unordered_map& blockMap) -> void { std::unordered_map> choosenMap; @@ -289,6 +298,12 @@ auto DistributeExecutor::distributeBlock(SharedAqlItemBlockPtr block, } target->second.addBlock(block, std::move(value)); } + + // Add the skipResult to all clients. + // It needs to be fetched once for every client. + for (auto& [key, map] : blockMap) { + map.addSkipResult(skipped); + } } auto DistributeExecutor::getClient(SharedAqlItemBlockPtr block, size_t rowIndex) diff --git a/arangod/Aql/DistributeExecutor.h b/arangod/Aql/DistributeExecutor.h index b6954bc1ff3a..0e7bd1a618b6 100644 --- a/arangod/Aql/DistributeExecutor.h +++ b/arangod/Aql/DistributeExecutor.h @@ -92,29 +92,33 @@ class DistributeExecutor { auto clear() -> void; auto addBlock(SharedAqlItemBlockPtr block, std::vector usedIndexes) -> void; + + auto addSkipResult(SkipResult const& skipResult) -> void; auto hasDataFor(AqlCall const& call) -> bool; - auto execute(AqlCall call, ExecutionState upstreamState) - -> std::tuple; + auto execute(AqlCallStack callStack, ExecutionState upstreamState) + -> std::tuple; private: /** * @brief This call will join as many blocks as available from the queue * and return them in a SingleBlock. We then use the IdExecutor * to hand out the data contained in these blocks - * We do on purpose not give any kind of guarantees on the sizing of - * this block to be flexible with the implementation, and find a good + * We do on purpose not give any kind of guarantees on the sizing + * of this block to be flexible with the implementation, and find a good * trade-off between blocksize and block copy operations. 
* - * @return SharedAqlItemBlockPtr a joind block from the queue. + * @return SharedAqlItemBlockPtr a joined block from the queue. + * SkipResult the skip information matching to this block */ - auto popJoinedBlock() -> SharedAqlItemBlockPtr; + auto popJoinedBlock() -> std::tuple; private: AqlItemBlockManager& _blockManager; ExecutorInfos const& _infos; std::deque>> _queue; + SkipResult _skipped{}; // This is unique_ptr to get away with everything being forward declared... std::unique_ptr _executor; @@ -132,9 +136,10 @@ class DistributeExecutor { * Hence this method is not const ;( * * @param block The block to be distributed + * @param skipped The rows that have been skipped from upstream * @param blockMap Map client => Data. Will provide the required data to the correct client. */ - auto distributeBlock(SharedAqlItemBlockPtr block, + auto distributeBlock(SharedAqlItemBlockPtr block, SkipResult skipped, std::unordered_map& blockMap) -> void; private: diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index ec4ca75d3433..37538244f10f 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -298,7 +298,7 @@ bool ExecutionBlock::isInSplicedSubquery() const noexcept { return _isInSplicedSubquery; } -void ExecutionBlock::traceExecuteBegin(AqlCallStack const& stack) { +void ExecutionBlock::traceExecuteBegin(AqlCallStack const& stack, std::string const& clientId) { if (_profile >= PROFILE_LEVEL_BLOCKS) { if (_getSomeBegin <= 0.0) { _getSomeBegin = TRI_microtime(); @@ -311,20 +311,22 @@ void ExecutionBlock::traceExecuteBegin(AqlCallStack const& stack) { LOG_TOPIC("1e717", INFO, Logger::QUERIES) << "[query#" << queryId << "] " << "execute type=" << node->getTypeString() << " call= " << call - << " this=" << (uintptr_t)this << " id=" << node->id(); + << " this=" << (uintptr_t)this << " id=" << node->id() + << (clientId.empty() ? "" : " clientId=" + clientId); } } } -auto ExecutionBlock::traceExecuteEnd(std::tuple const& result) - -> std::tuple { +auto ExecutionBlock::traceExecuteEnd(std::tuple const& result, + std::string const& clientId) + -> std::tuple { if (_profile >= PROFILE_LEVEL_BLOCKS) { auto const& [state, skipped, block] = result; auto const items = block != nullptr ? block->size() : 0; ExecutionNode const* en = getPlanNode(); ExecutionStats::Node stats; stats.calls = 1; - stats.items = skipped + items; + stats.items = skipped.getSkipCount() + items; if (state != ExecutionState::WAITING) { stats.runtime = TRI_microtime() - _getSomeBegin; _getSomeBegin = 0.0; @@ -339,9 +341,11 @@ auto ExecutionBlock::traceExecuteEnd(std::tuple= PROFILE_LEVEL_TRACE_1) { ExecutionNode const* node = getPlanNode(); - LOG_QUERY("60bbc", INFO) << "execute done " << printBlockInfo() - << " state=" << stateToString(state) - << " skipped=" << skipped << " produced=" << items; + LOG_QUERY("60bbc", INFO) + << "execute done " << printBlockInfo() << " state=" << stateToString(state) + << " skipped=" << skipped.getSkipCount() << " produced=" << items + << (clientId.empty() ? 
"" : " clientId=" + clientId); + ; if (_profile >= PROFILE_LEVEL_TRACE_2) { if (block == nullptr) { diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index b3652bc5576c..c1544a123b45 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -26,6 +26,7 @@ #include "Aql/BlockCollector.h" #include "Aql/ExecutionState.h" +#include "Aql/SkipResult.h" #include "Basics/Result.h" #include @@ -144,19 +145,21 @@ class ExecutionBlock { /// * WAITING: We have async operation going on, nothing happend, please call again /// * HASMORE: Here is some data in the request range, there is still more, if required call again /// * DONE: Here is some data, and there will be no further data available. - /// 2. size_t: Amount of documents skipped. + /// 2. SkipResult: Amount of documents skipped. /// 3. SharedAqlItemBlockPtr: The next data block. - virtual std::tuple execute(AqlCallStack stack) = 0; + virtual std::tuple execute(AqlCallStack stack) = 0; [[nodiscard]] bool isInSplicedSubquery() const noexcept; protected: // Trace the start of a execute call - void traceExecuteBegin(AqlCallStack const& stack); + void traceExecuteBegin(AqlCallStack const& stack, + std::string const& clientId = ""); // Trace the end of a execute call, potentially with result - auto traceExecuteEnd(std::tuple const& result) - -> std::tuple; + auto traceExecuteEnd(std::tuple const& result, + std::string const& clientId = "") + -> std::tuple; [[nodiscard]] auto printBlockInfo() const -> std::string const; [[nodiscard]] auto printTypeInfo() const -> std::string const; diff --git a/arangod/Aql/ExecutionBlockImpl.cpp b/arangod/Aql/ExecutionBlockImpl.cpp index 8e71f0dc0192..133df32b4677 100644 --- a/arangod/Aql/ExecutionBlockImpl.cpp +++ b/arangod/Aql/ExecutionBlockImpl.cpp @@ -58,6 +58,7 @@ #include "Aql/ShortestPathExecutor.h" #include "Aql/SimpleModifier.h" #include "Aql/SingleRemoteModificationExecutor.h" +#include "Aql/SkipResult.h" #include "Aql/SortExecutor.h" #include "Aql/SortRegister.h" #include "Aql/SortedCollectExecutor.h" @@ -124,6 +125,23 @@ class TestLambdaSkipExecutor; } // namespace arangodb::aql #endif +/* + * Determine whether an executor cannot bypass subquery skips. + * This is if exection of this Executor does have side-effects + * other then it's own result. + */ + +template +constexpr bool executorHasSideEffects = + is_one_of_v, + ModificationExecutor, InsertModifier>, + ModificationExecutor, + ModificationExecutor, RemoveModifier>, + ModificationExecutor, + ModificationExecutor, UpdateReplaceModifier>, + ModificationExecutor, + ModificationExecutor, UpsertModifier>>; + /* * Determine whether we execute new style or old style skips, i.e. pre or post shadow row introduction * TODO: This should be removed once all executors and fetchers are ported to the new style. @@ -507,38 +525,17 @@ static SkipVariants constexpr skipType() { template std::pair ExecutionBlockImpl::skipSome(size_t const atMost) { - if constexpr (isNewStyleExecutor) { - AqlCallStack stack{AqlCall::SimulateSkipSome(atMost)}; - auto const [state, skipped, block] = execute(stack); - - // execute returns ExecutionState::DONE here, which stops execution after simulating a skip. - // If we indiscriminately return ExecutionState::HASMORE, then we end up in an infinite loop - // - // luckily we can dispose of this kludge once executors have been ported. 
- if (skipped < atMost && state == ExecutionState::DONE) { - return {ExecutionState::DONE, skipped}; - } else { - return {ExecutionState::HASMORE, skipped}; - } + AqlCallStack stack{AqlCall::SimulateSkipSome(atMost)}; + auto const [state, skipped, block] = execute(stack); + + // execute returns ExecutionState::DONE here, which stops execution after simulating a skip. + // If we indiscriminately return ExecutionState::HASMORE, then we end up in an infinite loop + // + // luckily we can dispose of this kludge once executors have been ported. + if (skipped.getSkipCount() < atMost && state == ExecutionState::DONE) { + return {ExecutionState::DONE, skipped.getSkipCount()}; } else { - traceSkipSomeBegin(atMost); - auto state = ExecutionState::HASMORE; - - while (state == ExecutionState::HASMORE && _skipped < atMost) { - auto res = skipSomeOnceWithoutTrace(atMost - _skipped); - TRI_ASSERT(state != ExecutionState::WAITING || res.second == 0); - state = res.first; - _skipped += res.second; - TRI_ASSERT(_skipped <= atMost); - } - - size_t skipped = 0; - if (state != ExecutionState::WAITING) { - std::swap(skipped, _skipped); - } - - TRI_ASSERT(skipped <= atMost); - return traceSkipSomeEnd(state, skipped); + return {ExecutionState::HASMORE, skipped.getSkipCount()}; } } @@ -616,8 +613,8 @@ std::pair ExecutionBlockImpl::initializeCursor _lastRange = DataRange(ExecutorState::HASMORE); } - TRI_ASSERT(_skipped == 0); - _skipped = 0; + TRI_ASSERT(_skipped.nothingSkipped()); + _skipped.reset(); TRI_ASSERT(_state == InternalState::DONE || _state == InternalState::FETCH_DATA); _state = InternalState::FETCH_DATA; @@ -640,7 +637,8 @@ std::pair ExecutionBlockImpl::shutdown(int err } template -std::tuple ExecutionBlockImpl::execute(AqlCallStack stack) { +std::tuple +ExecutionBlockImpl::execute(AqlCallStack stack) { // TODO remove this IF // These are new style executors if constexpr (isNewStyleExecutor) { @@ -674,7 +672,9 @@ std::tuple ExecutionBlockImpl ExecutionBlockImplsize()); if (myCall.getLimit() == 0) { - return {ExecutionState::DONE, 0, block}; + return {ExecutionState::DONE, SkipResult{}, block}; } } - return {state, 0, block}; + return {state, SkipResult{}, block}; } else if (AqlCall::IsFullCountCall(myCall)) { auto const [state, skipped] = skipSome(ExecutionBlock::SkipAllSize()); if (state != ExecutionState::WAITING) { myCall.didSkip(skipped); } - return {state, skipped, nullptr}; + SkipResult skipRes{}; + skipRes.didSkip(skipped); + return {state, skipRes, nullptr}; } else if (AqlCall::IsFastForwardCall(myCall)) { // No idea if DONE is correct here... - return {ExecutionState::DONE, 0, nullptr}; + return {ExecutionState::DONE, SkipResult{}, nullptr}; } // Should never get here! 
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); @@ -709,8 +711,8 @@ namespace arangodb::aql { template <> template <> -auto ExecutionBlockImpl>::injectConstantBlock>(SharedAqlItemBlockPtr block) - -> void { +auto ExecutionBlockImpl>::injectConstantBlock>( + SharedAqlItemBlockPtr block, SkipResult skipped) -> void { // reinitialize the DependencyProxy _dependencyProxy.reset(); @@ -718,9 +720,16 @@ auto ExecutionBlockImpl>::injectConstantBlock ExecutionBlockImpl>:: SharedAqlItemBlockPtr block = input.cloneToBlock(_engine->itemBlockManager(), *(infos().registersToKeep()), infos().numberOfOutputRegisters()); - - injectConstantBlock(block); + TRI_ASSERT(_skipped.nothingSkipped()); + _skipped.reset(); + // We inject an empty copy of our skipped here, + // This is resettet, but will maintain the size + injectConstantBlock(block, _skipped); // end of default initializeCursor return ExecutionBlock::initializeCursor(input); @@ -1205,16 +1217,10 @@ static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwa TRI_ASSERT(call.hasHardLimit()); return FastForwardVariant::FULLCOUNT; } - // TODO: We only need to do this is the executor actually require to call. - // e.g. Modifications will always need to be called. Limit only if it needs to report fullCount - if constexpr (is_one_of_v, - ModificationExecutor, InsertModifier>, - ModificationExecutor, - ModificationExecutor, RemoveModifier>, - ModificationExecutor, - ModificationExecutor, UpdateReplaceModifier>, - ModificationExecutor, - ModificationExecutor, UpsertModifier>>) { + // TODO: We only need to do this if the executor is required to call. + // e.g. Modifications and SubqueryStart will always need to be called. Limit only if it needs to report fullCount + if constexpr (is_one_of_v || + executorHasSideEffects) { return FastForwardVariant::EXECUTOR; } return FastForwardVariant::FETCHER; @@ -1222,13 +1228,19 @@ static auto fastForwardType(AqlCall const& call, Executor const& e) -> FastForwa template auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, AqlCallType const& aqlCall) - -> std::tuple { + -> std::tuple { if constexpr (isNewStyleExecutor) { // TODO The logic in the MultiDependencySingleRowFetcher branch should be // moved into the MultiDependencySingleRowFetcher. static_assert(isMultiDepExecutor == std::is_same_v); if constexpr (std::is_same_v) { + static_assert( + !executorHasSideEffects, + "there is a special implementation for side-effect executors to " + "exchange the stack. For the MultiDependencyFetcher this special " + "case is not implemented. There is no reason to disallow this " + "case here however, it is just not needed thus far."); // Note the aqlCall is an AqlCallSet in this case: static_assert(std::is_same_v>); TRI_ASSERT(_lastRange.numberDependencies() == _dependencies.size()); @@ -1237,14 +1249,25 @@ auto ExecutionBlockImpl::executeFetcher(AqlCallStack& stack, AqlCallTy _lastRange.setDependency(dependency, range); } return {state, skipped, _lastRange}; + } else if constexpr (executorHasSideEffects) { + // If the executor has side effects, we cannot bypass any subqueries + // by skipping them. SO we need to fetch all shadow rows in order to + // trigger this Executor with everthing from above. + // NOTE: The Executor needs to discard shadowRows, and do the accouting. 
+ static_assert(std::is_same_v>); + auto fetchAllStack = stack.createEquivalentFetchAllShadowRowsStack(); + fetchAllStack.pushCall(aqlCall); + auto res = _rowFetcher.execute(fetchAllStack); + // Just make sure we did not Skip anything + TRI_ASSERT(std::get(res).nothingSkipped()); + return res; } else { // If we are SubqueryStart, we remove the top element of the stack // which belongs to the subquery enclosed by this // SubqueryStart and the partnered SubqueryEnd by *not* // pushing the upstream request. if constexpr (!std::is_same_v) { - auto callCopy = _upstreamRequest; - stack.pushCall(std::move(callCopy)); + stack.pushCall(std::move(aqlCall)); } auto const result = _rowFetcher.execute(stack); @@ -1424,6 +1447,10 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState // We still have shadowRows, we // need to forward them return ExecState::SHADOWROWS; + } else if (_outputItemRow->isFull()) { + // Fullfilled the call + // Need to return! + return ExecState::DONE; } else { if (didConsume) { // We did only consume the input @@ -1436,6 +1463,119 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState } } +template +auto ExecutionBlockImpl::nextStateAfterShadowRows(ExecutorState const& state, + DataRange const& range) const + noexcept -> ExecState { + if (state == ExecutorState::DONE) { + // We have consumed everything, we are + // Done with this query + return ExecState::DONE; + } else if (range.hasDataRow()) { + // Multiple concatenated Subqueries + // This case is disallowed for now, as we do not know the + // look-ahead call + TRI_ASSERT(false); + // If we would know we could now go into a continue with next subquery + // state. + return ExecState::DONE; + } else if (range.hasShadowRow()) { + // We still have shadowRows, we + // need to forward them + return ExecState::SHADOWROWS; + } else { + // End of input, we are done for now + // Need to call again + return ExecState::DONE; + } +} + +template +auto ExecutionBlockImpl::sideEffectShadowRowForwarding(AqlCallStack& stack, + SkipResult& skipResult) + -> ExecState { + TRI_ASSERT(executorHasSideEffects); + if (!stack.needToSkipSubquery()) { + // We need to really produce things here + // fall back to original version as any other executor. + return shadowRowForwarding(); + } + TRI_ASSERT(_outputItemRow); + TRI_ASSERT(_outputItemRow->isInitialized()); + TRI_ASSERT(!_outputItemRow->allRowsUsed()); + if (!_lastRange.hasShadowRow()) { + // We got back without a ShadowRow in the LastRange + // Let client call again + return ExecState::DONE; + } + + auto const& [state, shadowRow] = _lastRange.nextShadowRow(); + TRI_ASSERT(shadowRow.isInitialized()); + uint64_t depthSkippingNow = static_cast(stack.shadowRowDepthToSkip()); + uint64_t shadowDepth = shadowRow.getDepth(); + + bool didWriteRow = false; + if (shadowRow.isRelevant()) { + LOG_QUERY("1b257", DEBUG) << printTypeInfo() << " init executor."; + // We found a relevant shadow Row. + // We need to reset the Executor + resetExecutor(); + } + + if (depthSkippingNow > shadowDepth) { + // We are skipping the outermost Subquery. + // Simply drop this ShadowRow + } else if (depthSkippingNow == shadowDepth) { + // We are skipping on this subquery level. + // Skip the row, but report skipped 1. + AqlCall& shadowCall = stack.modifyCallAtDepth(shadowDepth); + if (shadowCall.needSkipMore()) { + shadowCall.didSkip(1); + skipResult.didSkipSubquery(1, shadowDepth); + } else { + TRI_ASSERT(shadowCall.hardLimit == 0); + // Simply drop this shadowRow! 
+ } + } else { + // We got a shadowRow of a subquery we are not skipping here. + // Do proper reporting on it's call. + AqlCall& shadowCall = stack.modifyCallAtDepth(shadowDepth); + TRI_ASSERT(!shadowCall.needSkipMore() && shadowCall.getLimit() > 0); + _outputItemRow->copyRow(shadowRow); + shadowCall.didProduce(1); + + TRI_ASSERT(_outputItemRow->produced()); + _outputItemRow->advanceRow(); + didWriteRow = true; + } + if (state == ExecutorState::DONE) { + // We have consumed everything, we are + // Done with this query + return ExecState::DONE; + } else if (_lastRange.hasDataRow()) { + // Multiple concatenated Subqueries + // This case is disallowed for now, as we do not know the + // look-ahead call + TRI_ASSERT(false); + // If we would know we could now go into a continue with next subquery + // state. + return ExecState::DONE; + } else if (_lastRange.hasShadowRow()) { + // We still have shadowRows, we + // need to forward them + return ExecState::SHADOWROWS; + } else if (didWriteRow) { + // End of input, we are done for now + // Need to call again + return ExecState::DONE; + } else { + // Done with this subquery. + // We did not write any output yet. + // So we can continue with upstream. + return ExecState::UPSTREAM; + } +} + template auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { TRI_ASSERT(_outputItemRow); @@ -1461,7 +1601,6 @@ auto ExecutionBlockImpl::shadowRowForwarding() -> ExecState { TRI_ASSERT(_outputItemRow->produced()); _outputItemRow->advanceRow(); - if (state == ExecutorState::DONE) { // We have consumed everything, we are // Done with this query @@ -1490,29 +1629,30 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang AqlCall& clientCall) -> std::tuple { TRI_ASSERT(isNewStyleExecutor); - if constexpr (std::is_same_v) { - if (clientCall.needsFullCount() && clientCall.getOffset() == 0 && - clientCall.getLimit() == 0) { - // We can savely call skipRows. - // It will not report anything if the row is already consumed - return executeSkipRowsRange(_lastRange, clientCall); - } - // Do not fastForward anything, the Subquery start will handle it by itself - return {ExecutorState::DONE, NoStats{}, 0, AqlCall{}}; - } auto type = fastForwardType(clientCall, _executor); switch (type) { - case FastForwardVariant::FULLCOUNT: - case FastForwardVariant::EXECUTOR: { + case FastForwardVariant::FULLCOUNT: { LOG_QUERY("cb135", DEBUG) << printTypeInfo() << " apply full count."; auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, clientCall); - if (type == FastForwardVariant::EXECUTOR) { - // We do not report the skip - skippedLocal = 0; + if constexpr (is_one_of_v) { + // The executor will have used all Rows. + // However we need to drop them from the input + // here. + inputRange.skipAllRemainingDataRows(); } + return {state, stats, skippedLocal, call}; + } + case FastForwardVariant::EXECUTOR: { + LOG_QUERY("2890e", DEBUG) << printTypeInfo() << " fast forward."; + // We use a DUMMY Call to simulate fullCount. + AqlCall dummy; + dummy.hardLimit = 0; + dummy.fullCount = true; + auto [state, stats, skippedLocal, call] = executeSkipRowsRange(_lastRange, dummy); + if constexpr (is_one_of_v) { // The executor will have used all Rows. 
// However we need to drop them from the input @@ -1520,7 +1660,7 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang inputRange.skipAllRemainingDataRows(); } - return {state, stats, skippedLocal, call}; + return {state, stats, 0, call}; } case FastForwardVariant::FETCHER: { LOG_QUERY("fa327", DEBUG) << printTypeInfo() << " bypass unused rows."; @@ -1531,8 +1671,8 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang return fastForwardCall; } else { #ifndef _WIN32 - // For some reason our Windows compiler complains about this static assert - // in the cases that should be in the above constexpr path. + // For some reason our Windows compiler complains about this static + // assert in the cases that should be in the above constexpr path. // So simply not compile it in. static_assert(std::is_same_v); #endif @@ -1604,7 +1744,7 @@ auto ExecutionBlockImpl::executeFastForward(typename Fetcher::DataRang * SharedAqlItemBlockPtr -> The resulting data */ template -std::tuple +std::tuple ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { if constexpr (isNewStyleExecutor) { if (!stack.isRelevant()) { @@ -1612,7 +1752,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // We are bypassing subqueries. // This executor is not allowed to perform actions // However we need to maintain the upstream state. - size_t skippedLocal = 0; + SkipResult skippedLocal; typename Fetcher::DataRange bypassedRange{ExecutorState::HASMORE}; std::tie(_upstreamState, skippedLocal, bypassedRange) = executeFetcher(stack, _upstreamRequest); @@ -1638,25 +1778,44 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _execState == ExecState::UPSTREAM); } - // Skip can only be > 0 if we are in upstream cases. - TRI_ASSERT(_skipped == 0 || _execState == ExecState::UPSTREAM); + // In some executors we may write something into the output, but then return waiting. + // In this case we are not allowed to lose the call we have been working on, we have + // noted down created or skipped rows in there. + // The client is disallowed to change her mind anyways + // so we simply continue to work on the call we already have + // The guarantee is, if we have returned the block, and modified + // our local call, then the outputItemRow is not initialized + if (_outputItemRow != nullptr && _outputItemRow->isInitialized()) { + clientCall = _outputItemRow->getClientCall(); + } - if constexpr (std::is_same_v) { - // TODO: implement forwarding of SKIP properly: - // We need to modify the execute API to instead return a vector of skipped - // values. - // Then we can simply push a skip on the Stack here and let it forward. - // In case of a modifaction we need to NOT forward a skip, but instead do - // a limit := limit + offset call and a hardLimit 0 call on top of the stack. - TRI_ASSERT(!clientCall.needSkipMore()); + // Skip can only be > 0 if we are in upstream cases, or if we got injected a block + TRI_ASSERT(_skipped.nothingSkipped() || _execState == ExecState::UPSTREAM || + (std::is_same_v>)); + + if constexpr (executorHasSideEffects) { + if (!_skipped.nothingSkipped()) { + // We get woken up on upstream, but we have not reported our + // local skip value to downstream + // In the sideEffect executor we need to apply the skip values on the + // incomming stack, which has not been modified yet. + // NOTE: We only apply the skipping on subquery level. 
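The loop that follows replays these per-depth counters onto the incoming stack. A standalone model of that bookkeeping, with a plain struct standing in for AqlCall and a vector standing in for the SkipResult levels:

    // Standalone sketch: replay already-counted subquery skips onto the calls
    // of an incoming stack before resuming work after WAITING.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct MiniCall {
      std::size_t alreadySkipped{0};
      void didSkip(std::size_t n) { alreadySkipped += n; }
    };

    // skippedPerDepth[i] holds what this block skipped for subquery depth i.
    void replaySkips(std::vector<MiniCall>& stack,
                     std::vector<std::size_t> const& skippedPerDepth) {
      for (std::size_t depth = 0; depth < stack.size(); ++depth) {
        if (depth < skippedPerDepth.size() && skippedPerDepth[depth] > 0) {
          stack[depth].didSkip(skippedPerDepth[depth]);
        }
      }
    }

    int main() {
      std::vector<MiniCall> stack(2);   // two enclosing subquery levels
      replaySkips(stack, {3, 0});       // 3 rows were already skipped on depth 0
      assert(stack[0].alreadySkipped == 3);
      assert(stack[1].alreadySkipped == 0);
    }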
+ TRI_ASSERT(_skipped.subqueryDepth() == stack.subqueryLevel() + 1); + for (size_t i = 0; i < stack.subqueryLevel(); ++i) { + auto skippedSub = _skipped.getSkipOnSubqueryLevel(i); + if (skippedSub > 0) { + auto& call = stack.modifyCallAtDepth(i); + call.didSkip(skippedSub); + } + } + } + } + if constexpr (std::is_same_v) { // In subqeryEndExecutor we actually manage two calls. // The clientClient is defined of what will go into the Executor. // on SubqueryEnd this call is generated based on the call from downstream stack.pushCall(std::move(clientCall)); - // TODO: Implement different kind of calls we need to inject into Executor - // based on modification, or on forwarding. - // FOr now use a fetchUnlimited Call always clientCall = AqlCall{}; } if (_execState == ExecState::UPSTREAM) { @@ -1680,6 +1839,16 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { case ExecState::CHECKCALL: { LOG_QUERY("cfe46", DEBUG) << printTypeInfo() << " determine next action on call " << clientCall; + + if constexpr (executorHasSideEffects) { + // If the executor has sideEffects, and we need to skip the results we would + // produce here because we actually skip the subquery, we instead do a + // hardLimit 0 (aka FastForward) call instead to the local Executor + if (stack.needToSkipSubquery()) { + _execState = ExecState::FASTFORWARD; + break; + } + } _execState = nextState(clientCall); break; } @@ -1705,7 +1874,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _executor.skipRowsRange(_lastRange, clientCall); if (subqueryState == ExecutionState::WAITING) { TRI_ASSERT(skippedLocal == 0); - return {subqueryState, 0, nullptr}; + return {subqueryState, SkipResult{}, nullptr}; } else if (subqueryState == ExecutionState::DONE) { state = ExecutorState::DONE; } else { @@ -1735,7 +1904,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } #endif localExecutorState = state; - _skipped += skippedLocal; + _skipped.didSkip(skippedLocal); _engine->_stats += stats; // The execute might have modified the client call. if (state == ExecutorState::DONE) { @@ -1787,7 +1956,7 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { std::tie(subqueryState, stats, call) = _executor.produceRows(_lastRange, *_outputItemRow); if (subqueryState == ExecutionState::WAITING) { - return {subqueryState, 0, nullptr}; + return {subqueryState, SkipResult{}, nullptr}; } else if (subqueryState == ExecutionState::DONE) { state = ExecutorState::DONE; } else { @@ -1829,10 +1998,30 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { case ExecState::FASTFORWARD: { LOG_QUERY("96e2c", DEBUG) << printTypeInfo() << " all produced, fast forward to end up (sub-)query."; + + AqlCall callCopy = clientCall; + if constexpr (executorHasSideEffects) { + if (stack.needToSkipSubquery()) { + // Fast Forward call. + callCopy = AqlCall{0, false, 0, AqlCall::LimitType::HARD}; + } + } + auto [state, stats, skippedLocal, call] = - executeFastForward(_lastRange, clientCall); + executeFastForward(_lastRange, callCopy); - _skipped += skippedLocal; + if constexpr (executorHasSideEffects) { + if (!stack.needToSkipSubquery()) { + // We need to modify the original call. + clientCall = callCopy; + } + // else: We are bypassing the results. + // Do not count them here. + } else { + clientCall = callCopy; + } + + _skipped.didSkip(skippedLocal); _engine->_stats += stats; localExecutorState = state; @@ -1859,14 +2048,13 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // executors. 
TRI_ASSERT(isMultiDepExecutor || !lastRangeHasDataRow()); TRI_ASSERT(!_lastRange.hasShadowRow()); - size_t skippedLocal = 0; + SkipResult skippedLocal; #ifdef ARANGODB_ENABLE_MAINTAINER_MODE auto subqueryLevelBefore = stack.subqueryLevel(); #endif std::tie(_upstreamState, skippedLocal, _lastRange) = executeFetcher(stack, _upstreamRequest); - #ifdef ARANGODB_ENABLE_MAINTAINER_MODE TRI_ASSERT(subqueryLevelBefore == stack.subqueryLevel()); #endif @@ -1876,18 +2064,45 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { // We might have some local accounting to this call. _clientRequest = clientCall; // We do not return anything in WAITING state, also NOT skipped. - return {_upstreamState, 0, nullptr}; + return {_upstreamState, SkipResult{}, nullptr}; } if constexpr (Executor::Properties::allowsBlockPassthrough == BlockPassthrough::Enable) { // We have a new range, passthrough can use this range. _hasUsedDataRangeBlock = false; } + + if constexpr (std::is_same_v) { + // We need to pop the last subquery from the returned skip + // We have not asked for a subquery skip. + TRI_ASSERT(skippedLocal.getSkipCount() == 0); + skippedLocal.decrementSubquery(); + } if constexpr (skipRowsType() == SkipRowsRangeVariant::FETCHER) { - _skipped += skippedLocal; // We skipped through passthrough, so count that a skip was solved. - clientCall.didSkip(skippedLocal); + _skipped.merge(skippedLocal, false); + clientCall.didSkip(skippedLocal.getSkipCount()); + } else if constexpr (is_one_of_v) { + // Subquery needs to include the topLevel Skip. + // But does not need to apply the count to clientCall. + _skipped.merge(skippedLocal, false); + // This is what has been asked for by the SubqueryEnd + auto subqueryCall = stack.popCall(); + subqueryCall.didSkip(skippedLocal.getSkipCount()); + stack.pushCall(std::move(subqueryCall)); + } else { + _skipped.merge(skippedLocal, true); } + if constexpr (std::is_same_v) { + // For the subqueryStart, we need to increment the SkipLevel by one + // as we may trigger this multiple times, check if we need to do it. + while (_skipped.subqueryDepth() < stack.subqueryLevel() + 1) { + // In fact, we only need to increase by 1 + TRI_ASSERT(_skipped.subqueryDepth() == stack.subqueryLevel()); + _skipped.incrementSubquery(); + } + } + if (_lastRange.hasShadowRow() && !_lastRange.peekShadowRow().isRelevant()) { _execState = ExecState::SHADOWROWS; } else { @@ -1924,9 +2139,19 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { } TRI_ASSERT(!_outputItemRow->allRowsUsed()); - - // This may write one or more rows. - _execState = shadowRowForwarding(); + if constexpr (executorHasSideEffects) { + _execState = sideEffectShadowRowForwarding(stack, _skipped); + } else { + // This may write one or more rows. + _execState = shadowRowForwarding(); + if constexpr (std::is_same_v) { + // we need to update the Top of the stack now + std::ignore = stack.popCall(); + // Copy the call + AqlCall modifiedCall = _outputItemRow->getClientCall(); + stack.pushCall(std::move(modifiedCall)); + } + } if constexpr (!std::is_same_v) { // Produce might have modified the clientCall // But only do this if we are not subquery. 
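The two depth adjustments in this hunk, popping the level a SubqueryEnd injected itself and raising the reported depth on SubqueryStart, can be pictured with a small standalone model; MiniSkip is a stand-in for SkipResult, not the real class:

    // Standalone sketch of the subquery depth bookkeeping.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct MiniSkip {
      std::vector<std::size_t> levels{0};
      std::size_t subqueryDepth() const { return levels.size(); }
      void incrementSubquery() { levels.emplace_back(0); }
      void decrementSubquery() { levels.pop_back(); }
    };

    int main() {
      std::size_t const stackLevel = 2;

      // SubqueryEnd: the upstream answer carries one extra level for the call
      // this block injected itself, so that level is popped before merging.
      MiniSkip endResult;
      endResult.incrementSubquery();
      endResult.incrementSubquery();   // depth 3 == stackLevel + 1
      endResult.decrementSubquery();
      assert(endResult.subqueryDepth() == stackLevel);

      // SubqueryStart: the reported depth must end up one above the stack
      // level; the loop is a no-op when a previous round already did this.
      MiniSkip startResult;
      startResult.incrementSubquery(); // depth 2 == stackLevel
      while (startResult.subqueryDepth() < stackLevel + 1) {
        startResult.incrementSubquery();
      }
      assert(startResult.subqueryDepth() == stackLevel + 1);
    }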
@@ -1952,17 +2177,29 @@ ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) { _outputItemRow.reset(); // We return skipped here, reset member - size_t skipped = _skipped; - _skipped = 0; + SkipResult skipped = _skipped; +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + if (!stack.is36Compatible()) { + if constexpr (std::is_same_v) { + TRI_ASSERT(skipped.subqueryDepth() == stack.subqueryLevel() /*we inected a call*/); + } else { + TRI_ASSERT(skipped.subqueryDepth() == stack.subqueryLevel() + 1 /*we took our call*/); + } + } +#endif + + _skipped.reset(); if (localExecutorState == ExecutorState::HASMORE || _lastRange.hasDataRow() || _lastRange.hasShadowRow()) { // We have skipped or/and return data, otherwise we cannot return HASMORE - TRI_ASSERT(skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0)); + TRI_ASSERT(!skipped.nothingSkipped() || + (outputBlock != nullptr && outputBlock->numEntries() > 0)); return {ExecutionState::HASMORE, skipped, std::move(outputBlock)}; } - // We must return skipped and/or data when reportingHASMORE + // We must return skipped and/or data when reporting HASMORE TRI_ASSERT(_upstreamState != ExecutionState::HASMORE || - (skipped > 0 || (outputBlock != nullptr && outputBlock->numEntries() > 0))); + (!skipped.nothingSkipped() || + (outputBlock != nullptr && outputBlock->numEntries() > 0))); return {_upstreamState, skipped, std::move(outputBlock)}; } else { // TODO this branch must never be taken with an executor that has not been diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index 8e1009c5b518..1b4365c78c40 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -52,6 +52,8 @@ class InputAqlItemRow; class OutputAqlItemRow; class Query; class ShadowAqlItemRow; +class SkipResult; +class ParallelUnsortedGatherExecutor; class MultiDependencySingleRowFetcher; template @@ -223,7 +225,7 @@ class ExecutionBlockImpl final : public ExecutionBlock { [[nodiscard]] std::pair initializeCursor(InputAqlItemRow const& input) override; template >>> - auto injectConstantBlock(SharedAqlItemBlockPtr block) -> void; + auto injectConstantBlock(SharedAqlItemBlockPtr block, SkipResult skipped) -> void; [[nodiscard]] Infos const& infos() const; @@ -242,9 +244,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { /// * WAITING: We have async operation going on, nothing happend, please call again /// * HASMORE: Here is some data in the request range, there is still more, if required call again /// * DONE: Here is some data, and there will be no further data available. - /// 2. size_t: Amount of documents skipped. + /// 2. SkipResult: Amount of documents skipped. /// 3. SharedAqlItemBlockPtr: The next data block. - std::tuple execute(AqlCallStack stack) override; + std::tuple execute(AqlCallStack stack) override; template >>>> [[nodiscard]] RegisterId getOutputRegisterId() const noexcept; @@ -253,9 +255,9 @@ class ExecutionBlockImpl final : public ExecutionBlock { /** * @brief Inner execute() part, without the tracing calls. */ - std::tuple executeWithoutTrace(AqlCallStack stack); + std::tuple executeWithoutTrace(AqlCallStack stack); - std::tuple executeFetcher( + std::tuple executeFetcher( AqlCallStack& stack, AqlCallType const& aqlCall); std::tuple executeProduceRows( @@ -329,6 +331,26 @@ class ExecutionBlockImpl final : public ExecutionBlock { void resetExecutor(); + // Forwarding of ShadowRows if the executor has SideEffects. 
+ // This skips over ShadowRows, and counts them in the correct + // position of the callStack as "skipped". + // as soon as we reach a place where there is no skip + // ordered in the outer shadow rows, this call + // will fall back to shadowRowForwardning. + [[nodiscard]] auto sideEffectShadowRowForwarding(AqlCallStack& stack, + SkipResult& skipResult) -> ExecState; + + /** + * @brief Transition to the next state after shadowRows + * + * @param state the state returned by the getShadowRowCall + * @param range the current data range + * @return ExecState The next state + */ + [[nodiscard]] auto nextStateAfterShadowRows(ExecutorState const& state, + DataRange const& range) const + noexcept -> ExecState; + void initOnce(); [[nodiscard]] auto executorNeedsCall(AqlCallType& call) const noexcept -> bool; @@ -361,7 +383,7 @@ class ExecutionBlockImpl final : public ExecutionBlock { InternalState _state; - size_t _skipped{}; + SkipResult _skipped{}; DataRange _lastRange; diff --git a/arangod/Aql/ExecutionEngine.cpp b/arangod/Aql/ExecutionEngine.cpp index 2fa51ea000df..a4e30799654e 100644 --- a/arangod/Aql/ExecutionEngine.cpp +++ b/arangod/Aql/ExecutionEngine.cpp @@ -39,6 +39,7 @@ #include "Aql/QueryRegistry.h" #include "Aql/RemoteExecutor.h" #include "Aql/ReturnExecutor.h" +#include "Aql/SkipResult.h" #include "Aql/WalkerWorker.h" #include "Basics/ScopeGuard.h" #include "Cluster/ServerState.h" @@ -564,16 +565,16 @@ std::pair ExecutionEngine::initializeCursor(SharedAqlIte } auto ExecutionEngine::execute(AqlCallStack const& stack) - -> std::tuple { + -> std::tuple { if (_query.killed()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); } auto const res = _root->execute(stack); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE if (std::get(res) == ExecutionState::WAITING) { - auto const skipped = std::get(res); + auto const skipped = std::get(res); auto const block = std::get(res); - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); TRI_ASSERT(block == nullptr); } #endif @@ -581,7 +582,7 @@ auto ExecutionEngine::execute(AqlCallStack const& stack) } auto ExecutionEngine::executeForClient(AqlCallStack const& stack, std::string const& clientId) - -> std::tuple { + -> std::tuple { if (_query.killed()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_KILLED); } @@ -597,9 +598,9 @@ auto ExecutionEngine::executeForClient(AqlCallStack const& stack, std::string co auto const res = rootBlock->executeForClient(stack, clientId); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE if (std::get(res) == ExecutionState::WAITING) { - auto const skipped = std::get(res); + auto const skipped = std::get(res); auto const& block = std::get(res); - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); TRI_ASSERT(block == nullptr); } #endif @@ -621,7 +622,7 @@ std::pair ExecutionEngine::getSome(size_t AqlCallStack compatibilityStack{AqlCall::SimulateGetSome(atMost), true}; auto const [state, skipped, block] = _root->execute(std::move(compatibilityStack)); // We cannot trigger a skip operation from here - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); return {state, block}; } @@ -643,7 +644,7 @@ std::pair ExecutionEngine::skipSome(size_t atMost) { // We cannot be triggered within a subquery from earlier versions. // Also we cannot produce anything ourselfes here. 
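The maintainer-mode checks above encode an invariant of the new API: a WAITING answer must not carry any progress. A hedged, standalone restatement with stand-in types:

    // Standalone sketch: a WAITING result must transport neither skipped rows
    // nor a data block.
    #include <cassert>
    #include <cstddef>
    #include <memory>

    enum class State { WAITING, HASMORE, DONE };

    struct Result {
      State state;
      std::size_t skippedTopLevel;
      std::shared_ptr<int> block;  // stand-in for SharedAqlItemBlockPtr
    };

    bool waitingIsEmpty(Result const& r) {
      return r.state != State::WAITING ||
             (r.skippedTopLevel == 0 && r.block == nullptr);
    }

    int main() {
      assert(waitingIsEmpty({State::WAITING, 0, nullptr}));
      assert(!waitingIsEmpty({State::WAITING, 3, nullptr}));
      assert(waitingIsEmpty({State::HASMORE, 3, std::make_shared<int>(1)}));
    }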
TRI_ASSERT(block == nullptr); - return {state, skipped}; + return {state, skipped.getSkipCount()}; } Result ExecutionEngine::shutdownSync(int errorCode) noexcept try { diff --git a/arangod/Aql/ExecutionEngine.h b/arangod/Aql/ExecutionEngine.h index f9af9ff3d2c2..2b51c217ff6f 100644 --- a/arangod/Aql/ExecutionEngine.h +++ b/arangod/Aql/ExecutionEngine.h @@ -47,6 +47,7 @@ class ExecutionNode; class ExecutionPlan; class QueryRegistry; class Query; +class SkipResult; enum class SerializationFormat; class ExecutionEngine { @@ -98,10 +99,10 @@ class ExecutionEngine { std::pair shutdown(int errorCode); auto execute(AqlCallStack const& stack) - -> std::tuple; + -> std::tuple; auto executeForClient(AqlCallStack const& stack, std::string const& clientId) - -> std::tuple; + -> std::tuple; /// @brief getSome std::pair getSome(size_t atMost); diff --git a/arangod/Aql/ModificationExecutor.cpp b/arangod/Aql/ModificationExecutor.cpp index 6554c6574fed..f3712fd09199 100644 --- a/arangod/Aql/ModificationExecutor.cpp +++ b/arangod/Aql/ModificationExecutor.cpp @@ -201,7 +201,6 @@ template doCollect(input, output.numRowsLeft()); upstreamState = input.upstreamState(); } - if (_modifier.nrOfOperations() > 0) { _modifier.transact(); @@ -266,7 +265,6 @@ template stats.addWritesExecuted(_modifier.nrOfWritesExecuted()); stats.addWritesIgnored(_modifier.nrOfWritesIgnored()); } - call.didSkip(_modifier.nrOfOperations()); } } diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.cpp b/arangod/Aql/MultiDependencySingleRowFetcher.cpp index b275f0c19567..fba33607ad79 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.cpp +++ b/arangod/Aql/MultiDependencySingleRowFetcher.cpp @@ -147,6 +147,8 @@ std::pair MultiDependencySingleRowFetcher::fet ++dep._rowIndex; } } + // We have delivered a shadowRow, we now may get additional subquery skip counters again. + _didReturnSubquerySkips = false; } ExecutionState const state = allDone ? ExecutionState::DONE : ExecutionState::HASMORE; @@ -370,11 +372,11 @@ auto MultiDependencySingleRowFetcher::useStack(AqlCallStack const& stack) -> voi auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependency, AqlCallStack& stack) - -> std::tuple { + -> std::tuple { auto [state, skipped, block] = _dependencyProxy->executeForDependency(dependency, stack); if (state == ExecutionState::WAITING) { - return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; + return {state, SkipResult{}, AqlItemBlockInputRange{ExecutorState::HASMORE}}; } ExecutorState execState = state == ExecutionState::DONE ? 
ExecutorState::DONE : ExecutorState::HASMORE; @@ -382,16 +384,17 @@ auto MultiDependencySingleRowFetcher::executeForDependency(size_t const dependen _dependencyStates.at(dependency) = state; if (block == nullptr) { - return {state, skipped, AqlItemBlockInputRange{execState, skipped}}; + return {state, skipped, AqlItemBlockInputRange{execState, skipped.getSkipCount()}}; } TRI_ASSERT(block != nullptr); auto [start, end] = block->getRelevantRange(); - return {state, skipped, AqlItemBlockInputRange{execState, skipped, block, start}}; + return {state, skipped, + AqlItemBlockInputRange{execState, skipped.getSkipCount(), block, start}}; } auto MultiDependencySingleRowFetcher::execute(AqlCallStack const& stack, AqlCallSet const& aqlCallSet) - -> std::tuple>> { + -> std::tuple>> { TRI_ASSERT(_callsInFlight.size() == numberDependencies()); auto ranges = std::vector>{}; @@ -400,7 +403,7 @@ auto MultiDependencySingleRowFetcher::execute(AqlCallStack const& stack, auto depCallIdx = size_t{0}; auto allAskedDepsAreWaiting = true; auto askedAtLeastOneDep = false; - auto skippedTotal = size_t{0}; + auto skippedTotal = SkipResult{}; // Iterate in parallel over `_callsInFlight` and `aqlCall.calls`. // _callsInFlight[i] corresponds to aqlCalls.calls[k] iff // aqlCalls.calls[k].dependency = i. @@ -435,16 +438,46 @@ auto MultiDependencySingleRowFetcher::execute(AqlCallStack const& stack, // Got a result, call is no longer in flight maybeCallInFlight = std::nullopt; allAskedDepsAreWaiting = false; + + // NOTE: + // in this fetcher case we do not have and do not want to have + // any control of the order the upstream responses are entering. + // Every of the upstream response will contain an identical skipped + // stack on the subqueries. + // We only need to forward the skipping of any one of those. + // So we implemented the following logic to return the skip + // information for the first on that arrives and all other + // subquery skip informations will be discarded. + if (!_didReturnSubquerySkips) { + // We have nothing skipped locally. + TRI_ASSERT(skippedTotal.subqueryDepth() == 1); + TRI_ASSERT(skippedTotal.getSkipCount() == 0); + + // We forward the skip block as is. + // This will also include the skips on subquery level + skippedTotal = skipped; + // Do this only once. + // The first response will contain the amount of rows skipped + // in subquery + _didReturnSubquerySkips = true; + } else { + // We only need the skip amount on the top level. 
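The intent of the two merge flavours used in this fetcher, take everything from the first response but only the top-level count from later ones, can be sketched standalone like this (a plain vector replaces SkipResult, outermost level first, current level last):

    // Standalone sketch of merge vs. top-level-only merge.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    using Skip = std::vector<std::size_t>;

    void mergeAll(Skip& into, Skip const& other, bool excludeTopLevel) {
      while (into.size() < other.size()) {
        into.push_back(0);
      }
      for (std::size_t i = 0; i < other.size(); ++i) {
        if (excludeTopLevel && i + 1 == other.size()) {
          continue;  // caller accounts for the top level itself
        }
        into[i] += other[i];
      }
    }

    void mergeOnlyTopLevel(Skip& into, Skip const& other) {
      while (into.size() < other.size()) {
        into.push_back(0);
      }
      into.back() += other.back();
    }

    int main() {
      Skip first{2, 5};   // subquery level skipped 2, top level skipped 5
      Skip second{2, 3};  // same subquery skip reported again, plus 3 local

      Skip total{0};
      mergeAll(total, first, false);     // take everything from the first answer
      mergeOnlyTopLevel(total, second);  // drop the duplicated subquery skip
      assert((total == Skip{2, 8}));
    }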
+ // Another dependency has forwarded the subquery level skips + // already + skippedTotal.mergeOnlyTopLevel(skipped); + } + } else { - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); } - skippedTotal += skipped; + ranges.emplace_back(dependency, range); } } auto const state = std::invoke([&]() { if (askedAtLeastOneDep && allAskedDepsAreWaiting) { + TRI_ASSERT(skippedTotal.nothingSkipped()); return ExecutionState::WAITING; } else { return upstreamState(); diff --git a/arangod/Aql/MultiDependencySingleRowFetcher.h b/arangod/Aql/MultiDependencySingleRowFetcher.h index 277ebf628997..13358f5e95c4 100644 --- a/arangod/Aql/MultiDependencySingleRowFetcher.h +++ b/arangod/Aql/MultiDependencySingleRowFetcher.h @@ -39,6 +39,7 @@ class AqlItemBlock; template class DependencyProxy; class ShadowAqlItemRow; +class SkipResult; /** * @brief Interface for all AqlExecutors that do need one @@ -137,10 +138,10 @@ class MultiDependencySingleRowFetcher { auto useStack(AqlCallStack const& stack) -> void; [[nodiscard]] auto execute(AqlCallStack const&, AqlCallSet const&) - -> std::tuple>>; + -> std::tuple>>; [[nodiscard]] auto executeForDependency(size_t dependency, AqlCallStack& stack) - -> std::tuple; + -> std::tuple; [[nodiscard]] auto upstreamState() const -> ExecutionState; @@ -158,6 +159,8 @@ class MultiDependencySingleRowFetcher { /// in initOnce() to make sure that init() is called exactly once. std::vector> _callsInFlight; + bool _didReturnSubquerySkips{false}; + private: /** * @brief Delegates to ExecutionBlock::fetchBlock() diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 0d344f22e0cb..9cd9e7b83bd2 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -74,9 +74,9 @@ namespace { -bool accessesCollectionVariable(arangodb::aql::ExecutionPlan const* plan, - arangodb::aql::ExecutionNode const* node, - ::arangodb::containers::HashSet& vars) { +bool accessesCollectionVariable( + arangodb::aql::ExecutionPlan const* plan, arangodb::aql::ExecutionNode const* node, + ::arangodb::containers::HashSet& vars) { using EN = arangodb::aql::ExecutionNode; if (node->getType() == EN::CALCULATION) { @@ -5796,9 +5796,9 @@ void arangodb::aql::optimizeTraversalsRule(Optimizer* opt, if (outVariable != nullptr && !n->isVarUsedLater(outVariable) && std::find(pruneVars.begin(), pruneVars.end(), outVariable) == pruneVars.end()) { outVariable = traversal->pathOutVariable(); - if (outVariable == nullptr || - (!n->isVarUsedLater(outVariable) && - std::find(pruneVars.begin(), pruneVars.end(), outVariable) == pruneVars.end())) { + if (outVariable == nullptr || (!n->isVarUsedLater(outVariable) && + std::find(pruneVars.begin(), pruneVars.end(), + outVariable) == pruneVars.end())) { // both traversal vertex and path outVariables not used later traversal->options()->setProduceVertices(false); modified = true; @@ -7275,12 +7275,12 @@ void arangodb::aql::moveFiltersIntoEnumerateRule(Optimizer* opt, ExecutionNode* filterParent = current->getFirstParent(); TRI_ASSERT(filterParent != nullptr); plan->unlinkNode(current); - + if (!current->isVarUsedLater(cn->outVariable())) { // also remove the calculation node plan->unlinkNode(cn); } - + current = filterParent; modified = true; } else if (current->getType() == EN::CALCULATION) { @@ -7403,19 +7403,11 @@ bool nodeMakesThisQueryLevelUnsuitableForSubquerySplicing(ExecutionNode const* n case ExecutionNode::DISTRIBUTE_CONSUMER: case ExecutionNode::SUBQUERY_START: case ExecutionNode::SUBQUERY_END: - // These nodes do not 
initiate a skip themselves, and thus are fine. - return false; case ExecutionNode::NORESULTS: - // no results currently cannot work, as they do not fetch from above. case ExecutionNode::LIMIT: - // limit blocks currently cannot work, both due to skipping and due to the - // limit and passthrough, which forbids passing shadow rows. - return true; - case ExecutionNode::COLLECT: { - auto const collectNode = ExecutionNode::castTo(node); - // Collect nodes skip iff using the COUNT method. - return collectNode->aggregationMethod() == CollectOptions::CollectMethod::COUNT; - } + case ExecutionNode::COLLECT: + // These nodes are fine + return false; case ExecutionNode::MAX_NODE_TYPE_VALUE: break; } @@ -7425,7 +7417,7 @@ bool nodeMakesThisQueryLevelUnsuitableForSubquerySplicing(ExecutionNode const* n "report this error. Try turning off the splice-subqueries rule to get " "your query working.", node->getTypeString().c_str()); -} +} // namespace void findSubqueriesSuitableForSplicing(ExecutionPlan const& plan, containers::SmallVector& result) { diff --git a/arangod/Aql/RemoteExecutor.cpp b/arangod/Aql/RemoteExecutor.cpp index c231fc4cb91e..f2f4afebd700 100644 --- a/arangod/Aql/RemoteExecutor.cpp +++ b/arangod/Aql/RemoteExecutor.cpp @@ -31,6 +31,7 @@ #include "Aql/InputAqlItemRow.h" #include "Aql/Query.h" #include "Aql/RestAqlHandler.h" +#include "Aql/SkipResult.h" #include "Basics/MutexLocker.h" #include "Basics/StringBuffer.h" #include "Basics/VelocyPackHelper.h" @@ -153,8 +154,7 @@ std::pair ExecutionBlockImpl ExecutionBlockImpl::shutdown(i } auto ExecutionBlockImpl::executeViaOldApi(AqlCallStack stack) - -> std::tuple { + -> std::tuple { // Use the old getSome/SkipSome API. auto myCall = stack.popCall(); @@ -444,7 +444,9 @@ auto ExecutionBlockImpl::executeViaOldApi(AqlCallStack stack) if (state != ExecutionState::WAITING) { myCall.didSkip(skipped); } - return {state, skipped, nullptr}; + SkipResult skipRes{}; + skipRes.didSkip(skipped); + return {state, skipRes, nullptr}; } else if (AqlCall::IsGetSomeCall(myCall)) { auto const [state, block] = getSomeWithoutTrace(myCall.getLimit()); // We do not need to count as softLimit will be overwritten, and hard cannot be set. @@ -452,20 +454,22 @@ auto ExecutionBlockImpl::executeViaOldApi(AqlCallStack stack) // However we can do a short-cut here to report DONE on hardLimit if we are on the top-level query. myCall.didProduce(block->size()); if (myCall.getLimit() == 0) { - return {ExecutionState::DONE, 0, block}; + return {ExecutionState::DONE, SkipResult{}, block}; } } - return {state, 0, block}; + return {state, SkipResult{}, block}; } else if (AqlCall::IsFullCountCall(myCall)) { auto const [state, skipped] = skipSome(ExecutionBlock::SkipAllSize()); if (state != ExecutionState::WAITING) { myCall.didSkip(skipped); } - return {state, skipped, nullptr}; + SkipResult skipRes{}; + skipRes.didSkip(skipped); + return {state, skipRes, nullptr}; } else if (AqlCall::IsFastForwardCall(myCall)) { // No idea if DONE is correct here... - return {ExecutionState::DONE, 0, nullptr}; + return {ExecutionState::DONE, SkipResult{}, nullptr}; } // Should never get here! 
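The old-API bridge above flattens the structured skip information to a single counter in one direction and wraps a legacy counter into a one-level structure in the other. A minimal standalone illustration (MiniSkipResult is a stand-in, not the class from this patch):

    // Standalone sketch of the old/new skip bridging.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct MiniSkipResult {
      std::vector<std::size_t> perDepth{0};
      std::size_t getSkipCount() const { return perDepth.back(); }
      void didSkip(std::size_t n) { perDepth.back() += n; }
    };

    // New API -> old API: only the top-level count survives.
    std::size_t toLegacy(MiniSkipResult const& s) { return s.getSkipCount(); }

    // Old API -> new API: a single-level result carrying the legacy count.
    MiniSkipResult fromLegacy(std::size_t skipped) {
      MiniSkipResult res;
      res.didSkip(skipped);
      return res;
    }

    int main() {
      assert(toLegacy(fromLegacy(7)) == 7);
    }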
@@ -473,14 +477,14 @@ auto ExecutionBlockImpl::executeViaOldApi(AqlCallStack stack) } auto ExecutionBlockImpl::execute(AqlCallStack stack) - -> std::tuple { + -> std::tuple { traceExecuteBegin(stack); auto res = executeWithoutTrace(stack); return traceExecuteEnd(res); } auto ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) --> std::tuple { + -> std::tuple { if (ADB_UNLIKELY(api() == Api::GET_SOME)) { return executeViaOldApi(stack); } @@ -489,7 +493,7 @@ auto ExecutionBlockImpl::executeWithoutTrace(AqlCallStack stack) } auto ExecutionBlockImpl::executeViaNewApi(AqlCallStack callStack) - -> std::tuple { + -> std::tuple { // silence tests -- we need to introduce new failure tests for fetchers TRI_IF_FAILURE("ExecutionBlock::getOrSkipSome1") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); @@ -509,7 +513,7 @@ auto ExecutionBlockImpl::executeViaNewApi(AqlCallStack callStack if (_requestInFlight) { // Already sent a shutdown request, but haven't got an answer yet. - return {ExecutionState::WAITING, 0, nullptr}; + return {ExecutionState::WAITING, SkipResult{}, nullptr}; } // For every call we simply forward via HTTP @@ -553,7 +557,7 @@ auto ExecutionBlockImpl::executeViaNewApi(AqlCallStack callStack THROW_ARANGO_EXCEPTION(res); } - return {ExecutionState::WAITING, 0, nullptr}; + return {ExecutionState::WAITING, SkipResult{}, nullptr}; } auto ExecutionBlockImpl::deserializeExecuteCallResultBody(VPackSlice const slice) const @@ -564,14 +568,16 @@ auto ExecutionBlockImpl::deserializeExecuteCallResultBody(VPackS if (ADB_UNLIKELY(!slice.isObject())) { using namespace std::string_literals; - return Result{TRI_ERROR_TYPE_ERROR, "When parsing execute result: expected object, got "s + slice.typeName()}; + return Result{TRI_ERROR_TYPE_ERROR, + "When parsing execute result: expected object, got "s + slice.typeName()}; } if (auto value = slice.get(StaticStrings::AqlRemoteResult); !value.isNone()) { return AqlExecuteResult::fromVelocyPack(value, _engine->itemBlockManager()); } - return Result{TRI_ERROR_TYPE_ERROR, "When parsing execute result: field result missing"}; + return Result{TRI_ERROR_TYPE_ERROR, + "When parsing execute result: field result missing"}; } auto ExecutionBlockImpl::serializeExecuteCallBody(AqlCallStack const& callStack) const @@ -661,10 +667,10 @@ Result ExecutionBlockImpl::sendAsyncRequest(fuerte::RestVerb typ req->header.addMeta("x-shard-id", _ownName); req->header.addMeta("shard-id", _ownName); // deprecated in 3.7, remove later } - + LOG_TOPIC("2713c", DEBUG, Logger::COMMUNICATION) - << "request to '" << _server - << "' '" << fuerte::to_string(type) << " " << req->header.path << "'"; + << "request to '" << _server << "' '" << fuerte::to_string(type) << " " + << req->header.path << "'"; network::ConnectionPtr conn = pool->leaseConnection(spec.endpoint); diff --git a/arangod/Aql/RemoteExecutor.h b/arangod/Aql/RemoteExecutor.h index 49244489cf00..cd8da9a7afbb 100644 --- a/arangod/Aql/RemoteExecutor.h +++ b/arangod/Aql/RemoteExecutor.h @@ -32,12 +32,16 @@ #include -namespace arangodb::fuerte { inline namespace v1 { +namespace arangodb::fuerte { +inline namespace v1 { enum class RestVerb; -}} +} +} // namespace arangodb::fuerte namespace arangodb::aql { +class SkipResult; + // The RemoteBlock is actually implemented by specializing ExecutionBlockImpl, // so this class only exists to identify the specialization. 
class RemoteExecutor final {}; @@ -67,7 +71,7 @@ class ExecutionBlockImpl : public ExecutionBlock { std::pair shutdown(int errorCode) override; - std::tuple execute(AqlCallStack stack) override; + std::tuple execute(AqlCallStack stack) override; [[nodiscard]] auto api() const noexcept -> Api; @@ -85,13 +89,13 @@ class ExecutionBlockImpl : public ExecutionBlock { std::pair skipSomeWithoutTrace(size_t atMost); auto executeWithoutTrace(AqlCallStack stack) - -> std::tuple; + -> std::tuple; auto executeViaOldApi(AqlCallStack stack) - -> std::tuple; + -> std::tuple; auto executeViaNewApi(AqlCallStack stack) - -> std::tuple; + -> std::tuple; [[nodiscard]] auto deserializeExecuteCallResultBody(velocypack::Slice) const -> ResultT; @@ -166,6 +170,6 @@ class ExecutionBlockImpl : public ExecutionBlock { Api _apiToUse = Api::EXECUTE; }; -} // namespace arangodb +} // namespace arangodb::aql #endif // ARANGOD_AQL_REMOTE_EXECUTOR_H diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index fa57a645b8b2..725eac165320 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -740,7 +740,7 @@ RestStatus RestAqlHandler::handleUseQuery(std::string const& operation, auto& executeCall = maybeExecuteCall.get(); auto items = SharedAqlItemBlockPtr{}; - auto skipped = size_t{}; + auto skipped = SkipResult{}; auto state = ExecutionState::HASMORE; // shardId is set IFF the root node is scatter or distribute diff --git a/arangod/Aql/ScatterExecutor.cpp b/arangod/Aql/ScatterExecutor.cpp index bdf56efdb391..b81449906d9a 100644 --- a/arangod/Aql/ScatterExecutor.cpp +++ b/arangod/Aql/ScatterExecutor.cpp @@ -64,10 +64,11 @@ auto ScatterExecutor::ClientBlockData::clear() -> void { _executorHasMore = false; } -auto ScatterExecutor::ClientBlockData::addBlock(SharedAqlItemBlockPtr block) -> void { +auto ScatterExecutor::ClientBlockData::addBlock(SharedAqlItemBlockPtr block, + SkipResult skipped) -> void { // NOTE: - // There given ItemBlock will be reused in all requesting blocks. - // However, the next followwing block could be passthrough. + // The given ItemBlock will be reused in all requesting blocks. + // However, the next following block could be passthrough. // If it is, it will modify that data stored in block. // If now anther client requests the same block, it is not // the original any more, but a modified version. @@ -75,20 +76,20 @@ auto ScatterExecutor::ClientBlockData::addBlock(SharedAqlItemBlockPtr block) -> // is empty. If another peer-calculation has written to this value // this assertion does not hold true anymore. // Hence we are required to do an indepth cloning here. - _queue.emplace_back(block->slice(0, block->size())); + _queue.emplace_back(block->slice(0, block->size()), skipped); } auto ScatterExecutor::ClientBlockData::hasDataFor(AqlCall const& call) -> bool { return _executorHasMore || !_queue.empty(); } -auto ScatterExecutor::ClientBlockData::execute(AqlCall call, ExecutionState upstreamState) - -> std::tuple { +auto ScatterExecutor::ClientBlockData::execute(AqlCallStack callStack, ExecutionState upstreamState) + -> std::tuple { TRI_ASSERT(_executor != nullptr); // Make sure we actually have data before you call execute - TRI_ASSERT(hasDataFor(call)); + TRI_ASSERT(hasDataFor(callStack.peek())); if (!_executorHasMore) { - auto const& block = _queue.front(); + auto const& [block, skipResult] = _queue.front(); // This cast is guaranteed, we create this a couple lines above and only // this executor is used here. 
// Unfortunately i did not get a version compiled were i could only forward @@ -96,12 +97,11 @@ auto ScatterExecutor::ClientBlockData::execute(AqlCall call, ExecutionState upst auto casted = static_cast>*>(_executor.get()); TRI_ASSERT(casted != nullptr); - casted->injectConstantBlock(block); + casted->injectConstantBlock(block, skipResult); _executorHasMore = true; _queue.pop_front(); } - AqlCallStack stack{call}; - auto [state, skipped, result] = _executor->execute(stack); + auto [state, skipped, result] = _executor->execute(callStack); // We have all data locally cannot wait here. TRI_ASSERT(state != ExecutionState::WAITING); @@ -124,12 +124,12 @@ auto ScatterExecutor::ClientBlockData::execute(AqlCall call, ExecutionState upst ScatterExecutor::ScatterExecutor(ExecutorInfos const&){}; -auto ScatterExecutor::distributeBlock(SharedAqlItemBlockPtr block, +auto ScatterExecutor::distributeBlock(SharedAqlItemBlockPtr block, SkipResult skipped, std::unordered_map& blockMap) const -> void { // Scatter returns every block on every client as is. for (auto& [id, list] : blockMap) { - list.addBlock(block); + list.addBlock(block, skipped); } } diff --git a/arangod/Aql/ScatterExecutor.h b/arangod/Aql/ScatterExecutor.h index 53bd1bb1b5c5..10a1316cec02 100644 --- a/arangod/Aql/ScatterExecutor.h +++ b/arangod/Aql/ScatterExecutor.h @@ -31,6 +31,7 @@ namespace arangodb { namespace aql { +class SkipResult; class ExecutionEngine; class ScatterNode; @@ -56,14 +57,14 @@ class ScatterExecutor { ExecutorInfos const& scatterInfos); auto clear() -> void; - auto addBlock(SharedAqlItemBlockPtr block) -> void; + auto addBlock(SharedAqlItemBlockPtr block, SkipResult skipped) -> void; auto hasDataFor(AqlCall const& call) -> bool; - auto execute(AqlCall call, ExecutionState upstreamState) - -> std::tuple; + auto execute(AqlCallStack callStack, ExecutionState upstreamState) + -> std::tuple; private: - std::deque _queue; + std::deque> _queue; // This is unique_ptr to get away with everything beeing forward declared... 
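The in-depth cloning in ClientBlockData::addBlock above exists because a pass-through consumer may write into the block it was handed; sharing one buffer across all clients would leak those writes. A standalone sketch of the difference, with a vector standing in for an item block:

    // Standalone sketch: shared vs. copied blocks for multiple clients.
    #include <cassert>
    #include <memory>
    #include <vector>

    using Block = std::vector<int>;

    int main() {
      auto original = std::make_shared<Block>(Block{1, 2, 3});

      // Shared: a write by client A becomes visible to client B.
      auto sharedA = original;
      auto sharedB = original;
      (*sharedA)[0] = 42;
      assert((*sharedB)[0] == 42);

      // Copied (the role of block->slice(...)): clients stay independent.
      auto copyA = std::make_shared<Block>(*original);
      auto copyB = std::make_shared<Block>(*original);
      (*copyA)[0] = 7;
      assert((*copyB)[0] == 42);
    }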
std::unique_ptr _executor; bool _executorHasMore; @@ -72,7 +73,7 @@ class ScatterExecutor { ScatterExecutor(ExecutorInfos const&); ~ScatterExecutor() = default; - auto distributeBlock(SharedAqlItemBlockPtr block, + auto distributeBlock(SharedAqlItemBlockPtr block, SkipResult skipped, std::unordered_map& blockMap) const -> void; }; diff --git a/arangod/Aql/SingleRowFetcher.cpp b/arangod/Aql/SingleRowFetcher.cpp index 62e054c5b875..44acf7bdc00a 100644 --- a/arangod/Aql/SingleRowFetcher.cpp +++ b/arangod/Aql/SingleRowFetcher.cpp @@ -30,6 +30,7 @@ #include "Aql/ExecutionBlock.h" #include "Aql/ExecutionState.h" #include "Aql/InputAqlItemRow.h" +#include "Aql/SkipResult.h" using namespace arangodb; using namespace arangodb::aql; @@ -77,28 +78,31 @@ SingleRowFetcher::fetchBlockForPassthrough(size_t atMost) { } template -std::tuple +std::tuple SingleRowFetcher::execute(AqlCallStack& stack) { auto [state, skipped, block] = _dependencyProxy->execute(stack); if (state == ExecutionState::WAITING) { // On waiting we have nothing to return - return {state, 0, AqlItemBlockInputRange{ExecutorState::HASMORE}}; + return {state, SkipResult{}, AqlItemBlockInputRange{ExecutorState::HASMORE}}; } if (block == nullptr) { if (state == ExecutionState::HASMORE) { - return {state, skipped, AqlItemBlockInputRange{ExecutorState::HASMORE, skipped}}; + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::HASMORE, skipped.getSkipCount()}}; } - return {state, skipped, AqlItemBlockInputRange{ExecutorState::DONE, skipped}}; + return {state, skipped, + AqlItemBlockInputRange{ExecutorState::DONE, skipped.getSkipCount()}}; } auto [start, end] = block->getRelevantRange(); if (state == ExecutionState::HASMORE) { TRI_ASSERT(block != nullptr); return {state, skipped, - AqlItemBlockInputRange{ExecutorState::HASMORE, skipped, block, start}}; + AqlItemBlockInputRange{ExecutorState::HASMORE, + skipped.getSkipCount(), block, start}}; } return {state, skipped, - AqlItemBlockInputRange{ExecutorState::DONE, skipped, block, start}}; + AqlItemBlockInputRange{ExecutorState::DONE, skipped.getSkipCount(), block, start}}; } template diff --git a/arangod/Aql/SingleRowFetcher.h b/arangod/Aql/SingleRowFetcher.h index e33717eff395..f33447576c01 100644 --- a/arangod/Aql/SingleRowFetcher.h +++ b/arangod/Aql/SingleRowFetcher.h @@ -40,6 +40,7 @@ namespace arangodb::aql { class AqlItemBlock; template class DependencyProxy; +class SkipResult; /** * @brief Interface for all AqlExecutors that do only need one @@ -74,7 +75,7 @@ class SingleRowFetcher { * size_t => Amount of documents skipped * DataRange => Resulting data */ - std::tuple execute(AqlCallStack& stack); + std::tuple execute(AqlCallStack& stack); /** * @brief Fetch one new AqlItemRow from upstream. diff --git a/arangod/Aql/SkipResult.cpp b/arangod/Aql/SkipResult.cpp new file mode 100644 index 000000000000..094c13bb99fa --- /dev/null +++ b/arangod/Aql/SkipResult.cpp @@ -0,0 +1,184 @@ + +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "SkipResult.h" + +#include "Cluster/ResultT.h" + +#include +#include +#include + +using namespace arangodb::aql; + +SkipResult::SkipResult() {} + +SkipResult::SkipResult(SkipResult const& other) : _skipped{other._skipped} {} + +auto SkipResult::getSkipCount() const noexcept -> size_t { + TRI_ASSERT(!_skipped.empty()); + return _skipped.back(); +} + +auto SkipResult::didSkip(size_t skipped) -> void { + TRI_ASSERT(!_skipped.empty()); + _skipped.back() += skipped; +} + +auto SkipResult::didSkipSubquery(size_t skipped, size_t depth) -> void { + TRI_ASSERT(!_skipped.empty()); + TRI_ASSERT(_skipped.size() > depth + 1); + size_t index = _skipped.size() - depth - 2; + size_t& localSkip = _skipped.at(index); + localSkip += skipped; +} + +auto SkipResult::getSkipOnSubqueryLevel(size_t depth) -> size_t { + TRI_ASSERT(!_skipped.empty()); + TRI_ASSERT(_skipped.size() > depth); + return _skipped.at(depth); +} + +auto SkipResult::nothingSkipped() const noexcept -> bool { + TRI_ASSERT(!_skipped.empty()); + return std::all_of(_skipped.begin(), _skipped.end(), + [](size_t const& e) -> bool { return e == 0; }); +} + +auto SkipResult::toVelocyPack(VPackBuilder& builder) const noexcept -> void { + VPackArrayBuilder guard(&builder); + TRI_ASSERT(!_skipped.empty()); + for (auto const& s : _skipped) { + builder.add(VPackValue(s)); + } +} + +auto SkipResult::fromVelocyPack(VPackSlice slice) -> arangodb::ResultT { + if (!slice.isArray()) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading skipped: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + if (slice.isEmptyArray()) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading skipped: " + "Got an empty list of skipped values."}; + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + try { + SkipResult res; + auto it = VPackArrayIterator(slice); + while (it.valid()) { + auto val = it.value(); + if (!val.isInteger()) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading skipped: " + "Unexpected type "}; + message += slice.typeName(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } + if (!it.isFirst()) { + res.incrementSubquery(); + } + res.didSkip(val.getNumber()); + ++it; + } + return {res}; + } catch (velocypack::Exception const& ex) { + auto message = std::string{ + "When deserializating AqlExecuteResult: When reading skipped: "}; + message += ex.what(); + return Result(TRI_ERROR_TYPE_ERROR, std::move(message)); + } +} + +auto SkipResult::incrementSubquery() -> void { _skipped.emplace_back(0); } +auto SkipResult::decrementSubquery() -> void { + TRI_ASSERT(!_skipped.empty()); + _skipped.pop_back(); + TRI_ASSERT(!_skipped.empty()); +} +auto SkipResult::subqueryDepth() const noexcept -> size_t { + TRI_ASSERT(!_skipped.empty()); + return _skipped.size(); +} + 
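The depth-relative indexing of didSkipSubquery is the least obvious part of this class: depth 0 means the closest enclosing subquery, while the last element always belongs to the current query. A standalone model that exercises exactly that arithmetic (MiniSkip is a simplified stand-in, not SkipResult itself):

    // Standalone sketch of the depth-relative indexing.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct MiniSkip {
      std::vector<std::size_t> skipped{0};
      void incrementSubquery() { skipped.emplace_back(0); }
      void didSkip(std::size_t n) { skipped.back() += n; }
      void didSkipSubquery(std::size_t n, std::size_t depth) {
        // depth 0 == closest enclosing subquery, so step over the current level.
        std::size_t index = skipped.size() - depth - 2;
        skipped.at(index) += n;
      }
    };

    int main() {
      MiniSkip s;
      s.incrementSubquery();    // two levels: {outer, current}
      s.incrementSubquery();    // three levels: {outer, middle, current}
      s.didSkip(4);             // current level
      s.didSkipSubquery(1, 0);  // closest enclosing subquery (index 1)
      s.didSkipSubquery(2, 1);  // outermost level (index 0)
      assert((s.skipped == std::vector<std::size_t>{2, 1, 4}));
    }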
+auto SkipResult::reset() -> void { + for (size_t i = 0; i < _skipped.size(); ++i) { + _skipped[i] = 0; + } +} + +auto SkipResult::merge(SkipResult const& other, bool excludeTopLevel) noexcept -> void { + _skipped.reserve(other.subqueryDepth()); + while (other.subqueryDepth() > subqueryDepth()) { + incrementSubquery(); + } + TRI_ASSERT(other._skipped.size() <= _skipped.size()); + for (size_t i = 0; i < other._skipped.size(); ++i) { + if (excludeTopLevel && i + 1 == other._skipped.size()) { + // Do not copy top level + continue; + } + _skipped[i] += other._skipped[i]; + } +} + +auto SkipResult::mergeOnlyTopLevel(SkipResult const& other) noexcept -> void { + _skipped.reserve(other.subqueryDepth()); + while (other.subqueryDepth() > subqueryDepth()) { + incrementSubquery(); + } + _skipped.back() += other._skipped.back(); +} + +auto SkipResult::operator+=(SkipResult const& b) noexcept -> SkipResult& { + didSkip(b.getSkipCount()); + return *this; +} + +auto SkipResult::operator==(SkipResult const& b) const noexcept -> bool { + if (_skipped.size() != b._skipped.size()) { + return false; + } + for (size_t i = 0; i < _skipped.size(); ++i) { + if (_skipped[i] != b._skipped[i]) { + return false; + } + } + return true; +} + +auto SkipResult::operator!=(SkipResult const& b) const noexcept -> bool { + return !(*this == b); +} +namespace arangodb::aql { +std::ostream& operator<<(std::ostream& stream, arangodb::aql::SkipResult const& result) { + VPackBuilder temp; + result.toVelocyPack(temp); + stream << temp.toJson(); + return stream; +} +} // namespace arangodb::aql diff --git a/arangod/Aql/SkipResult.h b/arangod/Aql/SkipResult.h new file mode 100644 index 000000000000..6850f26acda7 --- /dev/null +++ b/arangod/Aql/SkipResult.h @@ -0,0 +1,89 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_SKIP_RESULT_H +#define ARANGOD_AQL_SKIP_RESULT_H + +// for size_t +#include +#include +#include + +namespace arangodb { +template +class ResultT; +} +namespace arangodb::velocypack { +class Builder; +class Slice; +} // namespace arangodb::velocypack + +namespace arangodb::aql { + +class SkipResult { + public: + static auto fromVelocyPack(velocypack::Slice) -> arangodb::ResultT; + + SkipResult(); + + ~SkipResult() = default; + + SkipResult(SkipResult const& other); + SkipResult& operator=(const SkipResult&) = default; + + auto getSkipCount() const noexcept -> size_t; + + auto didSkip(size_t skipped) -> void; + + auto didSkipSubquery(size_t skipped, size_t depth) -> void; + + auto getSkipOnSubqueryLevel(size_t depth) -> size_t; + + auto nothingSkipped() const noexcept -> bool; + + auto toVelocyPack(arangodb::velocypack::Builder& builder) const noexcept -> void; + + auto incrementSubquery() -> void; + + auto decrementSubquery() -> void; + + auto subqueryDepth() const noexcept -> size_t; + + auto reset() -> void; + + auto merge(SkipResult const& other, bool excludeTopLevel) noexcept -> void; + auto mergeOnlyTopLevel(SkipResult const& other) noexcept -> void; + + auto operator+=(SkipResult const& b) noexcept -> SkipResult&; + + auto operator==(SkipResult const& b) const noexcept -> bool; + auto operator!=(SkipResult const& b) const noexcept -> bool; + + private: + std::vector _skipped{0}; +}; + +std::ostream& operator<<(std::ostream&, arangodb::aql::SkipResult const&); + +} // namespace arangodb::aql + +#endif diff --git a/arangod/Aql/SortingGatherExecutor.h b/arangod/Aql/SortingGatherExecutor.h index 96f179bee753..35bc09dcb961 100644 --- a/arangod/Aql/SortingGatherExecutor.h +++ b/arangod/Aql/SortingGatherExecutor.h @@ -29,6 +29,8 @@ #include "Aql/ExecutorInfos.h" #include "Aql/InputAqlItemRow.h" +#include + namespace arangodb { namespace transaction { diff --git a/arangod/Aql/SubqueryExecutor.cpp b/arangod/Aql/SubqueryExecutor.cpp index c84b47f0346f..76be9ef6c6cc 100644 --- a/arangod/Aql/SubqueryExecutor.cpp +++ b/arangod/Aql/SubqueryExecutor.cpp @@ -77,88 +77,51 @@ SubqueryExecutor::~SubqueryExecutor() = default; template std::pair SubqueryExecutor::produceRows(OutputAqlItemRow& output) { -#if 0 - if (_state == ExecutorState::DONE && !_input.isInitialized()) { - // We have seen DONE upstream, and we have discarded our local reference - // to the last input, we will not be able to produce results anymore. 
- return {_state, NoStats{}}; - } - while (true) { - if (_subqueryInitialized) { - // Continue in subquery - - // Const case - if (_infos.isConst() && !_input.isFirstDataRowInBlock()) { - // Simply write - writeOutput(output); - return {_state, NoStats{}}; - } - - // Non const case, or first run in const - auto res = _subquery.getSome(ExecutionBlock::DefaultBatchSize); - if (res.first == ExecutionState::WAITING) { - TRI_ASSERT(res.second == nullptr); - return {res.first, NoStats{}}; - } - // We get a result - if (res.second != nullptr) { - TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - - if (_infos.returnsData()) { - TRI_ASSERT(_subqueryResults != nullptr); - _subqueryResults->emplace_back(std::move(res.second)); - } - } - - // Subquery DONE - if (res.first == ExecutionState::DONE) { - writeOutput(output); - return {_state, NoStats{}}; - } + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} - } else { - // init new subquery - if (!_input) { - std::tie(_state, _input) = _fetcher.fetchRow(); - if (_state == ExecutionState::WAITING) { - TRI_ASSERT(!_input); - return {_state, NoStats{}}; - } - if (!_input) { - TRI_ASSERT(_state == ExecutionState::DONE); +template +auto SubqueryExecutor::initializeSubquery(AqlItemBlockInputRange& input) + -> std::tuple { + // init new subquery + if (!_input) { + std::tie(_state, _input) = input.nextDataRow(); + LOG_DEVEL_SQ << uint64_t(this) << " nextDataRow: " << _state << " " + << _input.isInitialized(); + if (!_input) { + LOG_DEVEL_SQ << uint64_t(this) << "exit, no more input" << _state; + return {translatedReturnType(), false}; + } + } - // We are done! - return {_state, NoStats{}}; - } - } + TRI_ASSERT(_input); + if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { + LOG_DEVEL_SQ << "Subquery: Initialize cursor"; + auto [state, result] = _subquery.initializeCursor(_input); + if (state == ExecutionState::WAITING) { + LOG_DEVEL_SQ << "Waiting on initialize cursor"; + return {state, false}; + } - TRI_ASSERT(_input); - if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { - auto initRes = _subquery.initializeCursor(_input); - if (initRes.first == ExecutionState::WAITING) { - return {ExecutionState::WAITING, NoStats{}}; - } - if (initRes.second.fail()) { - // Error during initialize cursor - THROW_ARANGO_EXCEPTION(initRes.second); - } - _subqueryResults = std::make_unique>(); - } - // on const subquery we can retoggle init as soon as we have new input. - _subqueryInitialized = true; + if (result.fail()) { + // Error during initialize cursor + THROW_ARANGO_EXCEPTION(result); } + _subqueryResults = std::make_unique>(); } -#endif - TRI_ASSERT(false); - THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + // on const subquery we can retoggle init as soon as we have new input. + _subqueryInitialized = true; + return {translatedReturnType(), true}; } template auto SubqueryExecutor::produceRows(AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> std::tuple { + // We need to return skip in skipRows before + TRI_ASSERT(_skipped == 0); + auto getUpstreamCall = [&]() { AqlCall upstreamCall = output.getClientCall(); if constexpr (isModificationSubquery) { @@ -175,7 +138,12 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang // to the last input, we will not be able to produce results anymore. 
return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } - while (true) { + if (output.isFull()) { + // This can happen if there is no upstream + _state = input.upstreamState(); + } + + while (!output.isFull()) { if (_subqueryInitialized) { // Continue in subquery @@ -185,12 +153,12 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang writeOutput(output); LOG_DEVEL_SQ << uint64_t(this) << "wrote output is const " << _state << " " << getUpstreamCall(); - return {translatedReturnType(), NoStats{}, getUpstreamCall()}; + continue; } // Non const case, or first run in const auto [state, skipped, block] = _subquery.execute(AqlCallStack(AqlCall{})); - TRI_ASSERT(skipped == 0); + TRI_ASSERT(skipped.nothingSkipped()); if (state == ExecutionState::WAITING) { return {state, NoStats{}, getUpstreamCall()}; } @@ -214,40 +182,22 @@ auto SubqueryExecutor::produceRows(AqlItemBlockInputRang writeOutput(output); LOG_DEVEL_SQ << uint64_t(this) << "wrote output subquery done " << _state << " " << getUpstreamCall(); - return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } - } else { - // init new subquery - if (!_input) { - std::tie(_state, _input) = input.nextDataRow(); - LOG_DEVEL_SQ << uint64_t(this) << " nextDataRow: " << _state << " " - << _input.isInitialized(); - if (!_input) { - LOG_DEVEL_SQ << uint64_t(this) << "exit produce, no more input" << _state; - return {translatedReturnType(), NoStats{}, getUpstreamCall()}; - } + auto const [state, initialized] = initializeSubquery(input); + if (state == ExecutionState::WAITING) { + LOG_DEVEL_SQ << "Waiting on initialize cursor"; + return {state, NoStats{}, AqlCall{}}; } - - TRI_ASSERT(_input); - if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { - LOG_DEVEL_SQ << "Subquery: Initialize cursor"; - auto [state, result] = _subquery.initializeCursor(_input); - if (state == ExecutionState::WAITING) { - LOG_DEVEL_SQ << "Waiting on initialize cursor"; - return {state, NoStats{}, AqlCall{}}; - } - - if (result.fail()) { - // Error during initialize cursor - THROW_ARANGO_EXCEPTION(result); - } - _subqueryResults = std::make_unique>(); + if (!initialized) { + TRI_ASSERT(!_input); + return {state, NoStats{}, getUpstreamCall()}; } - // on const subquery we can retoggle init as soon as we have new input. - _subqueryInitialized = true; + TRI_ASSERT(_subqueryInitialized); } } + + return {translatedReturnType(), NoStats{}, getUpstreamCall()}; } template @@ -256,6 +206,7 @@ void SubqueryExecutor::writeOutput(OutputAqlItemRow& out TRI_IF_FAILURE("SubqueryBlock::getSome") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } + TRI_ASSERT(!output.isFull()); if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { // In the non const case we need to move the data into the output for every // row. @@ -326,8 +277,6 @@ auto SubqueryExecutor::skipRowsRange<>(AqlItemBlockInputRange& inputRange, return upstreamCall; }; - size_t skipped = 0; - LOG_DEVEL_SQ << uint64_t(this) << "skipRowsRange " << call; if (_state == ExecutorState::DONE && !_input.isInitialized()) { @@ -335,78 +284,62 @@ auto SubqueryExecutor::skipRowsRange<>(AqlItemBlockInputRange& inputRange, // to the last input, we will not be able to produce results anymore. 
return {translatedReturnType(), NoStats{}, 0, getUpstreamCall()}; } - while (true) { + TRI_ASSERT(call.needSkipMore()); + // We cannot have a modifying subquery considered const + TRI_ASSERT(!_infos.isConst()); + bool isFullCount = call.getLimit() == 0 && call.getOffset() == 0; + while (isFullCount || _skipped < call.getOffset()) { if (_subqueryInitialized) { // Continue in subquery - // Const case - if (_infos.isConst() && !_input.isFirstDataRowInBlock()) { - // Simply write - _subqueryInitialized = false; - _input = InputAqlItemRow(CreateInvalidInputRowHint{}); - skipped += 1; - call.didSkip(1); - LOG_DEVEL_SQ << uint64_t(this) << "did skip one"; - return {translatedReturnType(), NoStats{}, skipped, getUpstreamCall()}; - } - - // Non const case, or first run in const - auto [state, skipped, block] = _subquery.execute(AqlCallStack(AqlCall{})); - TRI_ASSERT(skipped == 0); + // While skipping we do not care for the result. + // Simply jump over it. + AqlCall subqueryCall{}; + subqueryCall.hardLimit = 0; + auto [state, skipRes, block] = _subquery.execute(AqlCallStack(subqueryCall)); + TRI_ASSERT(skipRes.nothingSkipped()); if (state == ExecutionState::WAITING) { return {state, NoStats{}, 0, getUpstreamCall()}; } - - // We get a result - if (block != nullptr) { - TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - - if (_infos.returnsData()) { - TRI_ASSERT(_subqueryResults != nullptr); - _subqueryResults->emplace_back(std::move(block)); - } + // We get a result, but we asked for no rows. + // so please give us no rows. + TRI_ASSERT(block == nullptr); + TRI_IF_FAILURE("SubqueryBlock::executeSubquery") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } // Subquery DONE if (state == ExecutionState::DONE) { _subqueryInitialized = false; _input = InputAqlItemRow(CreateInvalidInputRowHint{}); - skipped += 1; - call.didSkip(1); + _skipped += 1; LOG_DEVEL_SQ << uint64_t(this) << "did skip one"; - return {translatedReturnType(), NoStats{}, skipped, getUpstreamCall()}; } } else { - // init new subquery - if (!_input) { - std::tie(_state, _input) = inputRange.nextDataRow(); - - if (!_input) { - LOG_DEVEL_SQ << uint64_t(this) << "skipped nothing waiting for input " << _state; - return {translatedReturnType(), NoStats{}, skipped, getUpstreamCall()}; - } + auto const [state, initialized] = initializeSubquery(inputRange); + if (state == ExecutionState::WAITING) { + LOG_DEVEL_SQ << "Waiting on initialize cursor"; + return {state, NoStats{}, 0, AqlCall{}}; } - - TRI_ASSERT(_input); - if (!_infos.isConst() || _input.isFirstDataRowInBlock()) { - auto [state, result] = _subquery.initializeCursor(_input); - if (state == ExecutionState::WAITING) { - return {state, NoStats{}, 0, getUpstreamCall()}; + if (!initialized) { + TRI_ASSERT(!_input); + if (state == ExecutionState::DONE) { + // We are done, we will not get any more input. + break; } - - if (result.fail()) { - // Error during initialize cursor - THROW_ARANGO_EXCEPTION(result); - } - _subqueryResults = std::make_unique>(); + return {state, NoStats{}, 0, getUpstreamCall()}; } - // on const subquery we can retoggle init as soon as we have new input. - _subqueryInitialized = true; + TRI_ASSERT(_subqueryInitialized); } } + // If we get here, we are done with one set of skipping. + // We either skipped the offset + // or the fullCount + // or both if limit == 0. 
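In other words, the loop above counts one skipped row per fully fast-forwarded subquery and only stops early once the requested offset is reached; the accounting that follows reports the total in one step. A standalone sketch of that control flow (hypothetical names, no real subquery execution):

    // Standalone sketch of the subquery skip loop.
    #include <cassert>
    #include <cstddef>

    struct SkipRequest {
      std::size_t offset{0};
      bool fullCount{false};
    };

    // inputRows: data rows still available for this executor.
    // Returns how many rows are reported as skipped.
    std::size_t skipSubqueries(std::size_t inputRows, SkipRequest const& req) {
      std::size_t skipped = 0;
      while ((req.fullCount || skipped < req.offset) && inputRows > 0) {
        // Run the subquery with a hard limit of 0 (fast forward), discard the
        // result, then count the enclosing input row as skipped.
        --inputRows;
        ++skipped;
      }
      return skipped;
    }

    int main() {
      assert(skipSubqueries(10, {3, false}) == 3);   // offset only
      assert(skipSubqueries(10, {0, true}) == 10);   // fullCount drains the input
    }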
+ call.didSkip(_skipped); + _skipped = 0; + return {translatedReturnType(), NoStats{}, call.getSkipCount(), getUpstreamCall()}; } template class ::arangodb::aql::SubqueryExecutor; diff --git a/arangod/Aql/SubqueryExecutor.h b/arangod/Aql/SubqueryExecutor.h index b9e5c299827b..b28823180849 100644 --- a/arangod/Aql/SubqueryExecutor.h +++ b/arangod/Aql/SubqueryExecutor.h @@ -120,6 +120,19 @@ class SubqueryExecutor { */ auto translatedReturnType() const noexcept -> ExecutionState; + /** + * @brief Initiliaze the subquery with next input row + * Throws if there was an error during initialize cursor + * + * + * @param input Container for more data + * @return std::tuple Result state (WAITING or + * translatedReturnType()) + * bool flag if we have initialized the query, if not, we require more data. + */ + auto initializeSubquery(AqlItemBlockInputRange& input) + -> std::tuple; + private: Fetcher& _fetcher; SubqueryExecutorInfos& _infos; @@ -144,6 +157,8 @@ class SubqueryExecutor { // Cache for the input row we are currently working on InputAqlItemRow _input; + + size_t _skipped = 0; }; } // namespace aql } // namespace arangodb diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index c5ae8e10dc53..69ab26c764ef 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -349,6 +349,7 @@ set(LIB_ARANGO_AQL_SOURCES Aql/SimpleModifier.cpp Aql/SingleRemoteModificationExecutor.cpp Aql/SingleRowFetcher.cpp + Aql/SkipResult.cpp Aql/SortCondition.cpp Aql/SortExecutor.cpp Aql/SortNode.cpp diff --git a/tests/Aql/DependencyProxyMock.cpp b/tests/Aql/DependencyProxyMock.cpp index 620038a51a0c..05a4dc621b0c 100644 --- a/tests/Aql/DependencyProxyMock.cpp +++ b/tests/Aql/DependencyProxyMock.cpp @@ -25,6 +25,8 @@ #include "gtest/gtest.h" +#include "Aql/SkipResult.h" + #include namespace arangodb::tests::aql { @@ -130,10 +132,11 @@ DependencyProxyMock& DependencyProxyMock:: } template -std::tuple +std::tuple DependencyProxyMock::execute(AqlCallStack& stack) { TRI_ASSERT(_block != nullptr); - return {arangodb::aql::ExecutionState::DONE, 0, _block}; + SkipResult res{}; + return {arangodb::aql::ExecutionState::DONE, res, _block}; } template diff --git a/tests/Aql/DependencyProxyMock.h b/tests/Aql/DependencyProxyMock.h index 2d7c6d8b89af..5d873b84ea47 100644 --- a/tests/Aql/DependencyProxyMock.h +++ b/tests/Aql/DependencyProxyMock.h @@ -33,6 +33,9 @@ #include namespace arangodb { +namespace aql { +class SkipResult; +} namespace tests { namespace aql { @@ -51,7 +54,7 @@ class DependencyProxyMock : public ::arangodb::aql::DependencyProxy skipSome(size_t atMost) override; - std::tuple execute( + std::tuple execute( arangodb::aql::AqlCallStack& stack) override; private: @@ -101,8 +104,7 @@ class MultiDependencyProxyMock // NOLINTNEXTLINE google-default-arguments std::pair fetchBlockForDependency( - size_t dependency, - size_t atMost = arangodb::aql::ExecutionBlock::DefaultBatchSize) override; + size_t dependency, size_t atMost = arangodb::aql::ExecutionBlock::DefaultBatchSize) override; std::pair skipSomeForDependency(size_t dependency, size_t atMost) override; diff --git a/tests/Aql/ExecutionBlockImplTest.cpp b/tests/Aql/ExecutionBlockImplTest.cpp index 5199c766d248..97bf70ad6ec8 100644 --- a/tests/Aql/ExecutionBlockImplTest.cpp +++ b/tests/Aql/ExecutionBlockImplTest.cpp @@ -58,7 +58,7 @@ using LambdaExe = TestLambdaSkipExecutor; // This test is supposed to only test getSome return values, // it is not supposed to test the fetch logic! 
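
The old getSome-style tests below are fenced off with #if 0; the tests that remain all go through the execute() interface instead. As a rough sketch of that call shape (testee is a placeholder for an arbitrary execution block, and the comments only reflect how the returned tuple is consumed in this patch, not a normative API description):

AqlCall call;                  // default call: no offset, unbounded limit
AqlCallStack stack{call};
auto const [state, skipped, block] = testee->execute(stack);
// state:   ExecutionState::WAITING, HASMORE or DONE
// skipped: a SkipResult; tests compare skipped.getSkipCount() to the expectation
// block:   a SharedAqlItemBlockPtr, which may legitimately be nullptr
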
- +#if 0 class ExecutionBlockImplTest : public ::testing::Test { protected: // ExecutionState state @@ -400,6 +400,7 @@ TEST_F(ExecutionBlockImplTest, ASSERT_EQ(state, ExecutionState::DONE); ASSERT_EQ(block, nullptr); } +#endif /** * @brief Shared Test case initializer to test the execute API @@ -691,7 +692,7 @@ class ExecutionBlockImplExecuteSpecificTest : public SharedExecutionBlockImplTes * @return std::tuple Response of execute(call); */ auto runTest(ProduceCall& prod, SkipCall& skip, AqlCall call) - -> std::tuple { + -> std::tuple { AqlCallStack stack{std::move(call)}; auto singleton = createSingleton(); if (GetParam()) { @@ -775,7 +776,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_unlimited_call) { auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); // Once with empty, once with the line by Singleton EXPECT_EQ(nrCalls, 2); @@ -797,7 +798,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_softlimit_call) { auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); // Once with empty, once with the line by Singleton EXPECT_EQ(nrCalls, 2); @@ -819,7 +820,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_hardlimit_call) { auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); // Once with empty, once with the line by Singleton EXPECT_EQ(nrCalls, 2); @@ -838,7 +839,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_offset_call) { auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); if (GetParam()) { // Do never call skip, pass through EXPECT_EQ(nrCalls, 0); @@ -866,7 +867,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_toplevel_offset_only_call) { auto [state, skipped, block] = runTest(execImpl, skipCall, fullCall); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); if (GetParam()) { // Do never call skip, pass through EXPECT_EQ(nrCalls, 0); @@ -902,7 +903,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_relevant_shadowrow_does_not_f // First call. Fetch all rows (data only) auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); EXPECT_EQ(block->size(), ExecutionBlock::DefaultBatchSize); EXPECT_FALSE(block->hasShadowRows()); @@ -911,7 +912,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, test_relevant_shadowrow_does_not_f // Second call. only a single shadowRow left auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); EXPECT_EQ(block->size(), 1); EXPECT_TRUE(block->hasShadowRows()); @@ -945,7 +946,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, set_of_shadowrows_does_not_fit_in_ // First call. 
Fetch all rows (data only) auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); EXPECT_EQ(block->size(), ExecutionBlock::DefaultBatchSize); EXPECT_FALSE(block->hasShadowRows()); @@ -954,7 +955,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, set_of_shadowrows_does_not_fit_in_ // Second call. only the shadowRows are left auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); ASSERT_EQ(block->size(), 2); EXPECT_TRUE(block->hasShadowRows()); @@ -995,7 +996,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, set_of_shadowrows_does_not_fit_ful // First call. Fetch all rows (data + relevant shadow row) auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); EXPECT_EQ(block->size(), ExecutionBlock::DefaultBatchSize); EXPECT_TRUE(block->hasShadowRows()); @@ -1007,7 +1008,7 @@ TEST_P(ExecutionBlockImplExecuteSpecificTest, set_of_shadowrows_does_not_fit_ful // Second call. only the shadowRows are left auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); EXPECT_EQ(block->size(), 1); EXPECT_TRUE(block->hasShadowRows()); @@ -1602,7 +1603,7 @@ class ExecutionBlockImplExecuteIntegrationTest * @param testReg The register to evaluate * @param numShadowRows Number of preceeding shadowRows in result. */ - void ValidateResult(std::shared_ptr data, size_t skipped, + void ValidateResult(std::shared_ptr data, SkipResult skipped, SharedAqlItemBlockPtr result, RegisterId testReg, size_t numShadowRows = 0) { auto const& call = getCall(); @@ -1611,7 +1612,8 @@ class ExecutionBlockImplExecuteIntegrationTest TRI_ASSERT(data->slice().isArray()); VPackSlice expected = data->slice(); - ValidateSkipMatches(call, static_cast(expected.length()), skipped); + ValidateSkipMatches(call, static_cast(expected.length()), + skipped.getSkipCount()); VPackArrayIterator expectedIt{expected}; // Skip Part @@ -1687,7 +1689,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_waiting_block_mock) { auto [state, skipped, block] = testee.execute(stack); if (doesWaiting()) { EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); std::tie(state, skipped, block) = testee.execute(stack); } @@ -1721,7 +1723,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_only) { if (doesWaiting()) { auto const [state, skipped, block] = producer->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto const [state, skipped, block] = producer->execute(stack); @@ -1755,7 +1757,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_produce_using_two) { if (doesWaiting()) { auto const [state, skipped, block] = producer->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto const [state, skipped, block] = producer->execute(stack); @@ -1816,7 +1818,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, 
DISABLED_test_call_forwarding_p if (doesWaiting()) { auto const [state, skipped, block] = lower->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); // Reset call counters upperState.reset(); @@ -1900,7 +1902,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, DISABLED_test_call_forwarding_i if (doesWaiting()) { auto const [state, skipped, block] = lower->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto const [state, skipped, block] = lower->execute(stack); @@ -1945,7 +1947,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls) { size_t killSwitch = 0; while (state == ExecutionState::WAITING) { EXPECT_TRUE(doesWaiting()); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); std::tie(state, skipped, block) = testee->execute(stack); // Kill switch to avoid endless loop in case of error. @@ -2004,7 +2006,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls_pa size_t waited = 0; while (state == ExecutionState::WAITING && waited < 2 /* avoid endless waiting*/) { EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); waited++; std::tie(state, skipped, block) = testee->execute(stack); @@ -2014,10 +2016,10 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls_pa EXPECT_EQ(block, nullptr); if (fullCount) { // We skipped everything - EXPECT_EQ(skipped, 1000); + EXPECT_EQ(skipped.getSkipCount(), 1000); EXPECT_EQ(state, ExecutionState::DONE); } else { - EXPECT_EQ(skipped, offset); + EXPECT_EQ(skipped.getSkipCount(), offset); EXPECT_EQ(state, ExecutionState::HASMORE); } } else { @@ -2033,7 +2035,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls_pa size_t waited = 0; while (state == ExecutionState::WAITING && waited < 3 /* avoid endless waiting*/) { EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); waited++; std::tie(state, skipped, block) = testee->execute(stack); @@ -2051,8 +2053,8 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls_pa ASSERT_EQ(block->size(), 1); // Book-keeping for call. // We need to request data from above with the correct call. - if (skipped > 0) { - call.didSkip(skipped); + if (!skipped.nothingSkipped()) { + call.didSkip(skipped.getSkipCount()); } call.didProduce(1); auto got = block->getValueReference(0, outReg).slice(); @@ -2061,15 +2063,15 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, test_multiple_upstream_calls_pa << " in row " << i << " and register " << outReg; if (i == 0) { // The first data row includes skip - EXPECT_EQ(skipped, offset); + EXPECT_EQ(skipped.getSkipCount(), offset); } else { if (call.getLimit() == 0 && call.hasHardLimit() && call.needsFullCount()) { // The last row, with fullCount needs to contain data. - EXPECT_EQ(skipped, 1000 - limit - offset); + EXPECT_EQ(skipped.getSkipCount(), 1000 - limit - offset); } else { // Do not skip on later data rows // Except the last one on fullcount - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); } } // NOTE: We might want to get into this situation. 
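
The assertions in these hunks all switch from comparing a plain size_t to querying a SkipResult. A minimal sketch of the SkipResult calls relied on here, mirroring the behaviour pinned down by the SkipResultTest.cpp added later in this patch:

#include "Aql/SkipResult.h"
using arangodb::aql::SkipResult;

SkipResult skipped;           // getSkipCount() == 0, nothingSkipped() == true
skipped.didSkip(3);
skipped.didSkip(2);           // counts accumulate: getSkipCount() == 5
skipped.incrementSubquery();  // open a new subquery level
// getSkipCount() now reports the new, still empty level (0), while
// nothingSkipped() stays false because something was skipped further down.
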
@@ -2136,7 +2138,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, only_relevant_shadowRows) { if (doesWaiting()) { // We wait between lines EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); std::tie(state, skipped, block) = testee->execute(stack); } @@ -2147,7 +2149,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, only_relevant_shadowRows) { EXPECT_EQ(state, ExecutionState::HASMORE); } // Cannot skip a shadowRow - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); ASSERT_EQ(block->size(), 1); EXPECT_TRUE(block->hasShadowRows()); @@ -2194,7 +2196,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, input_and_relevant_shadowRow) { if (doesWaiting()) { auto const [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto const [state, skipped, block] = testee->execute(stack); @@ -2246,7 +2248,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, input_and_non_relevant_shadowRo if (doesWaiting()) { auto const [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto const [state, skipped, block] = testee->execute(stack); @@ -2315,7 +2317,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, multiple_subqueries) { if (doesWaiting()) { auto const [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto const [state, skipped, block] = testee->execute(stack); @@ -2337,7 +2339,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, multiple_subqueries) { testee->execute(forwardStack); // We do not care for any data left EXPECT_EQ(forwardState, ExecutionState::HASMORE); - EXPECT_EQ(forwardSkipped, 0); + EXPECT_EQ(forwardSkipped.getSkipCount(), 0); // However there need to be two shadow rows ASSERT_NE(forwardBlock, nullptr); ASSERT_EQ(forwardBlock->size(), 2); @@ -2396,7 +2398,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, empty_subquery) { // we only wait exactly once, only one block upstream that is not sliced. 
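
The doesWaiting() branches in these integration tests all follow the same retry protocol; as a sketch (test_multiple_upstream_calls above additionally guards the loop with a kill switch, omitted here):

auto [state, skipped, block] = testee->execute(stack);
while (state == ExecutionState::WAITING) {
  // a WAITING result consumes nothing:
  // skipped.getSkipCount() == 0 and block == nullptr
  std::tie(state, skipped, block) = testee->execute(stack);
}
// only the first non-WAITING result carries skip counts and/or data
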
auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } auto call = getCall(); @@ -2408,10 +2410,10 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, empty_subquery) { EXPECT_EQ(state, ExecutionState::HASMORE); ASSERT_NE(block, nullptr); if (skip) { - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); EXPECT_EQ(block->size(), 2); } else { - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block->size(), 3); } size_t row = 0; @@ -2446,7 +2448,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, empty_subquery) { auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); ASSERT_NE(block, nullptr); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block->size(), 1); size_t row = 0; AssertIsShadowRowOfDepth(block, row, 0); @@ -2472,7 +2474,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, empty_subquery) { auto const& [state, skipped, block] = testee->execute(stack); EXPECT_EQ(state, ExecutionState::DONE); ASSERT_NE(block, nullptr); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block->size(), 2); size_t row = 0; AssertIsShadowRowOfDepth(block, row, 0); @@ -2542,7 +2544,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, auto [state, skipped, block] = testee.execute(stack); if (doesWaiting()) { EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); std::tie(state, skipped, block) = testee.execute(stack); } @@ -2601,7 +2603,7 @@ TEST_P(ExecutionBlockImplExecuteIntegrationTest, DISABLED_test_outer_subquery_fo auto [state, skipped, block] = testee.execute(stack); if (doesWaiting()) { EXPECT_EQ(state, ExecutionState::WAITING); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); std::tie(state, skipped, block) = testee.execute(stack); } diff --git a/tests/Aql/ExecutionBlockImplTestInstances.cpp b/tests/Aql/ExecutionBlockImplTestInstances.cpp index b5bfc68a589b..a7af31f4c8f6 100644 --- a/tests/Aql/ExecutionBlockImplTestInstances.cpp +++ b/tests/Aql/ExecutionBlockImplTestInstances.cpp @@ -3,7 +3,7 @@ #include "TestExecutorHelper.h" #include "TestLambdaExecutor.h" -template class ::arangodb::aql::ExecutionBlockImpl; -template class ::arangodb::aql::ExecutionBlockImpl; +// template class ::arangodb::aql::ExecutionBlockImpl; +// template class ::arangodb::aql::ExecutionBlockImpl; template class ::arangodb::aql::ExecutionBlockImpl; template class ::arangodb::aql::ExecutionBlockImpl; diff --git a/tests/Aql/ExecutorTestHelper.h b/tests/Aql/ExecutorTestHelper.h index c3b2686246f1..1a007f01782f 100644 --- a/tests/Aql/ExecutorTestHelper.h +++ b/tests/Aql/ExecutorTestHelper.h @@ -210,7 +210,7 @@ struct ExecutorTestHelper { ExecutorTestHelper(ExecutorTestHelper const&) = delete; ExecutorTestHelper(ExecutorTestHelper&&) = delete; explicit ExecutorTestHelper(arangodb::aql::Query& query) - : _expectedSkip{0}, + : _expectedSkip{}, _expectedState{ExecutionState::HASMORE}, _testStats{false}, _unorderedOutput{false}, @@ -300,8 +300,24 @@ struct ExecutorTestHelper { return *this; } - auto expectSkipped(std::size_t skip) -> ExecutorTestHelper& { - _expectedSkip = skip; + /** + * @brief + * + * @tparam Ts numeric type, can actually only be size_t + * @param skipOnLevel List of skip counters returned per level. 
subquery skips first, the last entry is the skip on the executor + * @return ExecutorTestHelper& chaining! + */ + template + auto expectSkipped(T skipFirst, Ts const... skipOnHigherLevel) -> ExecutorTestHelper& { + _expectedSkip = SkipResult{}; + // This is obvious, proof: Homework. + (_expectedSkip.didSkip(static_cast(skipFirst)), ..., + (_expectedSkip.incrementSubquery(), + _expectedSkip.didSkip(static_cast(skipOnHigherLevel)))); + + // NOTE: the above will increment didSkip by the first entry. + // For all following entries it will first increment the subquery depth + // and then add the didSkip on them. return *this; } @@ -377,7 +393,7 @@ struct ExecutorTestHelper { auto inputBlock = generateInputRanges(itemBlockManager); - auto skippedTotal = size_t{0}; + auto skippedTotal = SkipResult{}; auto finalState = ExecutionState::HASMORE; TRI_ASSERT(!_pipeline.empty()); @@ -387,7 +403,7 @@ struct ExecutorTestHelper { if (!loop) { auto const [state, skipped, result] = _pipeline.get().front()->execute(_callStack); - skippedTotal = skipped; + skippedTotal.merge(skipped, false); finalState = state; if (result != nullptr) { allResults.add(result); @@ -397,8 +413,8 @@ struct ExecutorTestHelper { auto const [state, skipped, result] = _pipeline.get().front()->execute(_callStack); finalState = state; auto call = _callStack.popCall(); - skippedTotal += skipped; - call.didSkip(skipped); + skippedTotal.merge(skipped, false); + call.didSkip(skipped.getSkipCount()); if (result != nullptr) { call.didProduce(result->size()); allResults.add(result); @@ -409,7 +425,6 @@ struct ExecutorTestHelper { (!_callStack.peek().hasSoftLimit() || (_callStack.peek().getLimit() + _callStack.peek().getOffset()) > 0)); } - EXPECT_EQ(skippedTotal, _expectedSkip); EXPECT_EQ(finalState, _expectedState); SharedAqlItemBlockPtr result = allResults.steal(); @@ -500,7 +515,7 @@ struct ExecutorTestHelper { MatrixBuilder _output; std::vector> _outputShadowRows{}; std::array _outputRegisters; - std::size_t _expectedSkip; + SkipResult _expectedSkip; ExecutionState _expectedState; ExecutionStats _expectedStats; bool _testStats; diff --git a/tests/Aql/HashedCollectExecutorTest.cpp b/tests/Aql/HashedCollectExecutorTest.cpp index 6aca1b82d975..63dffa7cd6d9 100644 --- a/tests/Aql/HashedCollectExecutorTest.cpp +++ b/tests/Aql/HashedCollectExecutorTest.cpp @@ -257,7 +257,7 @@ TEST_P(HashedCollectExecutorTest, collect_only_soft_less_second_call) { AqlCallStack stack{call}; auto const [state, skipped, result] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(result, nullptr); asserthelper::ValidateBlocksAreEqualUnordered(result, buildExpectedOutput(), matchedRows, 2, registersToTest); @@ -270,7 +270,7 @@ TEST_P(HashedCollectExecutorTest, collect_only_soft_less_second_call) { AqlCallStack stack{call}; auto const [state, skipped, result] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(result, nullptr); asserthelper::ValidateBlocksAreEqualUnordered(result, buildExpectedOutput(), matchedRows, 0, registersToTest); diff --git a/tests/Aql/IdExecutorTest.cpp b/tests/Aql/IdExecutorTest.cpp index ac203cf39da3..4ec6f1b94df8 100644 --- a/tests/Aql/IdExecutorTest.cpp +++ b/tests/Aql/IdExecutorTest.cpp @@ -297,7 +297,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_get) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); 
EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } { @@ -312,7 +312,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_get) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_NE(block, nullptr); EXPECT_EQ(block->size(), 1); auto const& val = block->getValueReference(0, 0); @@ -340,7 +340,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_skip) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } { @@ -356,7 +356,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_skip) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); ASSERT_EQ(block, nullptr); } } @@ -381,7 +381,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_fullCount) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } { @@ -398,7 +398,7 @@ TEST_F(IdExecutionBlockTest, test_initialize_cursor_fullCount) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); ASSERT_EQ(block, nullptr); } } @@ -443,7 +443,8 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher) { // Inject block auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}); - testee.injectConstantBlock(inputBlock); + + testee.injectConstantBlock(inputBlock, SkipResult{}); } { // Now call with too small hardLimit @@ -455,9 +456,9 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher) { auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); if (useFullCount()) { - EXPECT_EQ(skipped, 4); + EXPECT_EQ(skipped.getSkipCount(), 4); } else { - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); } asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); @@ -468,7 +469,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } } @@ -480,7 +481,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_at_end) { auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}, {{5, 0}, {6, 1}}); - testee.injectConstantBlock(inputBlock); + testee.injectConstantBlock(inputBlock, SkipResult{}); } { // Now call with too small hardLimit @@ -493,9 +494,9 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_at_end) { auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); if (useFullCount()) { - EXPECT_EQ(skipped, 2); + EXPECT_EQ(skipped.getSkipCount(), 2); } else { - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); } 
asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } @@ -505,7 +506,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_at_end) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } } @@ -517,7 +518,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_in_between) { auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}, {{3, 0}, {4, 1}, {6, 0}}); - testee.injectConstantBlock(inputBlock); + testee.injectConstantBlock(inputBlock, SkipResult{}); } { // Now call with too small hardLimit @@ -530,9 +531,9 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_in_between) { auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); if (useFullCount()) { - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); } else { - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); } asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } @@ -544,7 +545,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_shadow_rows_in_between) { AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } } @@ -557,7 +558,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) auto inputBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}, {4}, {5}, {6}}, {{3, 0}, {4, 1}, {5, 0}, {6, 0}}); - testee.injectConstantBlock(inputBlock); + testee.injectConstantBlock(inputBlock, SkipResult{}); } // We can only return until the next top-level shadow row is reached. 
{ @@ -571,9 +572,9 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); if (useFullCount()) { - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); } else { - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); } asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } @@ -586,7 +587,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { @@ -598,7 +599,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); asserthelper::ValidateBlocksAreEqual(block, expectedOutputBlock); } { @@ -607,7 +608,7 @@ TEST_P(BlockOverloadTest, test_hardlimit_const_fetcher_consecutive_shadow_rows) AqlCallStack stack(std::move(call)); auto const& [state, skipped, block] = testee.execute(stack); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(block, nullptr); } } diff --git a/tests/Aql/RemoteExecutorTest.cpp b/tests/Aql/RemoteExecutorTest.cpp index c5e4bef76825..8a82034d4b60 100644 --- a/tests/Aql/RemoteExecutorTest.cpp +++ b/tests/Aql/RemoteExecutorTest.cpp @@ -71,9 +71,7 @@ class DeSerializeAqlCallTest : public ::testing::TestWithParam { public: DeSerializeAqlCallTest() = default; - void SetUp() override { - aqlCall = GetParam(); - } + void SetUp() override { aqlCall = GetParam(); } protected: AqlCall aqlCall{}; @@ -118,20 +116,18 @@ class DeSerializeAqlCallStackTest : public ::testing::TestWithParam::error(-1); }); - ASSERT_TRUE(maybeDeSerializedCallStack.ok()) << maybeDeSerializedCallStack.errorMessage(); + ASSERT_TRUE(maybeDeSerializedCallStack.ok()) + << maybeDeSerializedCallStack.errorMessage(); auto const deSerializedCallStack = *maybeDeSerializedCallStack; ASSERT_EQ(aqlCallStack, deSerializedCallStack); } -INSTANTIATE_TEST_CASE_P(DeSerializeAqlCallStackTestVariations, DeSerializeAqlCallStackTest, testingAqlCallStacks); - +INSTANTIATE_TEST_CASE_P(DeSerializeAqlCallStackTestVariations, + DeSerializeAqlCallStackTest, testingAqlCallStacks); class DeSerializeAqlExecuteResultTest : public ::testing::TestWithParam { public: DeSerializeAqlExecuteResultTest() = default; - void SetUp() override { - aqlExecuteResult = GetParam(); - } + void SetUp() override { aqlExecuteResult = GetParam(); } protected: - AqlExecuteResult aqlExecuteResult{ExecutionState::DONE, 0, nullptr}; + AqlExecuteResult aqlExecuteResult{ExecutionState::DONE, SkipResult{}, nullptr}; }; ResourceMonitor resourceMonitor{}; AqlItemBlockManager manager{&resourceMonitor, SerializationFormat::SHADOWROWS}; +auto MakeSkipResult(size_t const i) -> SkipResult { + SkipResult res{}; + res.didSkip(i); + return res; +} + auto const testingAqlExecuteResults = ::testing::ValuesIn(std::array{ - AqlExecuteResult{ExecutionState::DONE, 0, nullptr}, - AqlExecuteResult{ExecutionState::HASMORE, 0, nullptr}, - AqlExecuteResult{ExecutionState::HASMORE, 4, nullptr}, - AqlExecuteResult{ExecutionState::DONE, 0, 
buildBlock<1>(manager, {{42}})}, - AqlExecuteResult{ExecutionState::HASMORE, 3, buildBlock<2>(manager, {{3, 42}, {4, 41}})}, + AqlExecuteResult{ExecutionState::DONE, MakeSkipResult(0), nullptr}, + AqlExecuteResult{ExecutionState::HASMORE, MakeSkipResult(4), nullptr}, + AqlExecuteResult{ExecutionState::DONE, MakeSkipResult(0), buildBlock<1>(manager, {{42}})}, + AqlExecuteResult{ExecutionState::HASMORE, MakeSkipResult(3), + buildBlock<2>(manager, {{3, 42}, {4, 41}})}, }); TEST_P(DeSerializeAqlExecuteResultTest, testSuite) { @@ -203,7 +204,8 @@ TEST_P(DeSerializeAqlExecuteResultTest, testSuite) { ASSERT_EQ(aqlExecuteResult.state(), deSerializedAqlExecuteResult.state()); ASSERT_EQ(aqlExecuteResult.skipped(), deSerializedAqlExecuteResult.skipped()); - ASSERT_EQ(aqlExecuteResult.block() == nullptr, deSerializedAqlExecuteResult.block() == nullptr); + ASSERT_EQ(aqlExecuteResult.block() == nullptr, + deSerializedAqlExecuteResult.block() == nullptr); if (aqlExecuteResult.block() != nullptr) { ASSERT_EQ(*aqlExecuteResult.block(), *deSerializedAqlExecuteResult.block()) << "left: " << blockToString(aqlExecuteResult.block()) @@ -212,6 +214,7 @@ TEST_P(DeSerializeAqlExecuteResultTest, testSuite) { ASSERT_EQ(aqlExecuteResult, deSerializedAqlExecuteResult); } -INSTANTIATE_TEST_CASE_P(DeSerializeAqlExecuteResultTestVariations, DeSerializeAqlExecuteResultTest, testingAqlExecuteResults); +INSTANTIATE_TEST_CASE_P(DeSerializeAqlExecuteResultTestVariations, + DeSerializeAqlExecuteResultTest, testingAqlExecuteResults); } // namespace arangodb::tests::aql diff --git a/tests/Aql/ScatterExecutorTest.cpp b/tests/Aql/ScatterExecutorTest.cpp index 883a8b8498c0..d5cd0968fb98 100644 --- a/tests/Aql/ScatterExecutorTest.cpp +++ b/tests/Aql/ScatterExecutorTest.cpp @@ -159,7 +159,7 @@ TEST_P(RandomOrderTest, all_clients_should_get_the_block) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, inputBlock); } } @@ -179,7 +179,7 @@ TEST_P(RandomOrderTest, all_clients_can_skip_the_block) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 3); + EXPECT_EQ(skipped.getSkipCount(), 3); EXPECT_EQ(block, nullptr); } } @@ -201,7 +201,7 @@ TEST_P(RandomOrderTest, all_clients_can_fullcount_the_block) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 2); + EXPECT_EQ(skipped.getSkipCount(), 2); ValidateBlocksAreEqual(block, expectedBlock); } } @@ -223,7 +223,7 @@ TEST_P(RandomOrderTest, all_clients_can_have_different_calls) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, inputBlock); } else if (client == "b") { AqlCall call{}; @@ -232,7 +232,7 @@ TEST_P(RandomOrderTest, all_clients_can_have_different_calls) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 2); + EXPECT_EQ(skipped.getSkipCount(), 2); auto expectedBlock = buildBlock<1>(itemBlockManager, {{2}, {3}}); ValidateBlocksAreEqual(block, expectedBlock); } else if (client 
== "c") { @@ -242,7 +242,7 @@ TEST_P(RandomOrderTest, all_clients_can_have_different_calls) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expectedBlock = buildBlock<1>(itemBlockManager, {{0}, {1}}); ValidateBlocksAreEqual(block, expectedBlock); } @@ -254,7 +254,7 @@ TEST_P(RandomOrderTest, all_clients_can_have_different_calls) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); auto expectedBlock = buildBlock<1>(itemBlockManager, {{3}, {4}}); ValidateBlocksAreEqual(block, expectedBlock); } @@ -283,7 +283,7 @@ TEST_P(RandomOrderTest, get_does_not_jump_over_shadowrows) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, firstExpectedBlock); } @@ -295,7 +295,7 @@ TEST_P(RandomOrderTest, get_does_not_jump_over_shadowrows) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, secondExpectedBlock); } } @@ -321,7 +321,7 @@ TEST_P(RandomOrderTest, handling_of_higher_depth_shadowrows_produce) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, firstExpectedBlock); } @@ -333,7 +333,7 @@ TEST_P(RandomOrderTest, handling_of_higher_depth_shadowrows_produce) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, secondExpectedBlock); } } @@ -360,7 +360,7 @@ TEST_P(RandomOrderTest, handling_of_higher_depth_shadowrows_skip) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 2); + EXPECT_EQ(skipped.getSkipCount(), 2); ValidateBlocksAreEqual(block, firstExpectedBlock); } @@ -372,7 +372,7 @@ TEST_P(RandomOrderTest, handling_of_higher_depth_shadowrows_skip) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ValidateBlocksAreEqual(block, secondExpectedBlock); } } @@ -398,7 +398,7 @@ TEST_P(RandomOrderTest, handling_of_consecutive_shadow_rows) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expected = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}, {{2, 0}, {3, 1}}); ValidateBlocksAreEqual(block, expected); @@ -409,7 +409,7 @@ TEST_P(RandomOrderTest, handling_of_consecutive_shadow_rows) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); 
EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expected = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{0, 0}, {1, 1}}); ValidateBlocksAreEqual(block, expected); } @@ -434,7 +434,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expectedBlock = buildBlock<1>(itemBlockManager, {{0}, {1}, {2}, {3}}, {{3, 0}}); ValidateBlocksAreEqual(block, expectedBlock); @@ -445,7 +445,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 2); + EXPECT_EQ(skipped.getSkipCount(), 2); auto expectedBlock = buildBlock<1>(itemBlockManager, {{2}, {3}}, {{1, 0}}); ValidateBlocksAreEqual(block, expectedBlock); } else if (client == "c") { @@ -455,7 +455,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expectedBlock = buildBlock<1>(itemBlockManager, {{0}, {1}}); ValidateBlocksAreEqual(block, expectedBlock); } @@ -467,7 +467,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::HASMORE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); auto expectedBlock = buildBlock<1>(itemBlockManager, {{3}}, {{0, 0}}); ValidateBlocksAreEqual(block, expectedBlock); } @@ -484,7 +484,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{1, 0}}); ValidateBlocksAreEqual(block, expectedBlock); } else if (client == "b") { @@ -493,7 +493,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); auto expectedBlock = buildBlock<1>(itemBlockManager, {{4}, {5}}, {{1, 0}}); ValidateBlocksAreEqual(block, expectedBlock); } else if (client == "c") { @@ -503,7 +503,7 @@ TEST_P(RandomOrderTest, shadowrows_with_different_call_types) { AqlCallStack stack{call}; auto const [state, skipped, block] = testee.executeForClient(stack, client); EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 1); + EXPECT_EQ(skipped.getSkipCount(), 1); auto expectedBlock = buildBlock<1>(itemBlockManager, {{5}}, {{0, 0}}); ValidateBlocksAreEqual(block, expectedBlock); } @@ -579,7 +579,7 @@ TEST_F(ScatterExecutionBlockTest, any_ordering_of_calls_is_fine) { } else { EXPECT_EQ(state, ExecutionState::HASMORE); } - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); ASSERT_TRUE(callNr < blocks.size()); ValidateBlocksAreEqual(block, blocks[callNr]); callNr++; diff --git 
a/tests/Aql/SingleRowFetcherTest.cpp b/tests/Aql/SingleRowFetcherTest.cpp index f0ed040e2783..7ea984f8b9e6 100644 --- a/tests/Aql/SingleRowFetcherTest.cpp +++ b/tests/Aql/SingleRowFetcherTest.cpp @@ -1228,7 +1228,7 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_oneAndDone // First no data row auto [state, skipped, input] = testee.execute(stack); EXPECT_EQ(input.getRowIndex(), 0); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(state, ExecutionState::DONE); } // testee is destroyed here } @@ -1263,7 +1263,7 @@ TEST_F(SingleRowFetcherTestPassBlocks, handling_shadowrows_in_execute_twoAndHasM auto [state, skipped, input] = testee.execute(stack); // We only have one block, no more calls to execute necessary EXPECT_EQ(state, ExecutionState::DONE); - EXPECT_EQ(skipped, 0); + EXPECT_EQ(skipped.getSkipCount(), 0); EXPECT_EQ(input.getRowIndex(), 0); // Now validate the input range diff --git a/tests/Aql/SkipResultTest.cpp b/tests/Aql/SkipResultTest.cpp new file mode 100644 index 000000000000..958ab0ad5563 --- /dev/null +++ b/tests/Aql/SkipResultTest.cpp @@ -0,0 +1,267 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "gtest/gtest.h" + +#include "Aql/SkipResult.h" +#include "Cluster/ResultT.h" + +#include +#include + +using namespace arangodb; +using namespace arangodb::aql; + +namespace arangodb { +namespace tests { +namespace aql { + +class SkipResultTest : public ::testing::Test { + protected: + SkipResultTest() {} +}; + +TEST_F(SkipResultTest, defaults_to_0_skip) { + SkipResult testee{}; + EXPECT_EQ(testee.getSkipCount(), 0); +} + +TEST_F(SkipResultTest, counts_skip) { + SkipResult testee{}; + testee.didSkip(5); + EXPECT_EQ(testee.getSkipCount(), 5); +} + +TEST_F(SkipResultTest, accumulates_skips) { + SkipResult testee{}; + testee.didSkip(3); + testee.didSkip(6); + testee.didSkip(8); + EXPECT_EQ(testee.getSkipCount(), 17); +} + +TEST_F(SkipResultTest, is_copyable) { + SkipResult original{}; + original.didSkip(6); + SkipResult testee{original}; + + EXPECT_EQ(testee.getSkipCount(), original.getSkipCount()); + + original.didSkip(7); + EXPECT_NE(testee.getSkipCount(), original.getSkipCount()); +} + +TEST_F(SkipResultTest, can_report_if_we_skip) { + SkipResult testee{}; + EXPECT_TRUE(testee.nothingSkipped()); + testee.didSkip(3); + EXPECT_FALSE(testee.nothingSkipped()); + testee.didSkip(6); + EXPECT_FALSE(testee.nothingSkipped()); +} + +TEST_F(SkipResultTest, serialize_deserialize_empty) { + SkipResult original{}; + VPackBuilder builder; + original.toVelocyPack(builder); + auto maybeTestee = SkipResult::fromVelocyPack(builder.slice()); + ASSERT_FALSE(maybeTestee.fail()); + auto testee = maybeTestee.get(); + EXPECT_EQ(testee.nothingSkipped(), original.nothingSkipped()); + EXPECT_EQ(testee.getSkipCount(), original.getSkipCount()); + EXPECT_EQ(testee, original); +} + +TEST_F(SkipResultTest, serialize_deserialize_with_count) { + SkipResult original{}; + original.didSkip(6); + VPackBuilder builder; + original.toVelocyPack(builder); + auto maybeTestee = SkipResult::fromVelocyPack(builder.slice()); + ASSERT_FALSE(maybeTestee.fail()); + auto testee = maybeTestee.get(); + EXPECT_EQ(testee.nothingSkipped(), original.nothingSkipped()); + EXPECT_EQ(testee.getSkipCount(), original.getSkipCount()); + EXPECT_EQ(testee, original); +} + +TEST_F(SkipResultTest, can_be_added) { + SkipResult a{}; + a.didSkip(6); + SkipResult b{}; + b.didSkip(7); + a += b; + EXPECT_EQ(a.getSkipCount(), 13); +} + +TEST_F(SkipResultTest, can_add_a_subquery_depth) { + SkipResult a{}; + a.didSkip(5); + EXPECT_EQ(a.getSkipCount(), 5); + a.incrementSubquery(); + EXPECT_EQ(a.getSkipCount(), 0); + a.didSkip(7); + EXPECT_EQ(a.getSkipCount(), 7); + a.decrementSubquery(); + EXPECT_EQ(a.getSkipCount(), 5); +} + +TEST_F(SkipResultTest, nothing_skip_on_subquery) { + SkipResult a{}; + EXPECT_TRUE(a.nothingSkipped()); + a.didSkip(6); + EXPECT_FALSE(a.nothingSkipped()); + a.incrementSubquery(); + EXPECT_EQ(a.getSkipCount(), 0); + EXPECT_FALSE(a.nothingSkipped()); +} + +TEST_F(SkipResultTest, serialize_deserialize_with_a_subquery) { + SkipResult original{}; + original.didSkip(6); + original.incrementSubquery(); + original.didSkip(2); + + VPackBuilder builder; + original.toVelocyPack(builder); + auto maybeTestee = SkipResult::fromVelocyPack(builder.slice()); + ASSERT_FALSE(maybeTestee.fail()); + auto testee = maybeTestee.get(); + // Use built_in eq + EXPECT_EQ(testee, original); + // Manual test + EXPECT_EQ(testee.nothingSkipped(), original.nothingSkipped()); + 
EXPECT_EQ(testee.getSkipCount(), original.getSkipCount()); + EXPECT_EQ(testee.subqueryDepth(), original.subqueryDepth()); + original.decrementSubquery(); + testee.decrementSubquery(); + EXPECT_EQ(testee.nothingSkipped(), original.nothingSkipped()); + EXPECT_EQ(testee.getSkipCount(), original.getSkipCount()); + EXPECT_EQ(testee.subqueryDepth(), original.subqueryDepth()); +} + +TEST_F(SkipResultTest, equality) { + auto buildTestSet = []() -> std::vector { + SkipResult empty{}; + SkipResult skip1{}; + skip1.didSkip(6); + + SkipResult skip2{}; + skip2.didSkip(8); + + SkipResult subQuery1{}; + subQuery1.incrementSubquery(); + subQuery1.didSkip(4); + + SkipResult subQuery2{}; + subQuery2.didSkip(8); + subQuery2.incrementSubquery(); + subQuery2.didSkip(4); + + SkipResult subQuery3{}; + subQuery3.didSkip(8); + subQuery3.incrementSubquery(); + return {empty, skip1, skip2, subQuery1, subQuery2, subQuery3}; + }; + + // We create two identical sets with different entries + auto set1 = buildTestSet(); + auto set2 = buildTestSet(); + for (size_t i = 0; i < set1.size(); ++i) { + for (size_t j = 0; j < set2.size(); ++j) { + // Addresses are different + EXPECT_NE(&set1.at(i), &set2.at(j)); + // Identical index => Equal object + if (i == j) { + EXPECT_EQ(set1.at(i), set2.at(j)); + } else { + EXPECT_NE(set1.at(i), set2.at(j)); + } + } + } +} + +TEST_F(SkipResultTest, merge_with_toplevel) { + SkipResult a{}; + a.didSkip(12); + a.incrementSubquery(); + a.didSkip(8); + + SkipResult b{}; + b.didSkip(9); + b.incrementSubquery(); + b.didSkip(2); + + a.merge(b, false); + + SkipResult expected{}; + expected.didSkip(12); + expected.didSkip(9); + expected.incrementSubquery(); + expected.didSkip(8); + expected.didSkip(2); + EXPECT_EQ(a, expected); +} + +TEST_F(SkipResultTest, merge_without_toplevel) { + SkipResult a{}; + a.didSkip(12); + a.incrementSubquery(); + a.didSkip(8); + + SkipResult b{}; + b.didSkip(9); + b.incrementSubquery(); + b.didSkip(2); + + a.merge(b, true); + + SkipResult expected{}; + expected.didSkip(12); + expected.didSkip(9); + expected.incrementSubquery(); + expected.didSkip(8); + EXPECT_EQ(a, expected); +} + +TEST_F(SkipResultTest, reset) { + SkipResult a{}; + a.didSkip(12); + a.incrementSubquery(); + a.didSkip(8); + + EXPECT_EQ(a.getSkipCount(), 8); + EXPECT_EQ(a.subqueryDepth(), 2); + EXPECT_FALSE(a.nothingSkipped()); + a.reset(); + + EXPECT_EQ(a.getSkipCount(), 0); + EXPECT_EQ(a.subqueryDepth(), 2); + EXPECT_TRUE(a.nothingSkipped()); + + a.decrementSubquery(); + EXPECT_EQ(a.getSkipCount(), 0); +} + +} // namespace aql +} // namespace tests +} // namespace arangodb diff --git a/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp b/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp index 94e9940f4e42..19b3d524716f 100644 --- a/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp +++ b/tests/Aql/SpliceSubqueryOptimizerRuleTest.cpp @@ -313,49 +313,41 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_sort) { verifyQueryResult(query, expected->slice()); } -// Must be changed as soon as the subquery implementation with shadow rows handle skipping, -// and the splice-subqueries optimizer rule is changed to allow it. 
-TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__inner_limit_offset) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_skip__inner_limit_offset) { auto const queryString = R"aql(FOR i IN 0..2 LET a = (FOR j IN 0..2 LIMIT 1, 1 RETURN 3*i + j) RETURN FIRST(a))aql"; auto const expectedString = R"res([1, 4, 7])res"; - verifySubquerySplicing(queryString, 0, 1); + verifySubquerySplicing(queryString, 1); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } -// Must be changed as soon as the subquery implementation with shadow rows handle skipping, -// and the splice-subqueries optimizer rule is changed to allow it. -TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__outer_limit_offset) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_skip__outer_limit_offset) { auto const queryString = R"aql(FOR i IN 0..2 LET a = (FOR j IN 0..2 RETURN 3*i + j) LIMIT 1, 1 RETURN FIRST(a))aql"; auto const expectedString = R"res([3])res"; - verifySubquerySplicing(queryString, 0, 1); + verifySubquerySplicing(queryString, 1); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } -// Must be changed as soon as the subquery implementation with shadow rows handle skipping, -// and the splice-subqueries optimizer rule is changed to allow it. -TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__inner_collect_count) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_skip__inner_collect_count) { auto const queryString = R"aql(FOR i IN 0..2 LET a = (FOR j IN 0..i COLLECT WITH COUNT INTO n RETURN n) RETURN FIRST(a))aql"; auto const expectedString = R"res([1, 2, 3])res"; - verifySubquerySplicing(queryString, 0, 1); + verifySubquerySplicing(queryString, 1); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } -// Must be changed as soon as the subquery implementation with shadow rows handle skipping, -// and the splice-subqueries optimizer rule is changed to allow it. -TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__outer_collect_count) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_skip__outer_collect_count) { // the RAND() is there to avoid the subquery being removed auto const queryString = R"aql(FOR i IN 0..2 LET a = (FOR j IN 0..FLOOR(2*RAND()) RETURN 1) @@ -363,14 +355,14 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__oute RETURN n)aql"; auto const expectedString = R"res([3])res"; - verifySubquerySplicing(queryString, 0, 1); + verifySubquerySplicing(queryString, 1); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } // Must be changed as soon as the subquery implementation with shadow rows handle skipping, // and the splice-subqueries optimizer rule is changed to allow it. 
-TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__full_count) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_skip__full_count) { // the RAND() is there to avoid the subquery being removed auto const queryString = R"aql(FOR i IN 0..2 LET a = (FOR j IN 0..FLOOR(2*RAND()) RETURN 1) @@ -378,7 +370,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_skip__full RETURN i)aql"; auto const expectedString = R"res([0])res"; - verifySubquerySplicing(queryString, 0, 1, "{}", R"opts({"fullCount": true})opts"); + verifySubquerySplicing(queryString, 1, 0, "{}", R"opts({"fullCount": true})opts"); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } @@ -412,7 +404,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_subquery_with_innermos FOR i IN 0..1 LET js = ( // this subquery should be spliced FOR j IN 0..1 + FLOOR(RAND()) - LET ks = ( // this subquery should not be spliced + LET ks = ( // this subquery should be spliced FOR k IN 0..2 + FLOOR(RAND()) LIMIT 1, 2 RETURN 6*i + 3*j + k @@ -423,7 +415,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_subquery_with_innermos )aql"; auto const expectedString = R"res([[[1, 2], [4, 5]], [[7, 8], [10, 11]]])res"; - verifySubquerySplicing(queryString, 1, 1); + verifySubquerySplicing(queryString, 2); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } @@ -440,7 +432,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_subquery_with_innermos )aql"; auto const expectedString = R"res([{"a": 1, "b": [[3, 4]]}, {"a": 2, "b": [[3, 4]]}])res"; - verifySubquerySplicing(queryString, 1, 1); + verifySubquerySplicing(queryString, 2); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } @@ -450,7 +442,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_subquery_with_innermos TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_subquery_with_outermost_skip) { auto const queryString = R"aql( FOR i IN 0..2 - LET js = ( // this subquery should not be spliced + LET js = ( // this subquery should be spliced FOR j IN 0..1 + FLOOR(RAND()) LET ks = ( // this subquery should be spliced FOR k IN 0..1 + FLOOR(RAND()) @@ -463,19 +455,19 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_subquery_with_outermos )aql"; auto const expectedString = R"res([[[4, 5], [6, 7]], [[8, 9], [10, 11]]])res"; - verifySubquerySplicing(queryString, 1, 1); + verifySubquerySplicing(queryString, 2); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); } // Must be changed as soon as the subquery implementation with shadow rows handle skipping, // and the splice-subqueries optimizer rule is changed to allow it. 
-TEST_F(SpliceSubqueryNodeOptimizerRuleTest, dont_splice_subquery_with_limit_and_no_offset) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_limit_and_no_offset) { auto query = R"aql( FOR i IN 2..4 LET a = (FOR j IN [i, i+10, i+20] LIMIT 0, 1 RETURN j) RETURN FIRST(a))aql"; - verifySubquerySplicing(query, 0, 1); + verifySubquerySplicing(query, 1); auto expected = arangodb::velocypack::Parser::fromJson(R"([2, 3, 4])"); verifyQueryResult(query, expected->slice()); @@ -496,7 +488,6 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_nested_empty_subqueries) { RETURN [results] )aql"; auto const expectedString = R"res([[[]]])res"; - verifySubquerySplicing(queryString, 2); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice()); @@ -521,7 +512,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_upsert) { auto const bindString = R"bind({"key": "myKey"})bind"; auto const expectedString = R"res([["UnitTestCollection/myKey"]])res"; - verifySubquerySplicing(queryString, 1, 1, bindString); + verifySubquerySplicing(queryString, 2, 0, bindString); auto expected = arangodb::velocypack::Parser::fromJson(expectedString); verifyQueryResult(queryString, expected->slice(), bindString); @@ -781,8 +772,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_collect_in_subq verifyQueryResult(queryString, expected->slice()); } -// Disabled as long as the subquery implementation with shadow rows cannot yet handle skipping. -TEST_F(SpliceSubqueryNodeOptimizerRuleTest, DISABLED_splice_subquery_with_limit_and_offset) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_with_limit_and_offset) { auto query = R"aql( FOR i IN 2..4 LET a = (FOR j IN [0, i, i+10] LIMIT 1, 1 RETURN j) @@ -793,9 +783,7 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, DISABLED_splice_subquery_with_limit_ verifyQueryResult(query, expected->slice()); } -// Disabled as long as the subquery implementation with shadow rows cannot yet handle skipping. 
-TEST_F(SpliceSubqueryNodeOptimizerRuleTest, - DISABLED_splice_subquery_collect_within_empty_nested_subquery) { +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_collect_within_empty_nested_subquery) { auto query = R"aql( FOR k IN 1..2 LET sub1 = ( @@ -813,6 +801,46 @@ TEST_F(SpliceSubqueryNodeOptimizerRuleTest, verifyQueryResult(query, expected->slice()); } +TEST_F(SpliceSubqueryNodeOptimizerRuleTest, splice_subquery_skip_nodes) { + auto query = R"aql( + FOR k IN 1..10 + LET sub1 = ( + FOR j IN 1..10 + LET sub2 = ( + FOR i IN 1..4 + LIMIT 2,10 + RETURN i + ) + LIMIT 2,10 + RETURN [j, sub2] + ) + LIMIT 3, 10 + RETURN [k, sub1])aql"; + verifySubquerySplicing(query, 2); + + VPackBuilder builder; + builder.openArray(); + for (size_t k = 4; k <= 10; ++k) { + builder.openArray(); + builder.add(VPackValue(k)); + builder.openArray(); + for (size_t j = 3; j <= 10; ++j) { + builder.openArray(); + builder.add(VPackValue(j)); + builder.openArray(); + for (size_t i = 3; i <= 4; ++i) { + builder.add(VPackValue(i)); + } + builder.close(); + builder.close(); + } + builder.close(); + builder.close(); + } + builder.close(); + verifyQueryResult(query, builder.slice()); +} + // TODO Check isInSplicedSubquery // TODO Test cluster rules diff --git a/tests/Aql/SplicedSubqueryIntegrationTest.cpp b/tests/Aql/SplicedSubqueryIntegrationTest.cpp index f6f37d99f4b1..943afe458c2a 100644 --- a/tests/Aql/SplicedSubqueryIntegrationTest.cpp +++ b/tests/Aql/SplicedSubqueryIntegrationTest.cpp @@ -236,24 +236,34 @@ class SplicedSubqueryIntegrationTest auto createSkipCall() -> SkipCall { return [](AqlItemBlockInputRange& input, AqlCall& call) -> std::tuple { - auto skipped = size_t{0}; + while (call.shouldSkip() && input.skippedInFlight() > 0) { + if (call.getOffset() > 0) { + call.didSkip(input.skip(call.getOffset())); + } else { + EXPECT_TRUE(call.needsFullCount()); + EXPECT_EQ(call.getLimit(), 0); + EXPECT_TRUE(call.hasHardLimit()); + call.didSkip(input.skipAll()); + } + } + // If we overfetched and have data, throw it away while (input.hasDataRow() && call.shouldSkip()) { auto const& [state, inputRow] = input.nextDataRow(); EXPECT_TRUE(inputRow.isInitialized()); call.didSkip(1); - skipped++; } auto upstreamCall = AqlCall{call}; - return {input.upstreamState(), NoStats{}, skipped, upstreamCall}; + return {input.upstreamState(), NoStats{}, call.getSkipCount(), upstreamCall}; }; }; // Asserts if called. 
This is to check that when we use skip to // skip over a subquery, the subquery's produce is not invoked + // with data auto createAssertCall() -> ProduceCall { return [](AqlItemBlockInputRange& input, OutputAqlItemRow& output) -> std::tuple { - EXPECT_TRUE(false); + EXPECT_FALSE(input.hasDataRow()); NoStats stats{}; AqlCall call{}; @@ -333,7 +343,7 @@ TEST_P(SplicedSubqueryIntegrationTest, single_subquery) { .run(); }; -TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_and_produce) { +TEST_P(SplicedSubqueryIntegrationTest, single_subquery_skip_and_produce) { auto call = AqlCall{5}; auto pipeline = createSubquery(); ExecutorTestHelper<1, 2>{*fakedQuery} @@ -347,7 +357,7 @@ TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_and_produce .run(); }; -TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_all) { +TEST_P(SplicedSubqueryIntegrationTest, single_subquery_skip_all) { auto call = AqlCall{20}; auto pipeline = createSubquery(); ExecutorTestHelper<1, 2>{*fakedQuery} @@ -361,7 +371,7 @@ TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_all) { .run(); }; -TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_fullcount) { +TEST_P(SplicedSubqueryIntegrationTest, single_subquery_fullcount) { auto call = AqlCall{0, true, 0, AqlCall::LimitType::HARD}; auto pipeline = createSubquery(); ExecutorTestHelper<1, 2>{*fakedQuery} @@ -375,6 +385,8 @@ TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_fullcount) { .run(); }; +// NOTE: This test can be enabled if we can continue +// working on the second subquery without returning to consumer TEST_P(SplicedSubqueryIntegrationTest, DISABLED_single_subquery_skip_produce_count) { auto call = AqlCall{2, true, 2, AqlCall::LimitType::HARD}; auto pipeline = createSubquery(); @@ -442,7 +454,7 @@ TEST_P(SplicedSubqueryIntegrationTest, do_nothing_in_subquery) { .run(); }; -TEST_P(SplicedSubqueryIntegrationTest, DISABLED_check_call_passes_subquery) { +TEST_P(SplicedSubqueryIntegrationTest, check_call_passes_subquery) { auto call = AqlCall{10}; auto pipeline = concatPipelines(createCallAssertPipeline(call), createSubquery()); @@ -456,8 +468,9 @@ TEST_P(SplicedSubqueryIntegrationTest, DISABLED_check_call_passes_subquery) { .run(); }; -TEST_P(SplicedSubqueryIntegrationTest, DISABLED_check_skipping_subquery) { +TEST_P(SplicedSubqueryIntegrationTest, check_skipping_subquery) { auto call = AqlCall{10}; + LOG_DEVEL << call; auto pipeline = createSubquery(createAssertPipeline()); executorTestHelper.setPipeline(std::move(pipeline)) @@ -465,7 +478,23 @@ TEST_P(SplicedSubqueryIntegrationTest, DISABLED_check_skipping_subquery) { .setInputSplitType(getSplit()) .setCall(call) .expectOutput({0}, {}) - .expectSkipped(0) + .expectSkipped(8) .expectedState(ExecutionState::DONE) .run(); }; + +TEST_P(SplicedSubqueryIntegrationTest, check_soft_limit_subquery) { + auto call = AqlCall{0, false, 4, AqlCall::LimitType::SOFT}; + LOG_DEVEL << call; + auto pipeline = createSubquery(createAssertPipeline()); + + ExecutorTestHelper<1, 2>{*fakedQuery} + .setPipeline(std::move(pipeline)) + .setInputValueList(1, 2, 5, 2, 1, 5, 7, 1) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, {{1, R"([])"}, {2, R"([])"}, {5, R"([])"}, {2, R"([])"}}) + .expectSkipped(0) + .expectedState(ExecutionState::HASMORE) + .run(); +}; \ No newline at end of file diff --git a/tests/Aql/SubqueryStartExecutorTest.cpp b/tests/Aql/SubqueryStartExecutorTest.cpp index dc02b404e2d0..d73fbc9cc5e8 100644 --- 
a/tests/Aql/SubqueryStartExecutorTest.cpp +++ b/tests/Aql/SubqueryStartExecutorTest.cpp @@ -101,7 +101,7 @@ TEST_P(SubqueryStartExecutorTest, empty_input_does_not_add_shadow_rows) { .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) .expectOutput({0}, {}) - .expectSkipped(0) + .expectSkipped(0, 0) .setCallStack(queryStack(AqlCall{}, AqlCall{})) .run(); } @@ -112,7 +112,7 @@ TEST_P(SubqueryStartExecutorTest, adds_a_shadowrow_after_single_input) { .setInputValue({{R"("a")"}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) - .expectSkipped(0) + .expectSkipped(0, 0) .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) .setCallStack(queryStack(AqlCall{}, AqlCall{})) .run(); @@ -128,7 +128,7 @@ TEST_P(SubqueryStartExecutorTest, .setInputValue({{{R"("a")"}}, {{R"("b")"}}, {{R"("c")"}}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::HASMORE) - .expectSkipped(0) + .expectSkipped(0, 0) .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) .setCallStack(queryStack(AqlCall{}, AqlCall{})) .run(); @@ -144,7 +144,7 @@ TEST_P(SubqueryStartExecutorTest, DISABLED_adds_a_shadowrow_after_every_input_li .setInputValue({{{R"("a")"}}, {{R"("b")"}}, {{R"("c")"}}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) - .expectSkipped(0) + .expectSkipped(0, 0) .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}}, {{1, 0}, {3, 0}, {5, 0}}) .setCallStack(queryStack(AqlCall{}, AqlCall{})) @@ -159,7 +159,7 @@ TEST_P(SubqueryStartExecutorTest, adds_a_shadowrow_after_every_input_line) { .setInputValue({{{R"("a")"}}, {{R"("b")"}}, {{R"("c")"}}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) - .expectSkipped(0) + .expectSkipped(0, 0) .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}}, {{1, 0}, {3, 0}, {5, 0}}) .setCallStack(queryStack(AqlCall{}, AqlCall{})) @@ -181,7 +181,7 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_does_not_fit_in_current_block) { .setInputValue({{R"("a")"}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::HASMORE) - .expectSkipped(0) + .expectSkipped(0, 0) .expectOutput({0}, {{R"("a")"}}, {}) .setCallStack(queryStack(AqlCall{}, AqlCall{})) .run(); @@ -194,7 +194,7 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_does_not_fit_in_current_block) { .setInputValue({{R"("a")"}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) - .expectSkipped(0) + .expectSkipped(0, 0) .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) .setCallStack(queryStack(AqlCall{}, AqlCall{})) .run(true); @@ -208,7 +208,7 @@ TEST_P(SubqueryStartExecutorTest, skip_in_subquery) { .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) .expectOutput({0}, {{R"("a")"}}, {{0, 0}}) - .expectSkipped(1) + .expectSkipped(0, 1) .setCallStack(queryStack(AqlCall{}, AqlCall{10, false})) .run(); } @@ -220,7 +220,7 @@ TEST_P(SubqueryStartExecutorTest, fullCount_in_subquery) { .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) .expectOutput({0}, {{R"("a")"}}, {{0, 0}}) - .expectSkipped(1) + .expectSkipped(0, 1) .setCallStack(queryStack(AqlCall{}, AqlCall{0, true, 0, AqlCall::LimitType::HARD})) .run(); } @@ -234,12 +234,20 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding) { ExecutionNode::SUBQUERY_START)) .addConsumer(helper.createExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)); + + if (GetCompatMode() == CompatibilityMode::VERSION36) { + // We will not get this infromation 
because the + // query stack is too small on purpose + helper.expectSkipped(0, 0); + } else { + helper.expectSkipped(0, 0, 0); + } + helper.setPipeline(std::move(pipe)) .setInputValue({{R"("a")"}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::DONE) .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("a")"}}, {{1, 0}, {2, 1}}) - .expectSkipped(0) .setCallStack(stack) .run(); } @@ -253,12 +261,20 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_single_call) ExecutionNode::SUBQUERY_START)) .addConsumer(helper.createExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)); + + if (GetCompatMode() == CompatibilityMode::VERSION36) { + // We will not get this infromation because the + // query stack is too small on purpose + helper.expectSkipped(0, 0); + } else { + helper.expectSkipped(0, 0, 0); + } + helper.setPipeline(std::move(pipe)) .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::HASMORE) .expectOutput({0}, {{R"("a")"}, {R"("a")"}, {R"("a")"}}, {{1, 0}, {2, 1}}) - .expectSkipped(0) .setCallStack(stack) .run(); } @@ -272,6 +288,13 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_many_request ExecutionNode::SUBQUERY_START)) .addConsumer(helper.createExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)); + if (GetCompatMode() == CompatibilityMode::VERSION36) { + // We will not get this infromation because the + // query stack is too small on purpose + helper.expectSkipped(0, 0); + } else { + helper.expectSkipped(0, 0, 0); + } helper.setPipeline(std::move(pipe)) .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) .expectedStats(ExecutionStats{}) @@ -280,7 +303,6 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_many_request {0}, {{R"("a")"}, {R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}, {R"("c")"}}, {{1, 0}, {2, 1}, {4, 0}, {5, 1}, {7, 0}, {8, 1}}) - .expectSkipped(0) .setCallStack(stack) .run(true); } @@ -303,12 +325,19 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_not_enough_s ExecutionNode::SUBQUERY_START)) .addConsumer(helper.createExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)); + + if (GetCompatMode() == CompatibilityMode::VERSION36) { + // We will not get this infromation because the + // query stack is too small on purpose + helper.expectSkipped(0, 0); + } else { + helper.expectSkipped(0, 0, 0); + } helper.setPipeline(std::move(pipe)) .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) .expectedStats(ExecutionStats{}) .expectedState(ExecutionState::HASMORE) .expectOutput({0}, {{R"("a")"}, {R"("a")"}}, {{1, 0}}) - .expectSkipped(0) .setCallStack(stack) .run(); } @@ -323,6 +352,15 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_not_enough_s ExecutionNode::SUBQUERY_START)) .addConsumer(helper.createExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)); + + if (GetCompatMode() == CompatibilityMode::VERSION36) { + // We will not get this infromation because the + // query stack is too small on purpose + helper.expectSkipped(0, 0); + } else { + helper.expectSkipped(0, 0, 0); + } + helper.setPipeline(std::move(pipe)) .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}}) .expectedStats(ExecutionStats{}) @@ -331,12 +369,105 @@ TEST_P(SubqueryStartExecutorTest, shadow_row_forwarding_many_inputs_not_enough_s {0}, {{R"("a")"}, {R"("a")"}, {R"("a")"}, {R"("b")"}, {R"("b")"}, {R"("b")"}, {R"("c")"}, {R"("c")"}, {R"("c")"}}, {{1, 0}, {2, 1}, {4, 
0}, {5, 1}, {7, 0}, {8, 1}})
-      .expectSkipped(0)
       .setCallStack(stack)
       .run(true);
   }
 }
-// TODO:
-// * Add tests for Skipping
-//   - on Higher level subquery
+TEST_P(SubqueryStartExecutorTest, skip_in_outer_subquery) {
+  if (GetCompatMode() == CompatibilityMode::VERSION37) {
+    ExecutorTestHelper<1, 1>(*fakedQuery)
+        .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)
+        .setInputValue({{R"("a")"}, {R"("b")"}})
+        .expectedStats(ExecutionStats{})
+        .expectedState(ExecutionState::DONE)
+        .expectOutput({0}, {{R"("b")"}, {R"("b")"}}, {{1, 0}})
+        .expectSkipped(1, 0)
+        .setCallStack(queryStack(AqlCall{1, false, AqlCall::Infinity{}}, AqlCall{}))
+        .run();
+  } else {
+    // The feature is not available in 3.6 or earlier.
+  }
+}
+
+TEST_P(SubqueryStartExecutorTest, DISABLED_skip_only_in_outer_subquery) {
+  if (GetCompatMode() == CompatibilityMode::VERSION37) {
+    ExecutorTestHelper<1, 1>(*fakedQuery)
+        .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)
+        .setInputValue({{R"("a")"}, {R"("b")"}})
+        .expectedStats(ExecutionStats{})
+        .expectedState(ExecutionState::DONE)
+        .expectOutput({0}, {})
+        .expectSkipped(1, 0)
+        .setCallStack(queryStack(AqlCall{1, false}, AqlCall{}))
+        .run();
+  } else {
+    // The feature is not available in 3.6 or earlier.
+  }
+}
+
+TEST_P(SubqueryStartExecutorTest, fullCount_in_outer_subquery) {
+  if (GetCompatMode() == CompatibilityMode::VERSION37) {
+    ExecutorTestHelper<1, 1>(*fakedQuery)
+        .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)
+        .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}, {R"("d")"}, {R"("e")"}, {R"("f")"}})
+        .expectedStats(ExecutionStats{})
+        .expectedState(ExecutionState::DONE)
+        .expectOutput({0}, {})
+        .expectSkipped(6, 0)
+        .setCallStack(queryStack(AqlCall{0, true, 0, AqlCall::LimitType::HARD}, AqlCall{}))
+        .run();
+  } else {
+    // The feature is not available in 3.6 or earlier.
+  }
+}
+
+TEST_P(SubqueryStartExecutorTest, fastForward_in_inner_subquery) {
+  if (GetCompatMode() == CompatibilityMode::VERSION37) {
+    ExecutorTestHelper<1, 1>(*fakedQuery)
+        .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)
+        .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}, {R"("d")"}, {R"("e")"}, {R"("f")"}})
+        .expectedStats(ExecutionStats{})
+        .expectedState(ExecutionState::HASMORE)
+        .expectOutput({0}, {{R"("a")"}}, {{0, 0}})
+        .expectSkipped(0, 0)
+        .setCallStack(queryStack(AqlCall{0, false, AqlCall::Infinity{}},
+                                 AqlCall{0, false, 0, AqlCall::LimitType::HARD}))
+        .run();
+  } else {
+    // The feature is not available in 3.6 or earlier.
+  }
+}
+
+TEST_P(SubqueryStartExecutorTest, skip_out_skip_in) {
+  if (GetCompatMode() == CompatibilityMode::VERSION37) {
+    ExecutorTestHelper<1, 1>(*fakedQuery)
+        .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)
+        .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}, {R"("d")"}, {R"("e")"}, {R"("f")"}})
+        .expectedStats(ExecutionStats{})
+        .expectedState(ExecutionState::HASMORE)
+        .expectOutput({0}, {{R"("c")"}}, {{0, 0}})
+        .expectSkipped(2, 1)
+        .setCallStack(queryStack(AqlCall{2, false, AqlCall::Infinity{}},
+                                 AqlCall{10, false, AqlCall::Infinity{}}))
+        .run();
+  } else {
+    // The feature is not available in 3.6 or earlier.
+  }
+}
+
+TEST_P(SubqueryStartExecutorTest, fullbypass_in_outer_subquery) {
+  if (GetCompatMode() == CompatibilityMode::VERSION37) {
+    ExecutorTestHelper<1, 1>(*fakedQuery)
+        .setExecBlock(MakeBaseInfos(1), ExecutionNode::SUBQUERY_START)
+        .setInputValue({{R"("a")"}, {R"("b")"}, {R"("c")"}, {R"("d")"}, {R"("e")"}, {R"("f")"}})
+        .expectedStats(ExecutionStats{})
+        .expectedState(ExecutionState::DONE)
+        .expectOutput({0}, {})
+        .expectSkipped(0, 0)
+        .setCallStack(queryStack(AqlCall{0, false, 0, AqlCall::LimitType::HARD}, AqlCall{}))
+        .run();
+  } else {
+    // The feature is not available in 3.6 or earlier.
+  }
+}
diff --git a/tests/Aql/WaitingExecutionBlockMock.cpp b/tests/Aql/WaitingExecutionBlockMock.cpp
index c48de11e02e4..5ed988f8478e 100644
--- a/tests/Aql/WaitingExecutionBlockMock.cpp
+++ b/tests/Aql/WaitingExecutionBlockMock.cpp
@@ -27,6 +27,7 @@
 #include "Aql/ExecutionEngine.h"
 #include "Aql/ExecutionState.h"
 #include "Aql/QueryOptions.h"
+#include "Aql/SkipResult.h"
 #include "Logger/LogMacros.h"
@@ -113,7 +114,7 @@ std::pair WaitingExecutionBlockMock::skip
   }
 }
-std::tuple<ExecutionState, size_t, SharedAqlItemBlockPtr> WaitingExecutionBlockMock::execute(AqlCallStack stack) {
+std::tuple<ExecutionState, SkipResult, SharedAqlItemBlockPtr> WaitingExecutionBlockMock::execute(AqlCallStack stack) {
   traceExecuteBegin(stack);
   auto res = executeWithoutTrace(stack);
   traceExecuteEnd(res);
@@ -121,7 +122,7 @@ std::tuple WaitingExecutionBlockM
 }
 // NOTE: Does not care for shadowrows!
-std::tuple<ExecutionState, size_t, SharedAqlItemBlockPtr> WaitingExecutionBlockMock::executeWithoutTrace(
+std::tuple<ExecutionState, SkipResult, SharedAqlItemBlockPtr> WaitingExecutionBlockMock::executeWithoutTrace(
     AqlCallStack stack) {
   while (!stack.isRelevant()) {
     stack.pop();
   }
@@ -135,7 +136,7 @@ std::tuple WaitingExecutionBlockM
   if (_variant != WaitingBehaviour::NEVER && !_hasWaited) {
     // If we ordered waiting check on _hasWaited and wait if not
     _hasWaited = true;
-    return {ExecutionState::WAITING, 0, nullptr};
+    return {ExecutionState::WAITING, SkipResult{}, nullptr};
   }
   if (_variant == WaitingBehaviour::ALWAYS) {
     // If we always wait, reset.
@@ -154,7 +155,9 @@ std::tuple WaitingExecutionBlockM
         // Sorry we can only return one block.
         // This means we have prepared the first block.
         // But still need more data.
-        return {ExecutionState::HASMORE, skipped, result};
+        SkipResult skipRes{};
+        skipRes.didSkip(skipped);
+        return {ExecutionState::HASMORE, skipRes, result};
       } else {
        dropBlock();
        continue;
@@ -177,7 +180,9 @@ std::tuple WaitingExecutionBlockM
      // Sorry we can only return one block.
      // This means we have prepared the first block.
      // But still need more data.
- return {ExecutionState::HASMORE, skipped, result}; + SkipResult skipRes{}; + skipRes.didSkip(skipped); + return {ExecutionState::HASMORE, skipRes, result}; } size_t canReturn = _data.front()->size() - _inflight; @@ -212,16 +217,20 @@ std::tuple WaitingExecutionBlockM dropBlock(); } } + SkipResult skipRes{}; + skipRes.didSkip(skipped); if (!_data.empty()) { - return {ExecutionState::HASMORE, skipped, result}; + return {ExecutionState::HASMORE, skipRes, result}; } else if (result != nullptr && result->size() < myCall.hardLimit) { - return {ExecutionState::HASMORE, skipped, result}; + return {ExecutionState::HASMORE, skipRes, result}; } else { - return {ExecutionState::DONE, skipped, result}; + return {ExecutionState::DONE, skipRes, result}; } } } - return {ExecutionState::DONE, skipped, result}; + SkipResult skipRes{}; + skipRes.didSkip(skipped); + return {ExecutionState::DONE, skipRes, result}; } void WaitingExecutionBlockMock::dropBlock() { diff --git a/tests/Aql/WaitingExecutionBlockMock.h b/tests/Aql/WaitingExecutionBlockMock.h index 2c147ba08e91..b1ad73e01bd8 100644 --- a/tests/Aql/WaitingExecutionBlockMock.h +++ b/tests/Aql/WaitingExecutionBlockMock.h @@ -35,6 +35,7 @@ class AqlItemBlock; class ExecutionEngine; class ExecutionNode; struct ResourceMonitor; +class SkipResult; } // namespace aql namespace tests { @@ -106,15 +107,15 @@ class WaitingExecutionBlockMock final : public arangodb::aql::ExecutionBlock { */ std::pair skipSome(size_t atMost) override; - std::tuple execute( + std::tuple execute( arangodb::aql::AqlCallStack stack) override; private: void dropBlock(); // Implementation of execute - std::tuple executeWithoutTrace( - arangodb::aql::AqlCallStack stack); + std::tuple + executeWithoutTrace(arangodb::aql::AqlCallStack stack); private: std::deque _data; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ed9e68f7a40..4c8d6934d0dd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,6 +64,7 @@ set(ARANGODB_TESTS_SOURCES Aql/RowFetcherHelper.cpp Aql/ScatterExecutorTest.cpp Aql/ShortestPathExecutorTest.cpp + Aql/SkipResultTest.cpp Aql/SingleRowFetcherTest.cpp Aql/SortedCollectExecutorTest.cpp Aql/SortExecutorTest.cpp diff --git a/tests/js/server/aql/aql-optimizer-indexes.js b/tests/js/server/aql/aql-optimizer-indexes.js index a08553b36d69..b42fd4955654 100644 --- a/tests/js/server/aql/aql-optimizer-indexes.js +++ b/tests/js/server/aql/aql-optimizer-indexes.js @@ -862,13 +862,9 @@ function optimizerIndexesTestSuite () { }); assertEqual("SingletonNode", nodeTypes[0], query); - assertEqual("SubqueryNode", nodeTypes[1], query); - - var subNodeTypes = plan.nodes[1].subquery.nodes.map(function(node) { - return node.type; - }); - assertNotEqual(-1, subNodeTypes.indexOf("IndexNode"), query); - assertEqual(-1, subNodeTypes.indexOf("SortNode"), query); + assertEqual("SubqueryStartNode", nodeTypes[1], query); + assertNotEqual(-1, nodeTypes.indexOf("IndexNode"), query); + assertEqual(-1, nodeTypes.indexOf("SortNode"), query); assertEqual("ReturnNode", nodeTypes[nodeTypes.length - 1], query); var results = AQL_EXECUTE(query, {}, opt); @@ -3776,26 +3772,39 @@ function optimizerIndexesMultiCollectionTestSuite () { var query = "FOR i IN " + c1.name() + " LET res = (FOR j IN " + c2.name() + " FILTER j.value == i.value SORT j.ref LIMIT 1 RETURN j) SORT res[0] RETURN i"; var plan = AQL_EXPLAIN(query, {}, opt).plan; - var nodeTypes = plan.nodes.map(function(node) { - return node.type; - }); + let idx = -1; + const nodeTypes = []; + const subqueryTypes = []; + { + let 
inSubquery = false; + for (const node of plan.nodes) { + const n = node.type; + if (n === "SubqueryStartNode" ) { + nodeTypes.push(n); + inSubquery = true; + } else if (n === "SubqueryEndNode" ) { + nodeTypes.push(n); + inSubquery = false; + } else if (inSubquery) { + if (n === "IndexNode") { + idx = node; + } + subqueryTypes.push(n); + } else { + nodeTypes.push(n); + } + } + } assertEqual("SingletonNode", nodeTypes[0], query); assertEqual(-1, nodeTypes.indexOf("IndexNode"), query); // no index for outer query assertNotEqual(-1, nodeTypes.indexOf("SortNode"), query); // sort node for outer query - var sub = nodeTypes.indexOf("SubqueryNode"); + var sub = nodeTypes.indexOf("SubqueryStartNode"); assertNotEqual(-1, sub); - - var subNodeTypes = plan.nodes[sub].subquery.nodes.map(function(node) { - return node.type; - }); - - assertEqual("SingletonNode", subNodeTypes[0], query); - var idx = subNodeTypes.indexOf("IndexNode"); assertNotEqual(-1, idx, query); // index used for inner query - assertEqual("hash", plan.nodes[sub].subquery.nodes[idx].indexes[0].type); - assertNotEqual(-1, subNodeTypes.indexOf("SortNode"), query); // must have sort node for inner query + assertEqual("hash", idx.indexes[0].type); + assertNotEqual(-1, subqueryTypes.indexOf("SortNode"), query); // must have sort node for inner query }, //////////////////////////////////////////////////////////////////////////////// @@ -3807,26 +3816,39 @@ function optimizerIndexesMultiCollectionTestSuite () { var query = "FOR i IN " + c1.name() + " LET res = (FOR j IN " + c2.name() + " FILTER j.value == i.value SORT j.value LIMIT 1 RETURN j) SORT res[0] RETURN i"; var plan = AQL_EXPLAIN(query, {}, opt).plan; - var nodeTypes = plan.nodes.map(function(node) { - return node.type; - }); - + const nodeTypes = []; + const subqueryTypes = []; + let idx = -1; + { + let inSubquery = false; + for (const node of plan.nodes) { + const n = node.type; + if (n === "SubqueryStartNode" ) { + nodeTypes.push(n); + inSubquery = true; + } else if (n === "SubqueryEndNode" ) { + nodeTypes.push(n); + inSubquery = false; + } else if (inSubquery) { + if (n === "IndexNode") { + idx = node; + } + subqueryTypes.push(n); + } else { + nodeTypes.push(n); + } + } + } assertEqual("SingletonNode", nodeTypes[0], query); assertEqual(-1, nodeTypes.indexOf("IndexNode"), query); // no index for outer query assertNotEqual(-1, nodeTypes.indexOf("SortNode"), query); // sort node for outer query - var sub = nodeTypes.indexOf("SubqueryNode"); + var sub = nodeTypes.indexOf("SubqueryStartNode"); assertNotEqual(-1, sub); - var subNodeTypes = plan.nodes[sub].subquery.nodes.map(function(node) { - return node.type; - }); - - assertEqual("SingletonNode", subNodeTypes[0], query); - var idx = subNodeTypes.indexOf("IndexNode"); - assertNotEqual(-1, idx, query); // index used for inner query - assertEqual("hash", plan.nodes[sub].subquery.nodes[idx].indexes[0].type); - assertEqual(-1, subNodeTypes.indexOf("SortNode"), query); // must not have sort node for inner query + assertNotEqual(-1, subqueryTypes.indexOf("IndexNode"), query); // index used for inner query + assertEqual("hash", idx.indexes[0].type); + assertEqual(-1, subqueryTypes.indexOf("SortNode"), query); // must not have sort node for inner query }, //////////////////////////////////////////////////////////////////////////////// @@ -3839,26 +3861,40 @@ function optimizerIndexesMultiCollectionTestSuite () { var query = "FOR i IN " + c1.name() + " LET res = (FOR j IN " + c2.name() + " FILTER j.value == i.value SORT j.ref LIMIT 1 RETURN j) 
SORT res[0] RETURN i"; var plan = AQL_EXPLAIN(query, {}, opt).plan; - var nodeTypes = plan.nodes.map(function(node) { - return node.type; - }); + const nodeTypes = []; + const subqueryTypes = []; + let idx = -1; + { + let inSubquery = false; + for (const node of plan.nodes) { + const n = node.type; + if (n === "SubqueryStartNode" ) { + nodeTypes.push(n); + inSubquery = true; + } else if (n === "SubqueryEndNode" ) { + nodeTypes.push(n); + inSubquery = false; + } else if (inSubquery) { + if (n === "IndexNode") { + idx = node; + } + subqueryTypes.push(n); + } else { + nodeTypes.push(n); + } + } + } assertEqual("SingletonNode", nodeTypes[0], query); assertEqual(-1, nodeTypes.indexOf("IndexNode"), query); // no index for outer query assertNotEqual(-1, nodeTypes.indexOf("SortNode"), query); // sort node for outer query - var sub = nodeTypes.indexOf("SubqueryNode"); + var sub = nodeTypes.indexOf("SubqueryStartNode"); assertNotEqual(-1, sub); - - var subNodeTypes = plan.nodes[sub].subquery.nodes.map(function(node) { - return node.type; - }); - - assertEqual("SingletonNode", subNodeTypes[0], query); - var idx = subNodeTypes.indexOf("IndexNode"); - assertNotEqual(-1, idx, query); // index used for inner query - assertEqual("hash", plan.nodes[sub].subquery.nodes[idx].indexes[0].type); - assertNotEqual(-1, subNodeTypes.indexOf("SortNode"), query); // must have sort node for inner query + assertNotEqual(-1, idx); + assertNotEqual(-1, subqueryTypes.indexOf("IndexNode"), query); // index used for inner query + assertEqual("hash", idx.indexes[0].type); + assertNotEqual(-1, subqueryTypes.indexOf("SortNode"), query); // must have sort node for inner query }, //////////////////////////////////////////////////////////////////////////////// @@ -3871,26 +3907,40 @@ function optimizerIndexesMultiCollectionTestSuite () { var query = "FOR i IN " + c1.name() + " LET res = (FOR j IN " + c2.name() + " FILTER j.value == i.value SORT j.value LIMIT 1 RETURN j) SORT res[0] RETURN i"; var plan = AQL_EXPLAIN(query, {}, opt).plan; - var nodeTypes = plan.nodes.map(function(node) { - return node.type; - }); + const nodeTypes = []; + const subqueryTypes = []; + let idx = -1; + { + let inSubquery = false; + for (const node of plan.nodes) { + const n = node.type; + if (n === "SubqueryStartNode" ) { + nodeTypes.push(n); + inSubquery = true; + } else if (n === "SubqueryEndNode" ) { + nodeTypes.push(n); + inSubquery = false; + } else if (inSubquery) { + if (n === "IndexNode") { + idx = node; + } + subqueryTypes.push(n); + } else { + nodeTypes.push(n); + } + } + } assertEqual("SingletonNode", nodeTypes[0], query); assertEqual(-1, nodeTypes.indexOf("IndexNode"), query); // no index for outer query assertNotEqual(-1, nodeTypes.indexOf("SortNode"), query); // sort node for outer query - var sub = nodeTypes.indexOf("SubqueryNode"); - assertNotEqual(-1, sub); + assertNotEqual(-1, nodeTypes.indexOf("SubqueryStartNode"), query); - var subNodeTypes = plan.nodes[sub].subquery.nodes.map(function(node) { - return node.type; - }); - - assertEqual("SingletonNode", subNodeTypes[0], query); - var idx = subNodeTypes.indexOf("IndexNode"); + assertNotEqual(-1, subqueryTypes.indexOf("IndexNode"), query); assertNotEqual(-1, idx, query); // index used for inner query - assertEqual("hash", plan.nodes[sub].subquery.nodes[idx].indexes[0].type); - assertEqual(-1, subNodeTypes.indexOf("SortNode"), query); // we're filtering on a constant, must not have sort node for inner query + assertEqual("hash", idx.indexes[0].type); + assertEqual(-1, 
subqueryTypes.indexOf("SortNode"), query); // we're filtering on a constant, must not have sort node for inner query }, //////////////////////////////////////////////////////////////////////////////// @@ -3903,26 +3953,40 @@ function optimizerIndexesMultiCollectionTestSuite () { var query = "FOR i IN " + c1.name() + " LET res = (FOR z IN 1..2 FOR j IN " + c2.name() + " FILTER j.value == i.value SORT j.value LIMIT 1 RETURN j) SORT res[0] RETURN i"; var plan = AQL_EXPLAIN(query, {}, opt).plan; - var nodeTypes = plan.nodes.map(function(node) { - return node.type; - }); + const nodeTypes = []; + const subqueryTypes = []; + let idx = -1; + { + let inSubquery = false; + for (const node of plan.nodes) { + const n = node.type; + if (n === "SubqueryStartNode" ) { + nodeTypes.push(n); + inSubquery = true; + } else if (n === "SubqueryEndNode" ) { + nodeTypes.push(n); + inSubquery = false; + } else if (inSubquery) { + if (n === "IndexNode") { + idx = node; + } + subqueryTypes.push(n); + } else { + nodeTypes.push(n); + } + } + } assertEqual("SingletonNode", nodeTypes[0], query); assertEqual(-1, nodeTypes.indexOf("IndexNode"), query); // no index for outer query assertNotEqual(-1, nodeTypes.indexOf("SortNode"), query); // sort node for outer query - var sub = nodeTypes.indexOf("SubqueryNode"); - assertNotEqual(-1, sub); - - var subNodeTypes = plan.nodes[sub].subquery.nodes.map(function(node) { - return node.type; - }); - - assertEqual("SingletonNode", subNodeTypes[0], query); - var idx = subNodeTypes.indexOf("IndexNode"); + var sub = nodeTypes.indexOf("SubqueryStartNode"); + assertNotEqual(-1, sub, query); + assertNotEqual(-1, subqueryTypes.indexOf("IndexNode"), query); assertNotEqual(-1, idx, query); // index used for inner query - assertEqual("hash", plan.nodes[sub].subquery.nodes[idx].indexes[0].type); - assertNotEqual(-1, subNodeTypes.indexOf("SortNode"), query); // we're filtering on a constant, but we're in an inner loop + assertEqual("hash", idx.indexes[0].type); + assertNotEqual(-1, subqueryTypes.indexOf("SortNode"), query); // we're filtering on a constant, but we're in an inner loop }, //////////////////////////////////////////////////////////////////////////////// @@ -3935,26 +3999,39 @@ function optimizerIndexesMultiCollectionTestSuite () { var query = "FOR i IN " + c1.name() + " LET res = (FOR j IN " + c2.name() + " FILTER j.ref == i.ref SORT j.ref LIMIT 1 RETURN j) SORT res[0] RETURN i"; var plan = AQL_EXPLAIN(query, {}, opt).plan; - var nodeTypes = plan.nodes.map(function(node) { - return node.type; - }); + const nodeTypes = []; + const subqueryTypes = []; + let idx = -1; + { + let inSubquery = false; + for (const node of plan.nodes) { + const n = node.type; + if (n === "SubqueryStartNode" ) { + nodeTypes.push(n); + inSubquery = true; + } else if (n === "SubqueryEndNode" ) { + nodeTypes.push(n); + inSubquery = false; + } else if (inSubquery) { + if (n === "IndexNode") { + idx = node; + } + subqueryTypes.push(n); + } else { + nodeTypes.push(n); + } + } + } assertEqual("SingletonNode", nodeTypes[0], query); assertEqual(-1, nodeTypes.indexOf("IndexNode"), query); // no index for outer query assertNotEqual(-1, nodeTypes.indexOf("SortNode"), query); // sort node for outer query + assertNotEqual(-1, nodeTypes.indexOf("SubqueryStartNode"), query); - var sub = nodeTypes.indexOf("SubqueryNode"); - assertNotEqual(-1, sub); - - var subNodeTypes = plan.nodes[sub].subquery.nodes.map(function(node) { - return node.type; - }); - - assertEqual("SingletonNode", subNodeTypes[0], query); - var idx = 
subNodeTypes.indexOf("IndexNode");
+      assertNotEqual(-1, subqueryTypes.indexOf("IndexNode"), query);
       assertNotEqual(-1, idx, query); // index used for inner query
-      assertEqual("skiplist", plan.nodes[sub].subquery.nodes[idx].indexes[0].type);
-      assertEqual(-1, subNodeTypes.indexOf("SortNode"), query); // must not have sort node for inner query
+      assertEqual("skiplist", idx.indexes[0].type);
+      assertEqual(-1, subqueryTypes.indexOf("SortNode"), query); // must not have sort node for inner query
     },
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/tests/js/server/aql/aql-optimizer-rule-move-calculations-down.js b/tests/js/server/aql/aql-optimizer-rule-move-calculations-down.js
index 54e7c8d0bd7d..37023b6157d1 100644
--- a/tests/js/server/aql/aql-optimizer-rule-move-calculations-down.js
+++ b/tests/js/server/aql/aql-optimizer-rule-move-calculations-down.js
@@ -371,7 +371,7 @@ function optimizerRuleTestSuite () {
       expected.push("test" + i + "-" + i);
     }
-    var query = "FOR i IN 0..100 LET result = (UPDATE {_key: CONCAT('test', TO_STRING(i))} WITH {updated: true} IN " + cn + " RETURN CONCAT(NEW._key, '-', NEW.value)) LIMIT 10 RETURN result[0]";
+    var query = "FOR i IN 0..99 LET result = (UPDATE {_key: CONCAT('test', TO_STRING(i))} WITH {updated: true} IN " + cn + " RETURN CONCAT(NEW._key, '-', NEW.value)) LIMIT 10 RETURN result[0]";
     var planDisabled = AQL_EXPLAIN(query, {}, paramDisabled);
     var planEnabled = AQL_EXPLAIN(query, {}, paramEnabled);
diff --git a/tests/js/server/aql/aql-optimizer-rule-no-document-materialization-arangosearch.js b/tests/js/server/aql/aql-optimizer-rule-no-document-materialization-arangosearch.js
index d6fb5cfe0102..59626c692b9a 100644
--- a/tests/js/server/aql/aql-optimizer-rule-no-document-materialization-arangosearch.js
+++ b/tests/js/server/aql/aql-optimizer-rule-no-document-materialization-arangosearch.js
@@ -145,10 +145,8 @@ function noDocumentMaterializationArangoSearchRuleTestSuite () {
       "SORT CONCAT(a, e) LIMIT 10 RETURN d.obj.e.e1";
     let plan = AQL_EXPLAIN(query).plan;
     assertTrue(plan.nodes.filter(obj => {
-      return obj.type === "SubqueryNode";
-    })[0].subquery.nodes.filter(obj => {
       return obj.type === "EnumerateViewNode";
-    })[0].noMaterialization);
+    })[1].noMaterialization);
     let result = AQL_EXECUTE(query);
     assertEqual(2, result.json.length);
     let expectedKeys = new Set([14, 4]);
diff --git a/tests/js/server/aql/aql-subquery.js b/tests/js/server/aql/aql-subquery.js
index a2cc8ded83b7..d3002e80137f 100644
--- a/tests/js/server/aql/aql-subquery.js
+++ b/tests/js/server/aql/aql-subquery.js
@@ -343,8 +343,8 @@ function ahuacatlSubqueryTestSuite () {
 /// A count collect block will produce an output even if it does not get an input
 /// specifically it will rightfully count 0.
 /// The insert block will write into the collection if it gets an input.
-/// So the assertion here is, that if a subquery has no input, than all it's
-/// Parts do not have side-effects, but the subquery still prduces valid results
+/// Even if the outer subquery is skipped. Hence we require documents to be
+/// inserted here.
//////////////////////////////////////////////////////////////////////////////// testCollectWithinEmptyNestedSubquery: function () { const colName = "UnitTestSubqueryCollection"; @@ -367,7 +367,7 @@ function ahuacatlSubqueryTestSuite () { var actual = getQueryResults(query); assertEqual(expected, actual); - assertEqual(db[colName].count(), 0); + assertEqual(db[colName].count(), 1); } finally { db._drop(colName); } From cb4258070aaafc0c3538cef29c680e13e0742df0 Mon Sep 17 00:00:00 2001 From: Markus Pfeiffer Date: Thu, 12 Mar 2020 15:11:37 +0000 Subject: [PATCH 101/122] Add some FilterExecutor and CalculationExecutor tests (#11231) * Add some FilterExecutor tests * Calculation executor tests * Some more testcode * Add NoneEntry for EntryBuilder * Add some tests for CalculationExecutor Co-authored-by: Michael Hackstein --- tests/Aql/AqlItemBlockHelper.h | 23 +- tests/Aql/CalculationExecutorTest.cpp | 485 +++++++++++----------- tests/Aql/FilterExecutorTest.cpp | 554 +++++--------------------- 3 files changed, 365 insertions(+), 697 deletions(-) diff --git a/tests/Aql/AqlItemBlockHelper.h b/tests/Aql/AqlItemBlockHelper.h index fac8f0d7d36b..e6f9db2ad1ea 100644 --- a/tests/Aql/AqlItemBlockHelper.h +++ b/tests/Aql/AqlItemBlockHelper.h @@ -77,7 +77,9 @@ namespace arangodb { namespace tests { namespace aql { -using EntryBuilder = std::variant; +struct NoneEntry {}; + +using EntryBuilder = std::variant; template <::arangodb::aql::RegisterId columns> using RowBuilder = std::array; @@ -112,15 +114,16 @@ SharedAqlItemBlockPtr buildBlock(AqlItemBlockManager& manager, for (size_t row = 0; row < matrix.size(); row++) { for (RegisterId col = 0; col < columns; col++) { auto const& entry = matrix[row][col]; - auto value = std::visit( - overload{ - [](int i) { return AqlValue{AqlValueHintInt{i}}; }, - [](const char* json) { - VPackBufferPtr tmpVpack = vpackFromJsonString(json); - return AqlValue{AqlValueHintCopy{tmpVpack->data()}}; - }, - }, - entry); + auto value = + std::visit(overload{ + [](NoneEntry) { return AqlValue{}; }, + [](int i) { return AqlValue{AqlValueHintInt{i}}; }, + [](const char* json) { + VPackBufferPtr tmpVpack = vpackFromJsonString(json); + return AqlValue{AqlValueHintCopy{tmpVpack->data()}}; + }, + }, + entry); block->setValue(row, col, value); } } diff --git a/tests/Aql/CalculationExecutorTest.cpp b/tests/Aql/CalculationExecutorTest.cpp index 37db9313c92d..1682edcdfc62 100644 --- a/tests/Aql/CalculationExecutorTest.cpp +++ b/tests/Aql/CalculationExecutorTest.cpp @@ -65,11 +65,16 @@ namespace arangodb { namespace tests { namespace aql { +using CalculationExecutorTestHelper = ExecutorTestHelper<2, 2>; +using CalculationExecutorSplitType = CalculationExecutorTestHelper::SplitType; +using CalculationExecutorInputParam = std::tuple; + // TODO Add tests for both // CalculationExecutor and // CalculationExecutor! 
-class CalculationExecutorTest : public AqlExecutorTestCase { +class CalculationExecutorTest + : public AqlExecutorTestCaseWithParam { protected: ExecutionState state; AqlItemBlockManager itemBlockManager; @@ -99,261 +104,257 @@ class CalculationExecutorTest : public AqlExecutorTestCase { std::unordered_set{} /*to clear*/, std::unordered_set{} /*to keep*/, *fakedQuery.get() /*query*/, expr /*expression*/, - std::vector{&var} /*expression in variables*/, - std::vector{inRegID} /*expression in registers*/) {} + std::vector{&var} /*expression input variables*/, + std::vector{inRegID} /*expression input registers*/) {} + + auto getSplit() -> CalculationExecutorSplitType { + auto [split] = GetParam(); + return split; + } + + auto buildInfos() -> CalculationExecutorInfos { + return CalculationExecutorInfos{0, 1, 1, {}, {}, *fakedQuery.get(), + expr, {&var}, {0}}; + } }; -TEST_F(CalculationExecutorTest, there_are_no_rows_upstream_the_producer_does_not_wait) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, input.steal(), false); - CalculationExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). - NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +template +const CalculationExecutorSplitType splitIntoBlocks = + CalculationExecutorSplitType{std::vector{vs...}}; +template +const CalculationExecutorSplitType splitStep = CalculationExecutorSplitType{step}; + +INSTANTIATE_TEST_CASE_P(CalculationExecutor, CalculationExecutorTest, + ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>, + splitStep<1>, splitStep<2>)); + +TEST_P(CalculationExecutorTest, reference_empty_input) { + // auto infos = buildInfos(); + AqlCall call{}; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue({}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({1}, {}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); } -TEST_F(CalculationExecutorTest, there_are_no_rows_upstream_the_producer_waits) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, input.steal(), true); - CalculationExecutor testee(fetcher, infos); - // Use this instead of std::ignore, so the tests will be noticed and - // updated when someone changes the stats type in the return value of - // EnumerateListExecutor::produceRows(). 
- NoStats stats{}; - - OutputAqlItemRow result{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); +TEST_P(CalculationExecutorTest, reference_some_input) { + AqlCall call{}; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{0, 0}, RowBuilder<2>{1, 1}, + RowBuilder<2>{R"("a")", R"("a")"}, + RowBuilder<2>{2, 2}, RowBuilder<2>{3, 3}, + RowBuilder<2>{4, 4}, RowBuilder<2>{5, 5}, + RowBuilder<2>{6, 6}}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(true); } -TEST_F(CalculationExecutorTest, there_are_rows_in_the_upstream_the_producer_does_not_wait) { - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - auto input = VPackParser::fromJson("[ [0], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, input->steal(), false); - CalculationExecutor testee(fetcher, infos); - NoStats stats{}; - - OutputAqlItemRow row{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - - // 1 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - // 2 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - // 3 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(row.produced()); - - // verify calculation - { - AqlValue value; - auto block = row.stealBlock(); - for (std::size_t index = 0; index < 3; index++) { - value = block->getValue(index, outRegID); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(value.toInt64(), static_cast(index + 1)); - } - } +TEST_P(CalculationExecutorTest, referece_some_input_skip) { + AqlCall call{}; + call.offset = 4; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{3, 3}, RowBuilder<2>{4, 4}, + RowBuilder<2>{5, 5}, RowBuilder<2>{6, 6}}) + .allowAnyOutputOrder(false) + .expectSkipped(4) + .expectedState(ExecutionState::DONE) + .run(true); } -TEST_F(CalculationExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) 
{ - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - auto input = VPackParser::fromJson("[ [0], [1], [2] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, input->steal(), true); - CalculationExecutor testee(fetcher, infos); - NoStats stats{}; - - OutputAqlItemRow row{std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()}; - - // waiting - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - - // 1 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - // waiting - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - - // 2 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - // waiting - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - - // 3 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(row.produced()); +TEST_P(CalculationExecutorTest, reference_some_input_limit) { + AqlCall call{}; + call.hardLimit = 4; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{0, 0}, RowBuilder<2>{1, 1}, + RowBuilder<2>{R"("a")", R"("a")"}, + RowBuilder<2>{2, 2}}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(true); } -TEST_F(CalculationExecutorTest, test_produce_datarange) { - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
- auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - CalculationExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}}); - - AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; - OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - EXPECT_EQ(output.numRowsWritten(), 0); - auto const [state, stats, call] = testee.produceRows(input, output); - EXPECT_EQ(output.numRowsWritten(), 3); - - EXPECT_EQ(state, ExecutorState::DONE); - // verify calculation - { - AqlValue value; - auto block = output.stealBlock(); - for (std::size_t index = 0; index < 3; index++) { - value = block->getValue(index, outRegID); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(value.toInt64(), static_cast(index + 1)); - } - } +TEST_P(CalculationExecutorTest, reference_some_input_limit_fullcount) { + AqlCall call{}; + call.hardLimit = 4; + call.fullCount = true; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{0, 0}, RowBuilder<2>{1, 1}, + RowBuilder<2>{R"("a")", R"("a")"}, + RowBuilder<2>{2, 2}}) + .allowAnyOutputOrder(false) + .expectSkipped(4) + .expectedState(ExecutionState::DONE) + .run(true); } -TEST_F(CalculationExecutorTest, test_produce_datarange_need_more) { - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
- auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - CalculationExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}}); - - AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0}; - OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear(), - AqlCall{0, 3, AqlCall::Infinity{}, false}); - - auto myCall = output.getClientCall(); - EXPECT_EQ(myCall.getLimit(), 3); - EXPECT_EQ(output.numRowsWritten(), 0); - - auto const [state, stats, outputCall] = testee.produceRows(input, output); - EXPECT_EQ(output.numRowsWritten(), 3); - - EXPECT_EQ(state, ExecutorState::HASMORE); - // verify calculation - { - AqlValue value; - auto block = output.stealBlock(); - for (std::size_t index = 0; index < 3; index++) { - value = block->getValue(index, outRegID); - ASSERT_TRUE(value.isNumber()); - ASSERT_EQ(value.toInt64(), static_cast(index + 1)); - } - } - // Test the Call we send to upstream - EXPECT_EQ(outputCall.offset, 0); - EXPECT_FALSE(outputCall.hasHardLimit()); - // Avoid overfetching. I do not have a strong requirement on this - // test, however this is what we do right now. - EXPECT_EQ(outputCall.getLimit(), 0); - EXPECT_FALSE(outputCall.fullCount); +TEST_P(CalculationExecutorTest, condition_some_input) { + AqlCall call{}; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, + MatrixBuilder<2>{RowBuilder<2>{0, 1}, RowBuilder<2>{1, 2}, + RowBuilder<2>{R"("a")", 1}, RowBuilder<2>{2, 3}, + RowBuilder<2>{3, 4}, RowBuilder<2>{4, 5}, + RowBuilder<2>{5, 6}, RowBuilder<2>{6, 7}}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(true); } -TEST_F(CalculationExecutorTest, DISABLED_test_produce_datarange_has_more) { // TODO: fix and re-enable after this executor newStyle is active - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
- auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Enable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - SharedAqlItemBlockPtr block{new AqlItemBlock(itemBlockManager, 1000, 2)}; - CalculationExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, {{R"(0)"}, {R"(1)"}, {R"(2)"}, {R"(3)"}, {R"(4)"}}); - - AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; - OutputAqlItemRow output(std::move(block), infos.getOutputRegisters(), - infos.registersToKeep(), infos.registersToClear()); - EXPECT_EQ(output.numRowsWritten(), 0); - AqlCall myCall{0, 3, AqlCall::Infinity{}, false}; - output.setCall(std::move(myCall)); - - auto const [state, stats, call] = testee.produceRows(input, output); - EXPECT_EQ(output.numRowsWritten(), 3); - - EXPECT_EQ(state, ExecutorState::HASMORE); - EXPECT_TRUE(input.hasDataRow()); - // We still have two values in block: 3 and 4 - { - // pop 3 - auto const [state, row] = input.nextDataRow(); - EXPECT_EQ(state, ExecutorState::HASMORE); - EXPECT_EQ(row.getValue(0).toInt64(), 3); - } - { - // pop 4 - auto const [state, row] = input.nextDataRow(); - EXPECT_EQ(state, ExecutorState::DONE); - EXPECT_EQ(row.getValue(0).toInt64(), 4); - } - EXPECT_FALSE(input.hasDataRow()); +TEST_P(CalculationExecutorTest, condition_some_input_skip) { + AqlCall call{}; + call.offset = 4; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{3, 4}, RowBuilder<2>{4, 5}, + RowBuilder<2>{5, 6}, RowBuilder<2>{6, 7}}) + .allowAnyOutputOrder(false) + .expectSkipped(4) + .expectedState(ExecutionState::DONE) + .run(true); +} + +TEST_P(CalculationExecutorTest, condition_some_input_limit) { + AqlCall call{}; + call.hardLimit = 4; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, + MatrixBuilder<2>{RowBuilder<2>{0, 1}, RowBuilder<2>{1, 2}, + RowBuilder<2>{R"("a")", 1}, RowBuilder<2>{2, 3}}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(true); +} + +TEST_P(CalculationExecutorTest, condition_some_input_limit_fullcount) { + AqlCall call{}; + call.hardLimit = 4; + call.fullCount = true; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + 
.setCall(call) + .expectOutput({0, 1}, + MatrixBuilder<2>{RowBuilder<2>{0, 1}, RowBuilder<2>{1, 2}, + RowBuilder<2>{R"("a")", 1}, RowBuilder<2>{2, 3}}) + .allowAnyOutputOrder(false) + .expectSkipped(4) + .expectedState(ExecutionState::DONE) + .run(true); +} + +// Could be fixed and enabled if one enabled the V8 engine +TEST_P(CalculationExecutorTest, DISABLED_v8condition_some_input) { + // auto infos = buildInfos(); + AqlCall call{}; + ExecutionStats stats{}; + + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock>(std::move(infos)) + .setInputValue(MatrixBuilder<2>{ + RowBuilder<2>{0, NoneEntry{}}, RowBuilder<2>{1, NoneEntry{}}, + RowBuilder<2>{R"("a")", NoneEntry{}}, RowBuilder<2>{2, NoneEntry{}}, + RowBuilder<2>{3, NoneEntry{}}, RowBuilder<2>{4, NoneEntry{}}, + RowBuilder<2>{5, NoneEntry{}}, RowBuilder<2>{6, NoneEntry{}}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, + MatrixBuilder<2>{RowBuilder<2>{0, 1}, RowBuilder<2>{1, 2}, + RowBuilder<2>{R"("a")", 1}, RowBuilder<2>{2, 3}, + RowBuilder<2>{3, 4}, RowBuilder<2>{4, 5}, + RowBuilder<2>{5, 6}, RowBuilder<2>{6, 7}}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(true); } } // namespace aql diff --git a/tests/Aql/FilterExecutorTest.cpp b/tests/Aql/FilterExecutorTest.cpp index a51463030664..24c3f1235fc5 100644 --- a/tests/Aql/FilterExecutorTest.cpp +++ b/tests/Aql/FilterExecutorTest.cpp @@ -23,9 +23,11 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// +#include "gtest/gtest.h" + #include "AqlItemBlockHelper.h" +#include "ExecutorTestHelper.h" #include "RowFetcherHelper.h" -#include "gtest/gtest.h" #include "Aql/AqlCall.h" #include "Aql/AqlItemBlock.h" @@ -46,7 +48,11 @@ namespace arangodb { namespace tests { namespace aql { -class FilterExecutorTest : public ::testing::Test { +using FilterExecutorTestHelper = ExecutorTestHelper<2, 2>; +using FilterExecutorSplitType = FilterExecutorTestHelper::SplitType; +using FilterExecutorInputParam = std::tuple; + +class FilterExecutorTest : public AqlExecutorTestCaseWithParam { protected: ExecutionState state; ResourceMonitor monitor; @@ -62,460 +68,118 @@ class FilterExecutorTest : public ::testing::Test { outputRegisters(make_shared_unordered_set()), registersToKeep(outputRegisters), infos(0, 1, 1, {}, {}) {} -}; - -TEST_F(FilterExecutorTest, there_are_no_rows_upstream_the_producer_does_not_wait) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), false); - FilterExecutor testee(fetcher, infos); - FilterStats stats{}; - - OutputAqlItemRow result(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); -} - -TEST_F(FilterExecutorTest, there_are_no_rows_upstream_the_producer_waits) { - VPackBuilder input; - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input.steal(), true); - FilterExecutor testee(fetcher, infos); - FilterStats stats{}; - - OutputAqlItemRow result(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - std::tie(state, stats) = testee.produceRows(result); - 
ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); -} - -TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_the_producer_does_not_wait) { - auto input = VPackParser::fromJson( - "[ [true], [false], [true], [false], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); - FilterExecutor testee(fetcher, infos); - FilterStats stats{}; - - OutputAqlItemRow row(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_EQ(stats.getFiltered(), 0); - ASSERT_TRUE(row.produced()); - - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_EQ(stats.getFiltered(), 1); - ASSERT_TRUE(row.produced()); - - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_EQ(stats.getFiltered(), 2); - ASSERT_TRUE(row.produced()); - - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_EQ(stats.getFiltered(), 0); - ASSERT_FALSE(row.produced()); -} - -TEST_F(FilterExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) { - auto input = VPackParser::fromJson( - "[ [true], [false], [true], [false], [false], [true] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); - FilterExecutor testee(fetcher, infos); - FilterStats stats{}; - - OutputAqlItemRow row(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - - /* - 1 produce => WAIT RES1 - 2 produce => HASMORE, Row 1 RES1 - 3 => WAIT RES2 - 4 => WAIT RES2 - 5 => HASMORE, Row 3 RES2 - 6 => WAIT, RES3 - 7 => WAIT, RES3 - 8 => WAIT, RES3 - 9 => DONE, Row 6 RES3 - */ - - // 1 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - // 2 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - ASSERT_EQ(stats.getFiltered(), 0); - - // 3 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - // 4 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - // We have one filter here - ASSERT_EQ(stats.getFiltered(), 1); - - // 5 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - ASSERT_EQ(stats.getFiltered(), 0); - - // 6 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - // 7 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - ASSERT_EQ(stats.getFiltered(), 1); - - // 7 - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(row.produced()); - ASSERT_EQ(stats.getFiltered(), 1); - - // 8 - std::tie(state, stats) = testee.produceRows(row); - 
ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - ASSERT_EQ(stats.getFiltered(), 0); -} - -TEST_F(FilterExecutorTest, - there_are_rows_in_the_upstream_and_the_last_one_has_to_be_filtered_the_producer_does_not_wait) { - auto input = VPackParser::fromJson( - "[ [true], [false], [true], [false], [false], [true], [false] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), false); - FilterExecutor testee(fetcher, infos); - FilterStats stats{}; - - OutputAqlItemRow row(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_EQ(stats.getFiltered(), 0); - ASSERT_TRUE(row.produced()); - - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_EQ(stats.getFiltered(), 1); - ASSERT_TRUE(row.produced()); - - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_EQ(stats.getFiltered(), 2); - ASSERT_TRUE(row.produced()); - row.advanceRow(); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_EQ(stats.getFiltered(), 1); - ASSERT_FALSE(row.produced()); - - std::tie(state, stats) = testee.produceRows(row); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_EQ(stats.getFiltered(), 0); - ASSERT_FALSE(row.produced()); -} - -TEST_F(FilterExecutorTest, - there_are_rows_in_the_upstream_and_the_last_one_has_to_be_filtered_the_producer_waits) { - auto input = VPackParser::fromJson( - "[ [true], [false], [true], [false], [false], [true], [false] ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, input->steal(), true); - FilterExecutor testee(fetcher, infos); - FilterStats stats{}; - - OutputAqlItemRow result(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - - /* - produce => WAIT RES1 - produce => HASMORE, Row 1 RES1 - => WAIT RES2 - => WAIT RES2 - => HASMORE, Row 3 RES2 - => WAIT, RES3 - => WAIT, RES3 - => WAIT, RES3 - => HASMORE, Row 6 RES3 - => WAITING, RES3 - => DONE, no output! 
RES3 - */ - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 1); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 1); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 1); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::HASMORE); - ASSERT_TRUE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - result.advanceRow(); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::WAITING); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 0); - - std::tie(state, stats) = testee.produceRows(result); - ASSERT_EQ(state, ExecutionState::DONE); - ASSERT_FALSE(result.produced()); - ASSERT_EQ(stats.getFiltered(), 1); -} - -TEST_F(FilterExecutorTest, test_produce_datarange) { - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
-  auto fakeUnusedBlock = VPackParser::fromJson("[ ]");
-  SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(
-      itemBlockManager, fakeUnusedBlock->steal(), false);
-
-  // This is the relevant part of the test
-  FilterExecutor testee(fetcher, infos);
-  SharedAqlItemBlockPtr inBlock =
-      buildBlock<1>(itemBlockManager,
-                    {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}});
+  auto getSplit() -> FilterExecutorSplitType {
+    auto [split] = GetParam();
+    return split;
+  }
-  AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0};
+  auto buildInfos() -> FilterExecutorInfos {
+    return FilterExecutorInfos{0, 2, 2, {}, {0, 1}};
+  }
+};
-  OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep,
-                          infos.registersToClear());
-  EXPECT_EQ(output.numRowsWritten(), 0);
-  auto const [state, stats, call] = testee.produceRows(input, output);
-  EXPECT_EQ(state, ExecutorState::DONE);
-  EXPECT_EQ(stats.getFiltered(), 2);
-  EXPECT_EQ(output.numRowsWritten(), 3);
-  EXPECT_FALSE(input.hasDataRow());
+template
+const FilterExecutorSplitType splitIntoBlocks =
+    FilterExecutorSplitType{std::vector{vs...}};
+template
+const FilterExecutorSplitType splitStep = FilterExecutorSplitType{step};
+
+INSTANTIATE_TEST_CASE_P(FilterExecutor, FilterExecutorTest,
+                        ::testing::Values(splitIntoBlocks<2, 3>, splitIntoBlocks<3, 4>,
+                                          splitStep<1>, splitStep<2>));
+
+TEST_P(FilterExecutorTest, empty_input) {
+  auto infos = buildInfos();
+  AqlCall call{};
+  ExecutionStats{};
+  ExecutorTestHelper(*fakedQuery)
+      .setExecBlock(std::move(infos))
+      .setInputValue({})
+      .setInputSplitType(getSplit())
+      .setCall(call)
+      .expectOutput({1}, {})
+      .allowAnyOutputOrder(false)
+      .expectSkipped(0)
+      .expectedState(ExecutionState::DONE)
+      .run();
 }
-TEST_F(FilterExecutorTest, test_produce_datarange_need_more) {
-  // This fetcher will not be called!
-  // After Execute is done this fetcher shall be removed, the Executor does not need it anymore!
-  auto fakeUnusedBlock = VPackParser::fromJson("[ ]");
-  SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(
-      itemBlockManager, fakeUnusedBlock->steal(), false);
-
-  // This is the relevant part of the test
-  FilterExecutor testee(fetcher, infos);
-  SharedAqlItemBlockPtr inBlock =
-      buildBlock<1>(itemBlockManager,
-                    {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}});
-  size_t hardLimit = 1000;
-  AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0};
-  AqlCall limitedCall{};
-  limitedCall.hardLimit = hardLimit;
-  OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep,
-                          infos.registersToClear(), std::move(limitedCall));
-  EXPECT_EQ(output.numRowsWritten(), 0);
-  auto const [state, stats, call] = testee.produceRows(input, output);
-  EXPECT_EQ(state, ExecutorState::HASMORE);
-  EXPECT_EQ(stats.getFiltered(), 2);
-  EXPECT_EQ(output.numRowsWritten(), 3);
-  EXPECT_FALSE(input.hasDataRow());
-  // Test the Call we send to upstream
-  EXPECT_EQ(call.offset, 0);
-  EXPECT_FALSE(call.hasHardLimit());
-  // We have a given softLimit, so we do not do overfetching
-  EXPECT_EQ(call.getLimit(), hardLimit - 3);
-  EXPECT_FALSE(call.fullCount);
+TEST_P(FilterExecutorTest, values) {
+  auto infos = buildInfos();
+  AqlCall call{};
+  ExecutionStats{};
+  ExecutorTestHelper<2, 2>(*fakedQuery)
+      .setExecBlock(std::move(infos))
+      .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 0}, RowBuilder<2>{0, 1},
+                                      RowBuilder<2>{0, 2}, RowBuilder<2>{0, 3},
+                                      RowBuilder<2>{0, 4}, RowBuilder<2>{0, 5},
+                                      RowBuilder<2>{0, 6}, RowBuilder<2>{0, 7}})
+      .setInputSplitType(getSplit())
+      .setCall(call)
+      .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{1, 0}})
+      .allowAnyOutputOrder(false)
+      .expectSkipped(0)
+      .expectedState(ExecutionState::DONE)
+      .run();
 }
-TEST_F(FilterExecutorTest, test_skip_datarange_need_more) {
-  // This fetcher will not be called!
-  // After Execute is done this fetcher shall be removed, the Executor does not need it anymore!
-  auto fakeUnusedBlock = VPackParser::fromJson("[ ]");
-  SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher(
-      itemBlockManager, fakeUnusedBlock->steal(), false);
-
-  // This is the relevant part of the test
-  FilterExecutor testee(fetcher, infos);
-  SharedAqlItemBlockPtr inBlock =
-      buildBlock<1>(itemBlockManager,
-                    {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}});
-
-  AqlItemBlockInputRange input{ExecutorState::HASMORE, 0, inBlock, 0};
-  AqlCall clientCall;
-  clientCall.offset = 1000;
-
-  auto const [state, stats, skipped, call] = testee.skipRowsRange(input, clientCall);
-  // TODO check stats
-  EXPECT_EQ(state, ExecutorState::HASMORE);
-  EXPECT_EQ(skipped, 3);
-  EXPECT_EQ(clientCall.getOffset(), 1000 - 3);
-  EXPECT_FALSE(input.hasDataRow());
-
-  // Test the Call we send to upstream
-  EXPECT_EQ(call.offset, 0);
-  EXPECT_FALSE(call.hasHardLimit());
-  // Avoid overfetching. I do not have a strong requirement on this
-  // test, however this is what we do right now.
- EXPECT_EQ(call.getLimit(), 997); - EXPECT_FALSE(call.fullCount); +TEST_P(FilterExecutorTest, odd_values) { + auto infos = buildInfos(); + AqlCall call{}; + ExecutionStats{}; + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock(std::move(infos)) + .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 0}, RowBuilder<2>{0, 1}, + RowBuilder<2>{1, 2}, RowBuilder<2>{0, 3}, + RowBuilder<2>{1, 4}, RowBuilder<2>{0, 5}, + RowBuilder<2>{1, 6}, RowBuilder<2>{0, 7}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{1, 0}, RowBuilder<2>{1, 2}, + RowBuilder<2>{1, 4}, RowBuilder<2>{1, 6}}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); } -TEST_F(FilterExecutorTest, test_produce_datarange_has_more) { - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! - auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - FilterExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, - {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - - AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; - block.reset(new AqlItemBlock(itemBlockManager, 2, 1)); - OutputAqlItemRow output(std::move(block), outputRegisters, registersToKeep, - infos.registersToClear()); - - auto const [state, stats, call] = testee.produceRows(input, output); - EXPECT_EQ(state, ExecutorState::HASMORE); - EXPECT_EQ(stats.getFiltered(), 1); - EXPECT_EQ(output.numRowsWritten(), 2); - EXPECT_TRUE(input.hasDataRow()); - // We still have two values in block: false and true - { - // pop false - auto const [state, row] = input.nextDataRow(); - EXPECT_EQ(state, ExecutorState::HASMORE); - EXPECT_FALSE(row.getValue(0).toBoolean()); - } - { - // pop true - auto const [state, row] = input.nextDataRow(); - EXPECT_EQ(state, ExecutorState::DONE); - EXPECT_TRUE(row.getValue(0).toBoolean()); - } - EXPECT_FALSE(input.hasDataRow()); +TEST_P(FilterExecutorTest, skip_and_odd_values) { + auto infos = buildInfos(); + AqlCall call{3}; + ExecutionStats{}; + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock(std::move(infos)) + .setInputValue(MatrixBuilder<2>{RowBuilder<2>{1, 0}, RowBuilder<2>{0, 1}, + RowBuilder<2>{1, 2}, RowBuilder<2>{0, 3}, + RowBuilder<2>{1, 4}, RowBuilder<2>{0, 5}, + RowBuilder<2>{1, 6}, RowBuilder<2>{0, 7}}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{RowBuilder<2>{1, 6}}) + .allowAnyOutputOrder(false) + .expectSkipped(3) + .expectedState(ExecutionState::DONE) + .run(); } -TEST_F(FilterExecutorTest, test_skip_datarange_has_more) { - // This fetcher will not be called! - // After Execute is done this fetcher shall be removed, the Executor does not need it anymore! 
- auto fakeUnusedBlock = VPackParser::fromJson("[ ]"); - SingleRowFetcherHelper<::arangodb::aql::BlockPassthrough::Disable> fetcher( - itemBlockManager, fakeUnusedBlock->steal(), false); - - // This is the relevant part of the test - FilterExecutor testee(fetcher, infos); - SharedAqlItemBlockPtr inBlock = - buildBlock<1>(itemBlockManager, - {{R"(true)"}, {R"(false)"}, {R"(true)"}, {R"(false)"}, {R"(true)"}}); - - AqlItemBlockInputRange input{ExecutorState::DONE, 0, inBlock, 0}; - AqlCall clientCall; - clientCall.offset = 2; - auto const [state, stats, skipped, call] = testee.skipRowsRange(input, clientCall); - // TODO check stats - EXPECT_EQ(state, ExecutorState::HASMORE); - EXPECT_EQ(skipped, 2); - EXPECT_EQ(clientCall.getOffset(), 0); - EXPECT_TRUE(input.hasDataRow()); - // We still have two values in block: false and true - { - // pop false - auto const [state, row] = input.nextDataRow(); - EXPECT_EQ(state, ExecutorState::HASMORE); - EXPECT_FALSE(row.getValue(0).toBoolean()); - } - { - // pop true - auto const [state, row] = input.nextDataRow(); - EXPECT_EQ(state, ExecutorState::DONE); - EXPECT_TRUE(row.getValue(0).toBoolean()); - } - EXPECT_FALSE(input.hasDataRow()); -} +TEST_P(FilterExecutorTest, hard_limit) { + auto infos = buildInfos(); + AqlCall call{}; + call.hardLimit = 0; + call.fullCount = true; + ExecutionStats{}; + ExecutorTestHelper<2, 2>(*fakedQuery) + .setExecBlock(std::move(infos)) + .setInputValue(MatrixBuilder<2>{}) + .setInputSplitType(getSplit()) + .setCall(call) + .expectOutput({0, 1}, MatrixBuilder<2>{}) + .allowAnyOutputOrder(false) + .expectSkipped(0) + .expectedState(ExecutionState::DONE) + .run(); +} // namespace aql } // namespace aql } // namespace tests From 10b24b2f5f7d8bf23353558ce1c9e853ed0bfa77 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Thu, 12 Mar 2020 22:48:36 +0100 Subject: [PATCH 102/122] Merge devel into feature/AqlSubqueryExecutionBlockImplExecuteImplementation --- .gitignore | 4 + 3rdParty/CMakeLists.txt | 12 + 3rdParty/README_maintainers.md | 10 + 3rdParty/date/include/date/date.h | 8 +- 3rdParty/fuerte/include/fuerte/helper.h | 4 +- 3rdParty/fuerte/src/H1Connection.cpp | 2 +- 3rdParty/fuerte/src/H2Connection.cpp | 5 +- 3rdParty/fuerte/src/helper.cpp | 12 +- 3rdParty/fuerte/src/jwt.cpp | 10 +- 3rdParty/iresearch/.travis.yml | 244 +- 3rdParty/iresearch/CMakeLists.txt | 23 +- 3rdParty/iresearch/PVSIResearch.cfg | 2 + 3rdParty/iresearch/README.md | 20 +- 3rdParty/iresearch/THIRD_PARTY_README.md | 4 +- 3rdParty/iresearch/appveyor.yml | 10 +- 3rdParty/iresearch/cmake/PVS-Studio.cmake | 1092 ++-- 3rdParty/iresearch/core/CMakeLists.txt | 6 + .../core/analysis/token_attributes.hpp | 8 +- .../iresearch/core/formats/formats_10.cpp | 205 +- .../core/formats/formats_burst_trie.cpp | 16 +- .../core/formats/formats_burst_trie.hpp | 2 +- 3rdParty/iresearch/core/index/field_data.cpp | 9 +- .../iresearch/core/index/index_reader.hpp | 2 +- 3rdParty/iresearch/core/iql/query_builder.cpp | 2 +- 3rdParty/iresearch/core/search/bm25.cpp | 71 +- .../iresearch/core/search/disjunction.hpp | 320 +- .../core/search/granular_range_filter.cpp | 3 +- .../core/search/levenshtein_filter.cpp | 25 +- .../core/search/levenshtein_filter.hpp | 41 +- .../search/limited_sample_scorer-heap.cpp | 204 - .../search/limited_sample_scorer-heap.hpp | 132 - .../core/search/limited_sample_scorer.cpp | 94 +- .../core/search/limited_sample_scorer.hpp | 10 +- .../core/search/min_match_disjunction.hpp | 58 +- .../core/search/ngram_similarity_filter.cpp | 551 ++ 
.../core/search/ngram_similarity_filter.hpp | 120 + .../iresearch/core/search/phrase_filter.cpp | 794 ++- .../iresearch/core/search/phrase_filter.hpp | 200 +- .../iresearch/core/search/phrase_iterator.hpp | 223 +- .../iresearch/core/search/prefix_filter.hpp | 4 +- .../iresearch/core/search/range_filter.hpp | 6 +- .../core/search/same_position_filter.cpp | 4 +- .../core/search/same_position_filter.hpp | 14 +- 3rdParty/iresearch/core/search/sort.cpp | 168 +- 3rdParty/iresearch/core/search/sort.hpp | 98 +- .../iresearch/core/search/term_filter.hpp | 4 +- 3rdParty/iresearch/core/search/term_query.cpp | 2 +- 3rdParty/iresearch/core/search/tfidf.cpp | 68 +- .../iresearch/core/search/wildcard_filter.cpp | 83 +- .../iresearch/core/search/wildcard_filter.hpp | 42 +- 3rdParty/iresearch/core/shared.hpp | 4 +- .../iresearch/core/utils/arena_allocator.hpp | 2 +- .../iresearch/core/utils/attribute_range.cpp | 46 + .../iresearch/core/utils/attribute_range.hpp | 65 + 3rdParty/iresearch/core/utils/attributes.cpp | 4 +- 3rdParty/iresearch/core/utils/attributes.hpp | 10 +- 3rdParty/iresearch/core/utils/automaton.hpp | 62 +- .../iresearch/core/utils/automaton_utils.cpp | 272 +- .../iresearch/core/utils/automaton_utils.hpp | 332 +- 3rdParty/iresearch/core/utils/fst.hpp | 176 +- .../iresearch/core/utils/fst_states_map.hpp | 144 + .../core/utils/fst_table_matcher.hpp | 8 +- .../core/utils/levenshtein_utils.cpp | 171 +- .../core/utils/levenshtein_utils.hpp | 48 +- .../iresearch/core/utils/locale_utils.cpp | 4 +- .../iresearch/core/utils/locale_utils.hpp | 2 +- 3rdParty/iresearch/core/utils/log.cpp | 4 +- .../core/utils/ngram_match_utils.hpp | 138 + 3rdParty/iresearch/core/utils/std.hpp | 4 +- 3rdParty/iresearch/core/utils/string.cpp | 8 +- 3rdParty/iresearch/core/utils/string.hpp | 36 + 3rdParty/iresearch/core/utils/utf8_utils.hpp | 167 +- .../iresearch/core/utils/wildcard_utils.cpp | 228 + .../iresearch/core/utils/wildcard_utils.hpp | 120 +- .../iresearch/external/openfst/draw-impl.h | 37 +- .../external/openfst/fst/string-weight.h | 6 +- 3rdParty/iresearch/external/openfst/util.cc | 2 +- 3rdParty/iresearch/python/CMakeLists.txt | 11 + 3rdParty/iresearch/python/pyresearch.hpp | 30 +- 3rdParty/iresearch/python/swig/CMakeLists.txt | 40 +- .../scripts/ArangoDBLoader/WikiLoader.py | 47 + .../scripts/Prometheus/Dashboard.json | 703 +- .../iresearch/scripts/Prometheus/INSTALL.md | 2 +- .../scripts/Prometheus/PythonBenchmark.py | 52 +- .../scripts/download-benchmark-resources.sh | 4 +- .../scripts/gtest-parallel/gtest-parallel | 0 .../scripts/gtest-parallel/gtest_parallel.py | 0 .../gtest-parallel/gtest_parallel_tests.py | 0 3rdParty/iresearch/scripts/index-benchmark.sh | 4 +- .../scripts/iresearch-benchmark.tasks | 2 + .../iresearch/scripts/search-benchmark.sh | 2 +- .../scripts/start-benchmark-comparison.sh | 2 + 3rdParty/iresearch/tests/CMakeLists.txt | 4 + .../tests/formats/formats_13_tests.cpp | 511 ++ .../tests/formats/formats_test_case_base.hpp | 4 + .../iresearch/tests/index/assert_format.cpp | 21 +- .../tests/index/index_levenshtein_tests.cpp | 10 + .../iresearch/tests/index/index_tests.cpp | 45 +- .../iresearch/tests/index/index_tests.hpp | 6 + .../tests/index/merge_writer_tests.cpp | 12 +- .../tests/iql/query_builder_test.cpp | 3 +- .../tests/resources/ngram_similarity.json | 18 + .../tests/resources/phrase_sequential.json | 10 +- .../resources/simple_sequential_utf8.json | 34 + 3rdParty/iresearch/tests/search/bm25_test.cpp | 6 +- .../tests/search/boolean_filter_tests.cpp | 7 +- 
.../tests/search/filter_test_case_base.hpp | 27 +- .../search/ngram_similarity_filter_tests.cpp | 1191 ++++ .../tests/search/phrase_filter_tests.cpp | 5660 ++++++++++++++++- .../search/same_position_filter_tests.cpp | 18 +- .../iresearch/tests/search/sort_tests.cpp | 2 +- .../iresearch/tests/search/tfidf_test.cpp | 6 +- .../tests/search/wildcard_filter_test.cpp | 181 +- 3rdParty/iresearch/tests/unicode/utf8.h | 34 - .../iresearch/tests/unicode/utf8/checked.h | 327 - 3rdParty/iresearch/tests/unicode/utf8/core.h | 329 - .../iresearch/tests/unicode/utf8/unchecked.h | 228 - .../iresearch/tests/utils/automaton_test.cpp | 1659 ++++- .../iresearch/tests/utils/fst_utils_test.cpp | 10 - .../tests/utils/levenshtein_utils_test.cpp | 477 +- .../tests/utils/ngram_match_utils_tests.cpp | 259 + .../iresearch/tests/utils/string_tests.cpp | 72 + .../iresearch/tests/utils/utf8_utils_test.cpp | 315 +- .../tests/utils/wildcard_utils_test.cpp | 651 ++ 3rdParty/iresearch/utils/index-put.cpp | 5 +- 3rdParty/iresearch/utils/index-search.cpp | 62 +- 3rdParty/iresearch/utils/pdd.cpp | 15 +- .../json-schema-validation/.clang_complete | 11 + 3rdParty/json-schema-validation/.gitignore | 10 + .../json-schema-validation/CMakeLists.txt | 76 + .../cmake/ext_cmake_install.cmake | 43 + .../cmake/ext_cmake_utils.cmake | 69 + .../include/validation/events_from_slice.hpp | 152 + .../include/validation/types.hpp | 28 + .../include/validation/validation.hpp | 32 + .../json-schema-validation/src/validation.cpp | 193 + 3rdParty/taocpp-json.version | 1 + 3rdParty/taocpp-json/.clang-format | 84 + 3rdParty/taocpp-json/CMakeLists.txt | 44 + 3rdParty/taocpp-json/LICENSE | 21 + .../taocpp-json/LICENSE.double-conversion | 32 + 3rdParty/taocpp-json/LICENSE.itoa | 19 + 3rdParty/taocpp-json/LICENSE.ryu | 201 + 3rdParty/taocpp-json/README.md | 149 + 3rdParty/taocpp-json/include/tao/json.hpp | 45 + .../include/tao/json/basic_value.hpp | 941 +++ .../taocpp-json/include/tao/json/binary.hpp | 103 + .../include/tao/json/binary_view.hpp | 31 + .../taocpp-json/include/tao/json/binding.hpp | 71 + .../include/tao/json/binding/constant.hpp | 232 + .../include/tao/json/binding/element.hpp | 182 + .../include/tao/json/binding/factory.hpp | 250 + .../tao/json/binding/for_nothing_value.hpp | 17 + .../tao/json/binding/for_unknown_key.hpp | 17 + .../include/tao/json/binding/inherit.hpp | 14 + .../tao/json/binding/internal/array.hpp | 103 + .../tao/json/binding/internal/inherit.hpp | 45 + .../tao/json/binding/internal/object.hpp | 267 + .../tao/json/binding/internal/type_key.hpp | 54 + .../include/tao/json/binding/member.hpp | 32 + .../include/tao/json/binding/member_kind.hpp | 17 + .../include/tao/json/binding/versions.hpp | 127 + .../taocpp-json/include/tao/json/consume.hpp | 43 + .../include/tao/json/consume_file.hpp | 33 + .../include/tao/json/consume_string.hpp | 31 + .../include/tao/json/contrib/array_traits.hpp | 43 + .../include/tao/json/contrib/deque_traits.hpp | 41 + .../include/tao/json/contrib/diff.hpp | 106 + .../include/tao/json/contrib/get.hpp | 152 + .../json/contrib/internal/array_traits.hpp | 92 + .../json/contrib/internal/indirect_traits.hpp | 69 + .../json/contrib/internal/object_traits.hpp | 105 + .../tao/json/contrib/internal/type_traits.hpp | 36 + .../include/tao/json/contrib/list_traits.hpp | 41 + .../include/tao/json/contrib/map_traits.hpp | 43 + .../tao/json/contrib/multimap_traits.hpp | 43 + .../tao/json/contrib/multiset_traits.hpp | 41 + .../include/tao/json/contrib/pair_traits.hpp | 21 + .../include/tao/json/contrib/patch.hpp | 
105 + .../tao/json/contrib/pointer_traits.hpp | 59 + .../include/tao/json/contrib/position.hpp | 166 + .../include/tao/json/contrib/reference.hpp | 115 + .../include/tao/json/contrib/schema.hpp | 1851 ++++++ .../include/tao/json/contrib/set_traits.hpp | 41 + .../tao/json/contrib/shared_ptr_traits.hpp | 90 + .../include/tao/json/contrib/traits.hpp | 121 + .../include/tao/json/contrib/tuple_traits.hpp | 51 + .../tao/json/contrib/unique_ptr_traits.hpp | 89 + .../tao/json/contrib/unordered_map_traits.hpp | 43 + .../tao/json/contrib/unordered_set_traits.hpp | 41 + .../tao/json/contrib/vector_bool_traits.hpp | 45 + .../tao/json/contrib/vector_traits.hpp | 51 + .../taocpp-json/include/tao/json/events.hpp | 47 + .../include/tao/json/events/apply.hpp | 20 + .../tao/json/events/binary_to_base64.hpp | 26 + .../tao/json/events/binary_to_base64url.hpp | 28 + .../tao/json/events/binary_to_exception.hpp | 27 + .../include/tao/json/events/binary_to_hex.hpp | 26 + .../include/tao/json/events/compare.hpp | 265 + .../include/tao/json/events/debug.hpp | 145 + .../include/tao/json/events/discard.hpp | 43 + .../include/tao/json/events/from_file.hpp | 28 + .../include/tao/json/events/from_input.hpp | 45 + .../include/tao/json/events/from_stream.hpp | 33 + .../include/tao/json/events/from_string.hpp | 38 + .../include/tao/json/events/from_value.hpp | 202 + .../include/tao/json/events/hash.hpp | 174 + .../json/events/invalid_string_to_binary.hpp | 50 + .../events/invalid_string_to_exception.hpp | 49 + .../tao/json/events/invalid_string_to_hex.hpp | 48 + .../events/key_camel_case_to_snake_case.hpp | 62 + .../events/key_snake_case_to_camel_case.hpp | 57 + .../tao/json/events/limit_nesting_depth.hpp | 82 + .../tao/json/events/limit_value_count.hpp | 46 + .../json/events/non_finite_to_exception.hpp | 31 + .../tao/json/events/non_finite_to_null.hpp | 32 + .../tao/json/events/non_finite_to_string.hpp | 40 + .../include/tao/json/events/prefer_signed.hpp | 32 + .../tao/json/events/prefer_unsigned.hpp | 32 + .../include/tao/json/events/produce.hpp | 22 + .../include/tao/json/events/ref.hpp | 111 + .../include/tao/json/events/statistics.hpp | 112 + .../include/tao/json/events/tee.hpp | 386 ++ .../tao/json/events/to_pretty_stream.hpp | 172 + .../include/tao/json/events/to_stream.hpp | 142 + .../include/tao/json/events/to_string.hpp | 33 + .../include/tao/json/events/to_value.hpp | 137 + .../include/tao/json/events/transformer.hpp | 70 + .../tao/json/events/validate_event_order.hpp | 411 ++ .../include/tao/json/events/validate_keys.hpp | 51 + .../include/tao/json/events/virtual_base.hpp | 192 + .../include/tao/json/events/virtual_ref.hpp | 170 + .../include/tao/json/external/double.hpp | 1313 ++++ .../include/tao/json/external/itoa.hpp | 149 + .../include/tao/json/external/pegtl.hpp | 53 + .../pegtl/analysis/analyze_cycles.hpp | 127 + .../json/external/pegtl/analysis/counted.hpp | 23 + .../json/external/pegtl/analysis/generic.hpp | 31 + .../external/pegtl/analysis/grammar_info.hpp | 32 + .../external/pegtl/analysis/insert_guard.hpp | 51 + .../external/pegtl/analysis/insert_rules.hpp | 25 + .../external/pegtl/analysis/rule_info.hpp | 29 + .../external/pegtl/analysis/rule_type.hpp | 21 + .../tao/json/external/pegtl/analyze.hpp | 21 + .../tao/json/external/pegtl/apply_mode.hpp | 19 + .../tao/json/external/pegtl/argv_input.hpp | 51 + .../include/tao/json/external/pegtl/ascii.hpp | 67 + .../tao/json/external/pegtl/buffer_input.hpp | 212 + .../tao/json/external/pegtl/change_action.hpp | 38 + .../pegtl/change_action_and_state.hpp | 
53 + .../pegtl/change_action_and_states.hpp | 62 + .../json/external/pegtl/change_control.hpp | 36 + .../tao/json/external/pegtl/change_state.hpp | 50 + .../tao/json/external/pegtl/change_states.hpp | 61 + .../tao/json/external/pegtl/config.hpp | 11 + .../tao/json/external/pegtl/contrib/abnf.hpp | 35 + .../json/external/pegtl/contrib/alphabet.hpp | 67 + .../json/external/pegtl/contrib/counter.hpp | 54 + .../tao/json/external/pegtl/contrib/http.hpp | 253 + .../external/pegtl/contrib/icu/internal.hpp | 68 + .../json/external/pegtl/contrib/icu/utf16.hpp | 200 + .../json/external/pegtl/contrib/icu/utf32.hpp | 200 + .../json/external/pegtl/contrib/icu/utf8.hpp | 105 + .../json/external/pegtl/contrib/if_then.hpp | 55 + .../json/external/pegtl/contrib/integer.hpp | 446 ++ .../tao/json/external/pegtl/contrib/json.hpp | 88 + .../external/pegtl/contrib/json_pointer.hpp | 33 + .../external/pegtl/contrib/parse_tree.hpp | 561 ++ .../pegtl/contrib/parse_tree_to_dot.hpp | 104 + .../external/pegtl/contrib/raw_string.hpp | 225 + .../pegtl/contrib/remove_first_state.hpp | 86 + .../pegtl/contrib/rep_one_min_max.hpp | 62 + .../external/pegtl/contrib/rep_string.hpp | 44 + .../json/external/pegtl/contrib/to_string.hpp | 38 + .../json/external/pegtl/contrib/tracer.hpp | 158 + .../json/external/pegtl/contrib/unescape.hpp | 199 + .../tao/json/external/pegtl/contrib/uri.hpp | 106 + .../tao/json/external/pegtl/cstream_input.hpp | 33 + .../json/external/pegtl/disable_action.hpp | 35 + .../tao/json/external/pegtl/discard_input.hpp | 37 + .../pegtl/discard_input_on_failure.hpp | 39 + .../pegtl/discard_input_on_success.hpp | 39 + .../tao/json/external/pegtl/enable_action.hpp | 35 + .../include/tao/json/external/pegtl/eol.hpp | 37 + .../tao/json/external/pegtl/eol_pair.hpp | 18 + .../tao/json/external/pegtl/file_input.hpp | 44 + .../json/external/pegtl/internal/action.hpp | 44 + .../external/pegtl/internal/action_input.hpp | 107 + .../json/external/pegtl/internal/alnum.hpp | 18 + .../json/external/pegtl/internal/alpha.hpp | 18 + .../external/pegtl/internal/always_false.hpp | 21 + .../tao/json/external/pegtl/internal/any.hpp | 58 + .../json/external/pegtl/internal/apply.hpp | 53 + .../json/external/pegtl/internal/apply0.hpp | 50 + .../external/pegtl/internal/apply0_single.hpp | 34 + .../external/pegtl/internal/apply_single.hpp | 34 + .../tao/json/external/pegtl/internal/at.hpp | 53 + .../tao/json/external/pegtl/internal/bof.hpp | 31 + .../tao/json/external/pegtl/internal/bol.hpp | 31 + .../tao/json/external/pegtl/internal/bump.hpp | 45 + .../external/pegtl/internal/bump_help.hpp | 29 + .../json/external/pegtl/internal/bytes.hpp | 36 + .../json/external/pegtl/internal/control.hpp | 44 + .../external/pegtl/internal/cr_crlf_eol.hpp | 32 + .../json/external/pegtl/internal/cr_eol.hpp | 32 + .../json/external/pegtl/internal/crlf_eol.hpp | 32 + .../pegtl/internal/cstream_reader.hpp | 49 + .../pegtl/internal/cstring_reader.hpp | 40 + .../json/external/pegtl/internal/demangle.hpp | 140 + .../json/external/pegtl/internal/disable.hpp | 44 + .../json/external/pegtl/internal/discard.hpp | 33 + .../external/pegtl/internal/dusel_mode.hpp | 23 + .../external/pegtl/internal/duseltronik.hpp | 187 + .../json/external/pegtl/internal/enable.hpp | 44 + .../json/external/pegtl/internal/endian.hpp | 62 + .../external/pegtl/internal/endian_gcc.hpp | 206 + .../external/pegtl/internal/endian_win.hpp | 106 + .../tao/json/external/pegtl/internal/eof.hpp | 31 + .../tao/json/external/pegtl/internal/eol.hpp | 31 + 
.../tao/json/external/pegtl/internal/eolf.hpp | 32 + .../pegtl/internal/file_mapper_posix.hpp | 83 + .../pegtl/internal/file_mapper_win32.hpp | 219 + .../external/pegtl/internal/file_opener.hpp | 72 + .../external/pegtl/internal/file_reader.hpp | 114 + .../external/pegtl/internal/has_apply.hpp | 25 + .../external/pegtl/internal/has_apply0.hpp | 25 + .../external/pegtl/internal/has_match.hpp | 56 + .../external/pegtl/internal/identifier.hpp | 22 + .../json/external/pegtl/internal/if_apply.hpp | 53 + .../json/external/pegtl/internal/if_must.hpp | 48 + .../external/pegtl/internal/if_must_else.hpp | 19 + .../external/pegtl/internal/if_then_else.hpp | 51 + .../external/pegtl/internal/input_pair.hpp | 29 + .../pegtl/internal/istream_reader.hpp | 40 + .../json/external/pegtl/internal/istring.hpp | 72 + .../json/external/pegtl/internal/iterator.hpp | 52 + .../external/pegtl/internal/lf_crlf_eol.hpp | 37 + .../json/external/pegtl/internal/lf_eol.hpp | 32 + .../tao/json/external/pegtl/internal/list.hpp | 19 + .../external/pegtl/internal/list_must.hpp | 20 + .../external/pegtl/internal/list_tail.hpp | 20 + .../external/pegtl/internal/list_tail_pad.hpp | 22 + .../json/external/pegtl/internal/marker.hpp | 82 + .../external/pegtl/internal/missing_apply.hpp | 25 + .../pegtl/internal/missing_apply0.hpp | 23 + .../tao/json/external/pegtl/internal/must.hpp | 72 + .../json/external/pegtl/internal/not_at.hpp | 53 + .../tao/json/external/pegtl/internal/one.hpp | 44 + .../tao/json/external/pegtl/internal/opt.hpp | 57 + .../tao/json/external/pegtl/internal/pad.hpp | 19 + .../json/external/pegtl/internal/pad_opt.hpp | 20 + .../external/pegtl/internal/peek_char.hpp | 32 + .../pegtl/internal/peek_mask_uint.hpp | 54 + .../pegtl/internal/peek_mask_uint8.hpp | 34 + .../external/pegtl/internal/peek_uint.hpp | 45 + .../external/pegtl/internal/peek_uint8.hpp | 33 + .../external/pegtl/internal/peek_utf16.hpp | 54 + .../external/pegtl/internal/peek_utf32.hpp | 43 + .../external/pegtl/internal/peek_utf8.hpp | 90 + .../external/pegtl/internal/pegtl_string.hpp | 90 + .../tao/json/external/pegtl/internal/plus.hpp | 53 + .../json/external/pegtl/internal/raise.hpp | 53 + .../json/external/pegtl/internal/range.hpp | 51 + .../json/external/pegtl/internal/ranges.hpp | 93 + .../external/pegtl/internal/read_uint.hpp | 77 + .../json/external/pegtl/internal/rematch.hpp | 69 + .../tao/json/external/pegtl/internal/rep.hpp | 66 + .../json/external/pegtl/internal/rep_min.hpp | 20 + .../external/pegtl/internal/rep_min_max.hpp | 79 + .../json/external/pegtl/internal/rep_opt.hpp | 46 + .../json/external/pegtl/internal/require.hpp | 42 + .../pegtl/internal/result_on_found.hpp | 19 + .../json/external/pegtl/internal/rules.hpp | 61 + .../tao/json/external/pegtl/internal/seq.hpp | 73 + .../external/pegtl/internal/skip_control.hpp | 25 + .../tao/json/external/pegtl/internal/sor.hpp | 60 + .../tao/json/external/pegtl/internal/star.hpp | 47 + .../external/pegtl/internal/star_must.hpp | 19 + .../json/external/pegtl/internal/state.hpp | 49 + .../json/external/pegtl/internal/string.hpp | 58 + .../json/external/pegtl/internal/trivial.hpp | 32 + .../pegtl/internal/try_catch_type.hpp | 64 + .../json/external/pegtl/internal/until.hpp | 84 + .../tao/json/external/pegtl/istream_input.hpp | 33 + .../include/tao/json/external/pegtl/match.hpp | 73 + .../tao/json/external/pegtl/memory_input.hpp | 381 ++ .../tao/json/external/pegtl/mmap_input.hpp | 79 + .../tao/json/external/pegtl/normal.hpp | 87 + .../tao/json/external/pegtl/nothing.hpp | 20 + 
.../include/tao/json/external/pegtl/parse.hpp | 53 + .../tao/json/external/pegtl/parse_error.hpp | 69 + .../tao/json/external/pegtl/position.hpp | 75 + .../tao/json/external/pegtl/read_input.hpp | 74 + .../tao/json/external/pegtl/require_apply.hpp | 16 + .../json/external/pegtl/require_apply0.hpp | 16 + .../tao/json/external/pegtl/rewind_mode.hpp | 20 + .../include/tao/json/external/pegtl/rules.hpp | 67 + .../tao/json/external/pegtl/string_input.hpp | 66 + .../tao/json/external/pegtl/tracking_mode.hpp | 19 + .../tao/json/external/pegtl/uint16.hpp | 62 + .../tao/json/external/pegtl/uint32.hpp | 62 + .../tao/json/external/pegtl/uint64.hpp | 63 + .../include/tao/json/external/pegtl/uint8.hpp | 36 + .../include/tao/json/external/pegtl/utf16.hpp | 49 + .../include/tao/json/external/pegtl/utf32.hpp | 49 + .../include/tao/json/external/pegtl/utf8.hpp | 28 + .../tao/json/external/pegtl/version.hpp | 13 + .../include/tao/json/external/ryu.hpp | 1216 ++++ .../taocpp-json/include/tao/json/forward.hpp | 44 + .../include/tao/json/from_file.hpp | 32 + .../include/tao/json/from_input.hpp | 32 + .../include/tao/json/from_stream.hpp | 45 + .../include/tao/json/from_string.hpp | 41 + .../include/tao/json/internal/action.hpp | 268 + .../include/tao/json/internal/base64.hpp | 55 + .../include/tao/json/internal/base64url.hpp | 53 + .../include/tao/json/internal/endian.hpp | 60 + .../include/tao/json/internal/endian_gcc.hpp | 198 + .../include/tao/json/internal/endian_win.hpp | 103 + .../include/tao/json/internal/errors.hpp | 85 + .../include/tao/json/internal/escape.hpp | 77 + .../include/tao/json/internal/format.hpp | 59 + .../include/tao/json/internal/grammar.hpp | 229 + .../include/tao/json/internal/hexdump.hpp | 31 + .../include/tao/json/internal/identity.hpp | 22 + .../tao/json/internal/number_state.hpp | 80 + .../tao/json/internal/number_traits.hpp | 267 + .../include/tao/json/internal/pair.hpp | 42 + .../include/tao/json/internal/parse_util.hpp | 112 + .../include/tao/json/internal/sha256.hpp | 218 + .../include/tao/json/internal/single.hpp | 40 + .../include/tao/json/internal/string_t.hpp | 35 + .../include/tao/json/internal/type_traits.hpp | 113 + .../tao/json/internal/unescape_action.hpp | 24 + .../tao/json/internal/uri_fragment.hpp | 182 + .../taocpp-json/include/tao/json/jaxn.hpp | 19 + .../include/tao/json/jaxn/consume_file.hpp | 34 + .../include/tao/json/jaxn/consume_string.hpp | 32 + .../tao/json/jaxn/events/from_file.hpp | 28 + .../tao/json/jaxn/events/from_input.hpp | 45 + .../tao/json/jaxn/events/from_stream.hpp | 33 + .../tao/json/jaxn/events/from_string.hpp | 39 + .../tao/json/jaxn/events/to_pretty_stream.hpp | 69 + .../tao/json/jaxn/events/to_stream.hpp | 67 + .../tao/json/jaxn/events/to_string.hpp | 33 + .../include/tao/json/jaxn/from_file.hpp | 33 + .../include/tao/json/jaxn/from_input.hpp | 33 + .../include/tao/json/jaxn/from_stream.hpp | 48 + .../include/tao/json/jaxn/from_string.hpp | 42 + .../include/tao/json/jaxn/internal/action.hpp | 355 ++ .../json/jaxn/internal/bunescape_action.hpp | 114 + .../include/tao/json/jaxn/internal/errors.hpp | 108 + .../tao/json/jaxn/internal/grammar.hpp | 375 ++ .../tao/json/jaxn/internal/integer.hpp | 256 + .../json/jaxn/internal/unescape_action.hpp | 28 + .../include/tao/json/jaxn/is_identifier.hpp | 27 + .../include/tao/json/jaxn/parts_parser.hpp | 263 + .../include/tao/json/jaxn/to_stream.hpp | 36 + .../include/tao/json/jaxn/to_string.hpp | 33 + .../include/tao/json/message_extension.hpp | 49 + .../include/tao/json/operators.hpp | 494 ++ 
.../include/tao/json/parts_parser.hpp | 306 + .../taocpp-json/include/tao/json/pointer.hpp | 432 ++ .../taocpp-json/include/tao/json/produce.hpp | 61 + .../include/tao/json/self_contained.hpp | 143 + .../taocpp-json/include/tao/json/span.hpp | 568 ++ .../taocpp-json/include/tao/json/stream.hpp | 38 + .../include/tao/json/to_stream.hpp | 42 + .../include/tao/json/to_string.hpp | 23 + .../taocpp-json/include/tao/json/traits.hpp | 971 +++ .../taocpp-json/include/tao/json/type.hpp | 112 + .../taocpp-json/include/tao/json/utf8.hpp | 57 + .../taocpp-json/include/tao/json/value.hpp | 12 + .../velocypack/include/velocypack/Iterator.h | 6 + CHANGELOG | 118 +- CMakeLists.txt | 4 +- .../Administration/get_admin_server_tls.md | 7 +- Documentation/Scripts/codeBlockReader.py | 103 +- LICENSES-OTHER-COMPONENTS.md | 9 + README.md | 3 + UnitTests/tls-ca.crt | 11 + UnitTests/tls-ca.key | 5 + UnitTests/tls.keyfile | 28 + arangod/Agency/AgencyCommon.h | 6 +- arangod/Agency/AgencyStrings.h | 2 +- arangod/Agency/Job.cpp | 14 + arangod/Agency/Job.h | 4 + arangod/Agency/MoveShard.cpp | 24 + arangod/Agency/Node.cpp | 22 +- arangod/Agency/Node.h | 3 +- arangod/Agency/State.cpp | 90 +- arangod/Agency/Supervision.cpp | 123 +- arangod/Agency/Supervision.h | 9 + arangod/Aql/AqlFunctionFeature.cpp | 2 + arangod/Aql/AqlItemBlock.cpp | 11 +- arangod/Aql/Ast.cpp | 16 +- arangod/Aql/AstNode.cpp | 48 +- arangod/Aql/AstNode.h | 5 +- arangod/Aql/BindParameters.cpp | 4 +- arangod/Aql/DocumentProducingHelper.cpp | 15 +- arangod/Aql/ExecutionPlan.cpp | 4 +- arangod/Aql/Functions.cpp | 2 +- arangod/Aql/IndexExecutor.cpp | 15 +- arangod/Aql/OptimizerRules.cpp | 20 +- arangod/Aql/RegisterPlan.cpp | 156 +- arangod/Aql/RegisterPlan.h | 19 +- arangod/Aql/SimpleModifier.cpp | 2 +- arangod/Aql/SimpleModifier.h | 2 +- arangod/Aql/SortedCollectExecutor.cpp | 6 +- arangod/Aql/SortedCollectExecutor.h | 2 +- arangod/Aql/TraversalExecutor.cpp | 8 +- arangod/Aql/UpsertModifier.cpp | 4 +- arangod/CMakeLists.txt | 9 + arangod/Cache/BucketState.cpp | 36 +- arangod/Cache/BucketState.h | 14 +- arangod/Cache/Cache.cpp | 242 +- arangod/Cache/Cache.h | 55 +- arangod/Cache/CacheManagerFeature.cpp | 19 +- arangod/Cache/CacheManagerFeature.h | 7 +- arangod/Cache/CacheManagerFeatureThreads.cpp | 8 +- arangod/Cache/CacheManagerFeatureThreads.h | 13 +- arangod/Cache/CachedValue.cpp | 45 +- arangod/Cache/CachedValue.h | 62 +- arangod/Cache/Common.h | 8 +- arangod/Cache/Finding.cpp | 7 +- arangod/Cache/Finding.h | 3 +- arangod/Cache/FrequencyBuffer.h | 37 +- arangod/Cache/Manager.cpp | 384 +- arangod/Cache/Manager.h | 76 +- arangod/Cache/ManagerTasks.cpp | 56 +- arangod/Cache/ManagerTasks.h | 20 +- arangod/Cache/Metadata.cpp | 67 +- arangod/Cache/Metadata.h | 61 +- arangod/Cache/PlainBucket.cpp | 41 +- arangod/Cache/PlainBucket.h | 26 +- arangod/Cache/PlainCache.cpp | 313 +- arangod/Cache/PlainCache.h | 33 +- arangod/Cache/Rebalancer.cpp | 6 +- arangod/Cache/Rebalancer.h | 4 +- arangod/Cache/Table.cpp | 304 +- arangod/Cache/Table.h | 102 +- arangod/Cache/Transaction.cpp | 10 +- arangod/Cache/Transaction.h | 7 +- arangod/Cache/TransactionManager.cpp | 108 +- arangod/Cache/TransactionManager.h | 30 +- arangod/Cache/TransactionalBucket.cpp | 63 +- arangod/Cache/TransactionalBucket.h | 39 +- arangod/Cache/TransactionalCache.cpp | 429 +- arangod/Cache/TransactionalCache.h | 33 +- arangod/Cluster/ClusterEdgeCursor.cpp | 78 +- arangod/Cluster/ClusterEdgeCursor.h | 31 +- arangod/Cluster/ClusterInfo.cpp | 44 +- arangod/Cluster/ClusterMethods.cpp | 46 +- 
arangod/Cluster/ClusterMethods.h | 29 +- arangod/Cluster/ClusterTraverser.cpp | 2 + arangod/Cluster/Maintenance.cpp | 4 +- arangod/Cluster/MaintenanceFeature.h | 2 +- arangod/Cluster/MaintenanceStrings.h | 2 +- arangod/Cluster/TraverserEngine.cpp | 83 +- arangod/Cluster/TraverserEngine.h | 11 + arangod/ClusterEngine/ClusterCollection.cpp | 14 +- arangod/ClusterEngine/ClusterCollection.h | 6 +- arangod/ClusterEngine/ClusterEngine.cpp | 4 +- arangod/ClusterEngine/ClusterIndex.cpp | 4 +- arangod/GeneralServer/AcceptorTcp.cpp | 2 +- arangod/GeneralServer/AsioSocket.h | 7 +- arangod/GeneralServer/GeneralServer.cpp | 36 +- arangod/GeneralServer/GeneralServer.h | 6 +- arangod/GeneralServer/H2CommTask.cpp | 71 +- arangod/GeneralServer/SslServerFeature.cpp | 58 +- arangod/GeneralServer/SslServerFeature.h | 24 +- .../AttributeWeightShortestPathFinder.cpp | 28 +- .../Graph/AttributeWeightShortestPathFinder.h | 12 +- arangod/Graph/BaseOptions.cpp | 46 +- arangod/Graph/BaseOptions.h | 14 +- arangod/Graph/BreadthFirstEnumerator.cpp | 160 +- arangod/Graph/BreadthFirstEnumerator.h | 17 +- .../ConstantWeightShortestPathFinder.cpp | 18 +- .../Graph/ConstantWeightShortestPathFinder.h | 6 +- arangod/Graph/EdgeCursor.h | 3 + arangod/Graph/KShortestPathsFinder.cpp | 30 +- arangod/Graph/KShortestPathsFinder.h | 6 +- arangod/Graph/NeighborsEnumerator.cpp | 65 +- arangod/Graph/PathEnumerator.cpp | 96 +- arangod/Graph/PathEnumerator.h | 16 +- arangod/Graph/ShortestPathOptions.cpp | 27 +- arangod/Graph/ShortestPathOptions.h | 16 +- arangod/Graph/SingleServerEdgeCursor.cpp | 148 +- arangod/Graph/SingleServerEdgeCursor.h | 32 +- arangod/Graph/SingleServerTraverser.cpp | 8 +- arangod/Graph/SingleServerTraverser.h | 1 - arangod/Graph/Traverser.cpp | 15 +- arangod/Graph/Traverser.h | 4 - arangod/Graph/TraverserCache.h | 2 - arangod/Graph/TraverserCacheFactory.cpp | 4 +- arangod/Graph/TraverserCacheFactory.h | 2 +- arangod/Graph/TraverserOptions.cpp | 40 +- arangod/Graph/TraverserOptions.h | 17 +- arangod/IResearch/ExpressionFilter.cpp | 2 +- .../IResearch/IResearchAnalyzerFeature.cpp | 1 - arangod/IResearch/IResearchFeature.cpp | 3 +- arangod/IResearch/IResearchFilterFactory.cpp | 713 ++- arangod/IResearch/IResearchLink.cpp | 2 +- arangod/IResearch/IResearchRocksDBLink.h | 4 +- arangod/Indexes/IndexIterator.cpp | 2 +- arangod/Indexes/IndexIterator.h | 2 +- arangod/MMFiles/MMFilesCollection.cpp | 26 +- arangod/MMFiles/MMFilesCollection.h | 4 +- arangod/Pregel/GraphStore.h | 1 - arangod/Pregel/IncomingCache.h | 1 - arangod/Replication/DatabaseInitialSyncer.cpp | 621 ++ arangod/Replication/DatabaseInitialSyncer.h | 16 +- arangod/Replication/InitialSyncer.h | 3 + arangod/Replication/ReplicationFeature.cpp | 29 + arangod/Replication/ReplicationFeature.h | 24 +- arangod/RestHandler/RestCollectionHandler.cpp | 44 +- .../RestHandler/RestReplicationHandler.cpp | 416 +- arangod/RestHandler/RestReplicationHandler.h | 53 +- arangod/RocksDBEngine/CMakeLists.txt | 1 + arangod/RocksDBEngine/RocksDBBuilderIndex.cpp | 14 +- arangod/RocksDBEngine/RocksDBBuilderIndex.h | 2 +- arangod/RocksDBEngine/RocksDBCollection.cpp | 206 +- arangod/RocksDBEngine/RocksDBCollection.h | 25 +- arangod/RocksDBEngine/RocksDBEdgeIndex.cpp | 11 +- arangod/RocksDBEngine/RocksDBEdgeIndex.h | 4 +- arangod/RocksDBEngine/RocksDBEngine.cpp | 8 +- .../RocksDBEngine/RocksDBFulltextIndex.cpp | 2 +- arangod/RocksDBEngine/RocksDBFulltextIndex.h | 4 +- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 2 +- arangod/RocksDBEngine/RocksDBGeoIndex.h | 3 +- 
.../RocksDBEngine/RocksDBIncrementalSync.cpp | 3 + arangod/RocksDBEngine/RocksDBIndex.cpp | 4 +- arangod/RocksDBEngine/RocksDBIndex.h | 3 +- arangod/RocksDBEngine/RocksDBKey.cpp | 11 + arangod/RocksDBEngine/RocksDBKey.h | 5 + arangod/RocksDBEngine/RocksDBKeyBounds.cpp | 18 + arangod/RocksDBEngine/RocksDBKeyBounds.h | 7 + .../RocksDBEngine/RocksDBMetaCollection.cpp | 485 +- arangod/RocksDBEngine/RocksDBMetaCollection.h | 62 +- arangod/RocksDBEngine/RocksDBMetadata.cpp | 128 +- arangod/RocksDBEngine/RocksDBMetadata.h | 21 +- arangod/RocksDBEngine/RocksDBPrimaryIndex.cpp | 34 +- arangod/RocksDBEngine/RocksDBPrimaryIndex.h | 4 +- .../RocksDBEngine/RocksDBRecoveryManager.cpp | 13 +- .../RocksDBReplicationContext.cpp | 15 +- .../RocksDBEngine/RocksDBReplicationContext.h | 1 + .../RocksDBReplicationIterator.cpp | 115 + .../RocksDBReplicationIterator.h | 59 + .../RocksDBReplicationManager.cpp | 3 + .../RocksDBEngine/RocksDBSettingsManager.cpp | 10 +- .../RocksDBTransactionCollection.cpp | 32 +- .../RocksDBTransactionCollection.h | 49 +- .../RocksDBEngine/RocksDBTransactionState.cpp | 92 +- .../RocksDBEngine/RocksDBTransactionState.h | 20 + arangod/RocksDBEngine/RocksDBTtlIndex.cpp | 7 +- arangod/RocksDBEngine/RocksDBTtlIndex.h | 4 +- arangod/RocksDBEngine/RocksDBTypes.cpp | 8 + arangod/RocksDBEngine/RocksDBTypes.h | 3 +- arangod/RocksDBEngine/RocksDBV8Functions.cpp | 22 + arangod/RocksDBEngine/RocksDBVPackIndex.cpp | 20 +- arangod/RocksDBEngine/RocksDBVPackIndex.h | 4 +- .../StorageEngine/EngineSelectorFeature.cpp | 8 + arangod/StorageEngine/EngineSelectorFeature.h | 3 + arangod/StorageEngine/PhysicalCollection.cpp | 50 + arangod/StorageEngine/PhysicalCollection.h | 33 +- .../StorageEngine/ReplicationIterator.cpp | 39 +- arangod/StorageEngine/ReplicationIterator.h | 79 + arangod/Transaction/Manager.cpp | 8 +- arangod/Transaction/Methods.cpp | 1 - arangod/Utils/OperationOptions.h | 5 + arangod/V8Server/v8-actions.cpp | 27 +- arangod/VocBase/LogicalCollection.cpp | 126 +- arangod/VocBase/LogicalCollection.h | 34 +- arangod/VocBase/Methods/Collections.cpp | 13 + arangod/VocBase/Validators.cpp | 36 +- arangod/VocBase/Validators.h | 23 +- arangosh/Benchmark/test-cases.h | 29 +- arangosh/Restore/RestoreFeature.cpp | 18 +- arangosh/Restore/RestoreFeature.h | 1 + arangosh/Shell/V8ClientConnection.cpp | 55 +- .../modules/@arangodb/arango-collection.js | 4 +- .../modules/@arangodb/arango-database.js | 2 +- js/client/modules/@arangodb/crash-utils.js | 6 +- js/client/modules/@arangodb/process-utils.js | 28 +- js/client/modules/@arangodb/replication.js | 10 +- .../modules/@arangodb/result-processing.js | 7 +- js/client/modules/@arangodb/test-utils.js | 7 +- .../@arangodb/testsuites/resilience.js | 19 +- .../testsuites/server_permissions.js | 10 +- js/common/bootstrap/errors.js | 4 +- js/common/modules/@arangodb/aql/explainer.js | 3 +- .../modules/@arangodb/foxx/router/response.js | 30 +- js/server/modules/@arangodb/foxx/service.js | 15 +- lib/Basics/ConditionLocker.cpp | 13 +- lib/Basics/ConditionLocker.h | 5 +- lib/Basics/CrashHandler.cpp | 103 +- lib/Basics/HybridLogicalClock.h | 17 + lib/Basics/MutexLocker.h | 6 +- lib/Basics/NumberUtils.h | 5 + lib/Basics/ReadLocker.h | 10 +- lib/Basics/ReadUnlocker.h | 37 +- lib/Basics/ReadWriteLock.cpp | 16 +- lib/Basics/ReadWriteLock.h | 8 +- lib/Basics/ReadWriteSpinLock.cpp | 99 +- lib/Basics/ReadWriteSpinLock.h | 46 +- lib/Basics/SpinLocker.h | 107 + lib/Basics/SpinUnlocker.h | 81 + lib/Basics/StaticStrings.cpp | 17 +- lib/Basics/StaticStrings.h | 16 +- 
lib/Basics/StringUtils.cpp | 100 - lib/Basics/StringUtils.h | 39 +- lib/Basics/WriteLocker.h | 10 +- lib/Basics/WriteUnlocker.h | 43 +- lib/Basics/debugging.h | 76 +- lib/Basics/errors.dat | 9 +- lib/Basics/hashes.h | 7 + lib/Basics/voc-errors.cpp | 4 +- lib/Basics/voc-errors.h | 20 +- lib/CMakeLists.txt | 6 + lib/Containers/Enumerate.h | 130 + lib/Containers/MerkleTree.cpp | 773 +++ lib/Containers/MerkleTree.h | 276 + lib/Logger/LogThread.cpp | 5 + lib/Logger/LogTopic.cpp | 1 + lib/Logger/Logger.h | 9 +- lib/Rest/GeneralResponse.cpp | 11 +- lib/Rest/GeneralResponse.h | 3 + lib/Rest/HttpResponse.h | 4 + lib/Rest/VstResponse.cpp | 4 + lib/Ssl/SslInterface.cpp | 15 - lib/Ssl/SslInterface.h | 12 - lib/Ssl/ssl-helper.cpp | 12 +- lib/Ssl/ssl-helper.h | 2 +- scripts/build-bundle.sh | 31 - scripts/build-dbg-deb.sh | 37 - scripts/build-deb.sh | 41 - scripts/build-docker.sh | 76 - scripts/build-nsis.sh | 28 - scripts/build-rpm.sh | 31 - scripts/build-snap.sh | 40 - scripts/build-xc-deb.sh | 28 - scripts/build-xc64-deb.sh | 31 - scripts/encryptionTest.sh | 37 - scripts/generateDocumentation.sh | 118 - tests/Agency/MoveShardTest.cpp | 66 + tests/Aql/TraversalExecutorTest.cpp | 5 +- tests/Basics/StringUtilsTest.cpp | 5 - tests/CMakeLists.txt | 6 +- tests/Cache/BucketState.cpp | 11 +- tests/Cache/CachedValue.cpp | 69 +- tests/Cache/FrequencyBuffer.cpp | 47 +- tests/Cache/LockStressTest.cpp | 36 +- tests/Cache/Manager.cpp | 99 +- tests/Cache/Metadata.cpp | 33 +- tests/Cache/MockScheduler.cpp | 11 +- tests/Cache/MockScheduler.h | 12 +- tests/Cache/PlainBucket.cpp | 69 +- tests/Cache/PlainCache.cpp | 160 +- tests/Cache/Rebalancer.cpp | 167 +- tests/Cache/Table.cpp | 129 +- tests/Cache/TransactionManager.cpp | 11 +- tests/Cache/TransactionalBucket.cpp | 95 +- tests/Cache/TransactionalCache.cpp | 173 +- tests/Cache/TransactionalStore.cpp | 51 +- tests/Cache/TransactionalStore.h | 27 +- tests/Cache/TransactionsWithBackingStore.cpp | 165 +- tests/Containers/EnumerateTest.cpp | 105 + tests/Containers/MerkleTreeTest.cpp | 464 ++ .../IResearchAnalyzerFeature-test.cpp | 6 +- .../IResearchFilterFunction-test.cpp | 586 +- .../IResearchQueryNGramMatch-test.cpp | 1160 ++++ tests/IResearch/IResearchQueryPhrase-test.cpp | 1607 +++-- tests/IResearch/common.cpp | 1 + tests/Metrics/MetricsTest.cpp | 21 +- tests/Mocks/StorageEngineMock.cpp | 12 +- tests/Mocks/StorageEngineMock.h | 5 +- tests/js/client/shell/shell-aql-v8.js | 118 + tests/js/client/shell/shell-foxx-interface.js | 225 + tests/js/client/shell/shell-statement.js | 28 - .../aql/aql-view-arangosearch-cluster.inc | 79 + .../aql/aql-view-arangosearch-noncluster.js | 79 + tests/js/common/shell/shell-collection.js | 25 + .../common/shell/shell-validation-rocksdb.js | 197 +- .../common/test-data/apps/interface/index.js | 412 ++ .../test-data/apps/interface/manifest.json | 7 + tests/js/server/aql/aql-graph-traverser.js | 81 + ...ersal-empty-register-mapping-regression.js | 93 + ...revision-trees-no-sync-rocksdb-disabled.js | 126 + ...ion-trees-partial-sync-rocksdb-disabled.js | 130 + .../revision-trees-rocksdb-disabled.js | 124 + .../replication/sync/replication-sync.js | 4 +- tests/js/server/shell/shell-index-rocksdb.js | 20 +- utils/checkLogIds.py | 2 +- utils/generateErrorfile.py | 8 +- utils/generateExamples.py | 106 +- utils/generateExitCodesFiles.py | 8 +- utils/generateSwagger.py | 122 +- 797 files changed, 64021 insertions(+), 8739 deletions(-) create mode 100644 3rdParty/iresearch/PVSIResearch.cfg delete mode 100644 
3rdParty/iresearch/core/search/limited_sample_scorer-heap.cpp delete mode 100644 3rdParty/iresearch/core/search/limited_sample_scorer-heap.hpp create mode 100644 3rdParty/iresearch/core/search/ngram_similarity_filter.cpp create mode 100644 3rdParty/iresearch/core/search/ngram_similarity_filter.hpp create mode 100644 3rdParty/iresearch/core/utils/attribute_range.cpp create mode 100644 3rdParty/iresearch/core/utils/attribute_range.hpp create mode 100644 3rdParty/iresearch/core/utils/fst_states_map.hpp create mode 100644 3rdParty/iresearch/core/utils/ngram_match_utils.hpp create mode 100644 3rdParty/iresearch/core/utils/wildcard_utils.cpp create mode 100644 3rdParty/iresearch/scripts/ArangoDBLoader/WikiLoader.py mode change 100644 => 100755 3rdParty/iresearch/scripts/gtest-parallel/gtest-parallel mode change 100644 => 100755 3rdParty/iresearch/scripts/gtest-parallel/gtest_parallel.py mode change 100644 => 100755 3rdParty/iresearch/scripts/gtest-parallel/gtest_parallel_tests.py create mode 100644 3rdParty/iresearch/tests/formats/formats_13_tests.cpp create mode 100644 3rdParty/iresearch/tests/resources/ngram_similarity.json create mode 100644 3rdParty/iresearch/tests/resources/simple_sequential_utf8.json create mode 100644 3rdParty/iresearch/tests/search/ngram_similarity_filter_tests.cpp delete mode 100644 3rdParty/iresearch/tests/unicode/utf8.h delete mode 100644 3rdParty/iresearch/tests/unicode/utf8/checked.h delete mode 100644 3rdParty/iresearch/tests/unicode/utf8/core.h delete mode 100644 3rdParty/iresearch/tests/unicode/utf8/unchecked.h create mode 100644 3rdParty/iresearch/tests/utils/ngram_match_utils_tests.cpp create mode 100644 3rdParty/iresearch/tests/utils/wildcard_utils_test.cpp create mode 100644 3rdParty/json-schema-validation/.clang_complete create mode 100644 3rdParty/json-schema-validation/.gitignore create mode 100644 3rdParty/json-schema-validation/CMakeLists.txt create mode 100644 3rdParty/json-schema-validation/cmake/ext_cmake_install.cmake create mode 100644 3rdParty/json-schema-validation/cmake/ext_cmake_utils.cmake create mode 100644 3rdParty/json-schema-validation/include/validation/events_from_slice.hpp create mode 100644 3rdParty/json-schema-validation/include/validation/types.hpp create mode 100644 3rdParty/json-schema-validation/include/validation/validation.hpp create mode 100644 3rdParty/json-schema-validation/src/validation.cpp create mode 100644 3rdParty/taocpp-json.version create mode 100644 3rdParty/taocpp-json/.clang-format create mode 100644 3rdParty/taocpp-json/CMakeLists.txt create mode 100644 3rdParty/taocpp-json/LICENSE create mode 100644 3rdParty/taocpp-json/LICENSE.double-conversion create mode 100644 3rdParty/taocpp-json/LICENSE.itoa create mode 100644 3rdParty/taocpp-json/LICENSE.ryu create mode 100644 3rdParty/taocpp-json/README.md create mode 100644 3rdParty/taocpp-json/include/tao/json.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/basic_value.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binary.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binary_view.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/constant.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/element.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/factory.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/for_nothing_value.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/binding/for_unknown_key.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/inherit.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/internal/array.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/internal/inherit.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/internal/object.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/internal/type_key.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/member.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/member_kind.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/binding/versions.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/consume.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/consume_file.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/consume_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/array_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/deque_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/diff.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/get.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/internal/array_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/internal/indirect_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/internal/object_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/internal/type_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/list_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/map_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/multimap_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/multiset_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/pair_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/patch.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/pointer_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/position.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/reference.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/schema.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/set_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/shared_ptr_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/tuple_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/unique_ptr_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/unordered_map_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/unordered_set_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/vector_bool_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/contrib/vector_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/apply.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/binary_to_base64.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/binary_to_base64url.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/events/binary_to_exception.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/binary_to_hex.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/compare.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/debug.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/discard.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/from_file.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/from_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/from_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/from_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/from_value.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/hash.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/invalid_string_to_binary.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/invalid_string_to_exception.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/invalid_string_to_hex.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/key_camel_case_to_snake_case.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/key_snake_case_to_camel_case.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/limit_nesting_depth.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/limit_value_count.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/non_finite_to_exception.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/non_finite_to_null.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/non_finite_to_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/prefer_signed.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/prefer_unsigned.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/produce.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/ref.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/statistics.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/tee.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/to_pretty_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/to_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/to_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/to_value.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/transformer.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/validate_event_order.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/validate_keys.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/virtual_base.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/events/virtual_ref.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/double.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/itoa.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/analyze_cycles.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/counted.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/generic.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/grammar_info.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/insert_guard.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/insert_rules.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/rule_info.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analysis/rule_type.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/analyze.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/apply_mode.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/argv_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/ascii.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/buffer_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/change_action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/change_action_and_state.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/change_action_and_states.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/change_control.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/change_state.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/change_states.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/config.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/abnf.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/alphabet.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/counter.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/http.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/icu/internal.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/icu/utf16.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/icu/utf32.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/icu/utf8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/if_then.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/integer.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/json.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/json_pointer.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/parse_tree.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/parse_tree_to_dot.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/raw_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/remove_first_state.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/rep_one_min_max.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/rep_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/to_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/tracer.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/unescape.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/external/pegtl/contrib/uri.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/cstream_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/disable_action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/discard_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/discard_input_on_failure.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/discard_input_on_success.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/enable_action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/eol_pair.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/file_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/action_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/alnum.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/alpha.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/always_false.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/any.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/apply.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/apply0.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/apply0_single.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/apply_single.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/at.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/bof.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/bol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/bump.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/bump_help.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/bytes.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/control.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/cr_crlf_eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/cr_eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/crlf_eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/cstream_reader.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/cstring_reader.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/demangle.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/disable.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/discard.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/dusel_mode.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/duseltronik.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/enable.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/endian.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/endian_gcc.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/endian_win.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/eof.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/eolf.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/file_mapper_posix.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/file_mapper_win32.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/file_opener.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/file_reader.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/has_apply.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/has_apply0.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/has_match.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/identifier.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/if_apply.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/if_must.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/if_must_else.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/if_then_else.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/input_pair.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/istream_reader.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/istring.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/iterator.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/lf_crlf_eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/lf_eol.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/list.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/list_must.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/list_tail.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/list_tail_pad.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/marker.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/missing_apply.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/missing_apply0.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/must.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/not_at.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/one.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/opt.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/pad.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/pad_opt.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_char.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_mask_uint.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_mask_uint8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_uint.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_uint8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_utf16.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_utf32.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/peek_utf8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/pegtl_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/plus.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/raise.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/range.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/ranges.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/read_uint.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/rematch.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/rep.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/rep_min.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/rep_min_max.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/rep_opt.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/require.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/result_on_found.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/rules.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/seq.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/skip_control.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/sor.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/star.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/star_must.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/state.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/trivial.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/try_catch_type.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/internal/until.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/istream_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/match.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/memory_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/mmap_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/normal.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/nothing.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/external/pegtl/parse.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/parse_error.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/position.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/read_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/require_apply.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/require_apply0.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/rewind_mode.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/rules.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/string_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/tracking_mode.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/uint16.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/uint32.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/uint64.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/uint8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/utf16.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/utf32.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/utf8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/pegtl/version.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/external/ryu.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/forward.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/from_file.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/from_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/from_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/from_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/base64.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/base64url.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/endian.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/endian_gcc.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/endian_win.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/errors.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/escape.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/format.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/grammar.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/hexdump.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/identity.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/number_state.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/number_traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/pair.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/parse_util.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/sha256.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/single.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/string_t.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/type_traits.hpp create mode 100644 
3rdParty/taocpp-json/include/tao/json/internal/unescape_action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/internal/uri_fragment.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/consume_file.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/consume_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/from_file.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/from_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/from_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/from_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/to_pretty_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/to_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/events/to_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/from_file.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/from_input.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/from_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/from_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/internal/action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/internal/bunescape_action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/internal/errors.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/internal/grammar.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/internal/integer.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/internal/unescape_action.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/is_identifier.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/parts_parser.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/to_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/jaxn/to_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/message_extension.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/operators.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/parts_parser.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/pointer.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/produce.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/self_contained.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/span.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/to_stream.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/to_string.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/traits.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/type.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/utf8.hpp create mode 100644 3rdParty/taocpp-json/include/tao/json/value.hpp create mode 100644 UnitTests/tls-ca.crt create mode 100644 UnitTests/tls-ca.key create mode 100644 UnitTests/tls.keyfile create mode 100644 arangod/RocksDBEngine/RocksDBReplicationIterator.cpp create mode 100644 arangod/RocksDBEngine/RocksDBReplicationIterator.h rename lib/SimpleHttpClient/Options.h => arangod/StorageEngine/ReplicationIterator.cpp (59%) create mode 100644 arangod/StorageEngine/ReplicationIterator.h create mode 100644 lib/Basics/SpinLocker.h create 
mode 100644 lib/Basics/SpinUnlocker.h create mode 100644 lib/Containers/Enumerate.h create mode 100644 lib/Containers/MerkleTree.cpp create mode 100644 lib/Containers/MerkleTree.h delete mode 100755 scripts/build-bundle.sh delete mode 100755 scripts/build-dbg-deb.sh delete mode 100755 scripts/build-deb.sh delete mode 100755 scripts/build-docker.sh delete mode 100644 scripts/build-nsis.sh delete mode 100755 scripts/build-rpm.sh delete mode 100755 scripts/build-snap.sh delete mode 100755 scripts/build-xc-deb.sh delete mode 100755 scripts/build-xc64-deb.sh delete mode 100755 scripts/encryptionTest.sh delete mode 100755 scripts/generateDocumentation.sh create mode 100644 tests/Containers/EnumerateTest.cpp create mode 100644 tests/Containers/MerkleTreeTest.cpp create mode 100644 tests/IResearch/IResearchQueryNGramMatch-test.cpp create mode 100644 tests/js/client/shell/shell-aql-v8.js create mode 100644 tests/js/client/shell/shell-foxx-interface.js create mode 100644 tests/js/common/test-data/apps/interface/index.js create mode 100644 tests/js/common/test-data/apps/interface/manifest.json create mode 100644 tests/js/server/aql/aql-traversal-empty-register-mapping-regression.js create mode 100644 tests/js/server/recovery/revision-trees-no-sync-rocksdb-disabled.js create mode 100644 tests/js/server/recovery/revision-trees-partial-sync-rocksdb-disabled.js create mode 100644 tests/js/server/recovery/revision-trees-rocksdb-disabled.js diff --git a/.gitignore b/.gitignore index 710eaacf7c12..389d47f200e0 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,7 @@ datafile-*.db # by build process arangodb-linux-amd64 last_compiled_version.sha + +scripts/perfanalysis +perf* +callgrind* diff --git a/3rdParty/CMakeLists.txt b/3rdParty/CMakeLists.txt index 33e090bbe993..82cb64de268b 100755 --- a/3rdParty/CMakeLists.txt +++ b/3rdParty/CMakeLists.txt @@ -33,7 +33,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/zlib/zlib-1.2.11) set(SNAPPY_VERSION "1.1.7") set(SNAPPY_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/snappy/snappy-${SNAPPY_VERSION}") +set(SNAPPY_SOURCE_DIR "${SNAPPY_SOURCE_DIR}" PARENT_SCOPE) set(SNAPPY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/snappy/snappy-${SNAPPY_VERSION}") +set(SNAPPY_BUILD_DIR "${SNAPPY_BUILD_DIR}" PARENT_SCOPE) set(SNAPPY_LIB "snappy") set(SNAPPY_LIB "${SNAPPY_LIB}" PARENT_SCOPE) add_subdirectory(${SNAPPY_SOURCE_DIR} EXCLUDE_FROM_ALL) @@ -235,3 +237,13 @@ target_include_directories(fuerte SYSTEM PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/fuerte/include") set(V8_INTERNAL_INCLUDE_DIR ${V8_INTERNAL_INCLUDE_DIR} PARENT_SCOPE) + +if(NOT TARGET velocypack) +add_library(velocypack INTERFACE) +target_include_directories(velocypack INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/velocypack/include") +endif() + +set(TAOCPP_JSON_BUILD_TESTS OFF CACHE BOOL "Build taocpp::json test programs" FORCE) +set(TAOCPP_JSON_BUILD_EXAMPLES OFF CACHE BOOL "Build taocpp::json example programs" FORCE) +add_subdirectory(taocpp-json EXCLUDE_FROM_ALL) +add_subdirectory(json-schema-validation EXCLUDE_FROM_ALL) diff --git a/3rdParty/README_maintainers.md b/3rdParty/README_maintainers.md index 318acf3f5afe..17070c00c68d 100644 --- a/3rdParty/README_maintainers.md +++ b/3rdParty/README_maintainers.md @@ -177,6 +177,16 @@ Note that to account for changes introduced by new versions of swagger-ui, the stylistic CSS changes may need to be adjusted manually even when applied correctly. 
+## taocpp::json + +Json Parser library +Contains TaoCpp PEGTL - PEG Parsing library + +Upstream is: https://github.com/taocpp/json + +- On upgrade do not add unnecessary files (e.g. src, tests, contrib) + and update the commit hash in `./taocpp-json.version`. + ## V8 Javascript interpreter. diff --git a/3rdParty/date/include/date/date.h b/3rdParty/date/include/date/date.h index 3be91afec021..1186d2a808b3 100644 --- a/3rdParty/date/include/date/date.h +++ b/3rdParty/date/include/date/date.h @@ -7330,9 +7330,13 @@ parse(const CharT* format, Parsable& tp, namespace detail { - +// [tobias, 2020-03-10] I added the check +// !defined(_MSC_VER) +// because this block does not compile with MSVC (at least MSVC 16 2019 / _MSC_VER = 1923). +// If it works at some point, add `|| _MSC_VER > ...`. #if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) \ - && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150) + && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150) \ + && !defined(_MSC_VER) template class string_literal diff --git a/3rdParty/fuerte/include/fuerte/helper.h b/3rdParty/fuerte/include/fuerte/helper.h index 5c475a3f9307..d7b10ec1c9ec 100644 --- a/3rdParty/fuerte/include/fuerte/helper.h +++ b/3rdParty/fuerte/include/fuerte/helper.h @@ -112,8 +112,8 @@ std::string mapToKeys(std::unordered_map map) { return _detail::mapToKeys(map.begin(), map.end()); } -std::string encodeBase64(std::string const&); -std::string encodeBase64U(std::string const&); +std::string encodeBase64(std::string const&, bool pad); +std::string encodeBase64U(std::string const&, bool pad); void toLowerInPlace(std::string& str); diff --git a/3rdParty/fuerte/src/H1Connection.cpp b/3rdParty/fuerte/src/H1Connection.cpp index a0b1b63d7fdd..581af3c4b0c8 100644 --- a/3rdParty/fuerte/src/H1Connection.cpp +++ b/3rdParty/fuerte/src/H1Connection.cpp @@ -155,7 +155,7 @@ H1Connection::H1Connection(EventLoopService& loop, if (this->_config._authenticationType == AuthenticationType::Basic) { _authHeader.append("Authorization: Basic "); _authHeader.append( - fu::encodeBase64(this->_config._user + ":" + this->_config._password)); + fu::encodeBase64(this->_config._user + ":" + this->_config._password, true)); _authHeader.append("\r\n"); } else if (this->_config._authenticationType == AuthenticationType::Jwt) { if (this->_config._jwtToken.empty()) { diff --git a/3rdParty/fuerte/src/H2Connection.cpp b/3rdParty/fuerte/src/H2Connection.cpp index c142f3a38308..914759527b6c 100644 --- a/3rdParty/fuerte/src/H2Connection.cpp +++ b/3rdParty/fuerte/src/H2Connection.cpp @@ -94,6 +94,7 @@ template } else if (field == fu_content_length_key) { size_t len = std::min(std::stoul(val.toString()), 1024 * 1024 * 64); strm->data.reserve(len); + strm->response->header.addMeta(field.toString(), val.toString()); } else { // fall through strm->response->header.addMeta(field.toString(), val.toString()); // TODO limit max header size ?? 
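
For illustration, the clamp visible in the H2Connection hunk just above — the announced `content-length` is bounded to 64 MB before `reserve()` is called, and the added line additionally records the header via `addMeta()` instead of dropping it — can be sketched in isolation. The function name, the cap constant and the sample values below are invented for this note; this is not fuerte code.

```cpp
#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Illustrative only: clamp an attacker-controlled Content-Length value before
// pre-allocating, so a bogus header cannot force a huge allocation up front.
constexpr std::size_t kMaxPrealloc = std::size_t{64} * 1024 * 1024;  // 64 MB cap

void reserveFromContentLength(std::vector<char>& buffer, std::string const& headerValue) {
  std::size_t announced = 0;
  try {
    announced = static_cast<std::size_t>(std::stoull(headerValue));
  } catch (std::exception const&) {
    return;  // unparsable header value: simply skip the pre-allocation
  }
  buffer.reserve(std::min(announced, kMaxPrealloc));
}

int main() {
  std::vector<char> buf;
  reserveFromContentLength(buf, "1048576");             // reserves 1 MB
  reserveFromContentLength(buf, "999999999999999999");  // clamped to the 64 MB cap
}
```
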
@@ -225,7 +226,7 @@ std::string makeAuthHeader(fu::detail::ConnectionConfiguration const& config) { // preemptively cache authentication if (config._authenticationType == AuthenticationType::Basic) { auth.append("Basic "); - auth.append(fu::encodeBase64(config._user + ":" + config._password)); + auth.append(fu::encodeBase64(config._user + ":" + config._password, true)); } else if (config._authenticationType == AuthenticationType::Jwt) { if (config._jwtToken.empty()) { throw std::logic_error("JWT token is not set"); @@ -368,7 +369,7 @@ void H2Connection::finishConnect() { (uint8_t*)packed.data(), packed.size(), iv.data(), iv.size()); FUERTE_ASSERT(nwrite >= 0); packed.resize(static_cast(nwrite)); - std::string encoded = fu::encodeBase64(packed); + std::string encoded = fu::encodeBase64(packed, true); // lets do the HTTP2 session upgrade right away initNgHttp2Session(); diff --git a/3rdParty/fuerte/src/helper.cpp b/3rdParty/fuerte/src/helper.cpp index da1388bbcad9..707f2ac48a52 100644 --- a/3rdParty/fuerte/src/helper.cpp +++ b/3rdParty/fuerte/src/helper.cpp @@ -153,7 +153,7 @@ char const* const BASE64_CHARS = "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; -std::string encodeBase64(std::string const& in) { +std::string encodeBase64(std::string const& in, bool pad) { unsigned char charArray3[3]; unsigned char charArray4[4]; @@ -201,16 +201,18 @@ std::string encodeBase64(std::string const& in) { ret += BASE64_CHARS[charArray4[j]]; } - while ((i++ < 3)) { - ret += '='; + if (pad) { + while ((i++ < 3)) { + ret += '='; + } } } return ret; } -std::string encodeBase64U(std::string const& in) { - std::string encoded = encodeBase64(in); +std::string encodeBase64U(std::string const& in, bool pad) { + std::string encoded = encodeBase64(in, pad); // replace '+', '/' with '-' and '_' std::replace(encoded.begin(), encoded.end(), '+', '-'); std::replace(encoded.begin(), encoded.end(), '/', '_'); diff --git a/3rdParty/fuerte/src/jwt.cpp b/3rdParty/fuerte/src/jwt.cpp index 1a61b308a991..34cbf653e5fd 100644 --- a/3rdParty/fuerte/src/jwt.cpp +++ b/3rdParty/fuerte/src/jwt.cpp @@ -89,14 +89,18 @@ std::string jwt::generateRawJwt(std::string const& secret, VPackSlice const& bod headerBuilder.add("typ", VPackValue("JWT")); } - std::string fullMessage(encodeBase64(headerBuilder.toJson()) + "." + - encodeBase64(body.toJson())); + // https://tools.ietf.org/html/rfc7515#section-2 requires + // JWT to use base64-encoding without trailing padding `=` chars + bool const pad = false; + + std::string fullMessage(encodeBase64(headerBuilder.toJson(), pad) + "." + + encodeBase64(body.toJson(), pad)); std::string signature = sslHMAC(secret.c_str(), secret.length(), fullMessage.c_str(), fullMessage.length(), Algorithm::ALGORITHM_SHA256); - return fullMessage + "." + encodeBase64U(signature); + return fullMessage + "." 
+ encodeBase64U(signature, pad); } // code from ArangoDBs SslInterface.cpp diff --git a/3rdParty/iresearch/.travis.yml b/3rdParty/iresearch/.travis.yml index 38e51b08b74d..d684ad8cd7ac 100644 --- a/3rdParty/iresearch/.travis.yml +++ b/3rdParty/iresearch/.travis.yml @@ -57,21 +57,7 @@ matrix: env: - SET_ENV="CC=gcc-4.9 && CXX=g++-4.9" - CXX_STANDARD=11 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - SUPPRESS_MAKE_ERRORS=true # travis is unable to handle log files > 4MB - - ############################################################################ - # Release build, static library (remaining tests) - ############################################################################ - - os: linux - before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-4.9 valgrind - env: - - SET_ENV="CC=gcc-4.9 && CXX=g++-4.9" - - CXX_STANDARD=11 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" STATIC_LIB="true" - SUPPRESS_MAKE_ERRORS=true # travis is unable to handle log files > 4MB ############################################################################ @@ -89,21 +75,7 @@ matrix: - SUPPRESS_MAKE_ERRORS=true # travis is unable to handle log files > 4MB ############################################################################ - # Release build, shared library (filesystem tests) - ############################################################################ - - os: linux - before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-4.9 valgrind - env: - - SET_ENV="CC=gcc-4.9 && CXX=g++-4.9" - - CXX_STANDARD=11 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - SUPPRESS_MAKE_ERRORS=true # travis is unable to handle log files > 4MB - - ############################################################################ - # Release build, shared library (remaining tests) + # Release build, shared library ############################################################################ - os: linux before_install: @@ -113,7 +85,7 @@ matrix: env: - SET_ENV="CC=gcc-4.9 && CXX=g++-4.9" - CXX_STANDARD=11 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" - SUPPRESS_MAKE_ERRORS=true # travis is unable to handle log files > 4MB ############################################################################ @@ -134,20 +106,7 @@ matrix: - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, static library (filesystem tests) - ############################################################################ - - os: linux - before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-5 valgrind - env: - - SET_ENV="CC=gcc-5 && CXX=g++-5" - - CXX_STANDARD=11 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - 
############################################################################ - # Release build, static library (remaining tests) + # Release build, static library ############################################################################ - os: linux before_install: @@ -157,7 +116,7 @@ matrix: env: - SET_ENV="CC=gcc-5 && CXX=g++-5" - CXX_STANDARD=11 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" STATIC_LIB="true" ############################################################################ # Debug build, shared library @@ -173,20 +132,7 @@ matrix: - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, shared library (filesystem tests) - ############################################################################ - - os: linux - before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-5 valgrind - env: - - SET_ENV="CC=gcc-5 && CXX=g++-5" - - CXX_STANDARD=11 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - ############################################################################ - # Release build, shared library (remaining tests) + # Release build, shared library ############################################################################ - os: linux before_install: @@ -196,7 +142,7 @@ matrix: env: - SET_ENV="CC=gcc-5 && CXX=g++-5" - CXX_STANDARD=11 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" ############################################################################ # GCC 6 @@ -216,20 +162,7 @@ matrix: - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, static library (filesystem tests) - ############################################################################ - - os: linux - before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-6 valgrind - env: - - SET_ENV="CC=gcc-6 && CXX=g++-6" - - CXX_STANDARD=11 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - ############################################################################ - # Release build, static library (remaining tests) + # Release build, static library ############################################################################ - os: linux before_install: @@ -239,7 +172,7 @@ matrix: env: - SET_ENV="CC=gcc-6 && CXX=g++-6" - CXX_STANDARD=11 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" STATIC_LIB="true" ############################################################################ # Debug build, shared library @@ -255,20 +188,7 @@ matrix: - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, shared library (filesystem tests) - ############################################################################ - - os: linux - before_install: - - sudo add-apt-repository 
ppa:ubuntu-toolchain-r/test -y - - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-6 valgrind - env: - - SET_ENV="CC=gcc-6 && CXX=g++-6" - - CXX_STANDARD=11 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - ############################################################################ - # Release build, shared library (remaining tests) + # Release build, shared library ############################################################################ - os: linux before_install: @@ -278,7 +198,7 @@ matrix: env: - SET_ENV="CC=gcc-6 && CXX=g++-6" - CXX_STANDARD=11 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" ############################################################################ # GCC 7 @@ -300,7 +220,7 @@ matrix: - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, static library (filesystem tests) + # Release build, static library ############################################################################ - os: linux dist: trusty # gcc7 is not available in `precise` @@ -312,10 +232,10 @@ matrix: - SET_ENV="CC=gcc-7 && CXX=g++-7" - CXX_STANDARD=14 - CMAKE_VERSION=3.2.3 # supports c++14 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" + - BUILD_TYPE="Release" STATIC_LIB="true" ############################################################################ - # Release build, static library (remaining tests) + # Debug build, shared library ############################################################################ - os: linux dist: trusty # gcc7 is not available in `precise` @@ -327,10 +247,10 @@ matrix: - SET_ENV="CC=gcc-7 && CXX=g++-7" - CXX_STANDARD=14 - CMAKE_VERSION=3.2.3 # supports c++14 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Debug build, shared library + # Release build, shared library ############################################################################ - os: linux dist: trusty # gcc7 is not available in `precise` @@ -342,44 +262,44 @@ matrix: - SET_ENV="CC=gcc-7 && CXX=g++-7" - CXX_STANDARD=14 - CMAKE_VERSION=3.2.3 # supports c++14 - - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" + - BUILD_TYPE="Release" ############################################################################ - # Release build, shared library (filesystem tests) + # GCC 8 + ############################################################################ + + ############################################################################ + # Debug build, static library ############################################################################ - os: linux - dist: trusty # gcc7 is not available in `precise` + dist: trusty # gcc8 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-7 valgrind + - travis_retry sudo apt-get install g++-8 valgrind env: - - SET_ENV="CC=gcc-7 && CXX=g++-7" - - CXX_STANDARD=14 - - CMAKE_VERSION=3.2.3 # supports c++14 - 
- BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" + - SET_ENV="CC=gcc-8 && CXX=g++-8" + - CMAKE_VERSION=3.8.2 # supports c++17 + - CXX_STANDARD=17 + - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, shared library (remaining tests) + # Release build, static library ############################################################################ - os: linux - dist: trusty # gcc7 is not available in `precise` + dist: trusty # gcc8 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install cmake cmake-data g++-7 valgrind + - travis_retry sudo apt-get install g++-8 valgrind env: - - SET_ENV="CC=gcc-7 && CXX=g++-7" - - CXX_STANDARD=14 - - CMAKE_VERSION=3.2.3 # supports c++14 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" - - ############################################################################ - # GCC 8 - ############################################################################ + - SET_ENV="CC=gcc-8 && CXX=g++-8" + - CMAKE_VERSION=3.8.2 # supports c++17 + - CXX_STANDARD=17 + - BUILD_TYPE="Release" STATIC_LIB="true" ############################################################################ - # Debug build, static library + # Debug build, shared library ############################################################################ - os: linux dist: trusty # gcc8 is not available in `precise` @@ -391,82 +311,86 @@ matrix: - SET_ENV="CC=gcc-8 && CXX=g++-8" - CMAKE_VERSION=3.8.2 # supports c++17 - CXX_STANDARD=17 - - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" + - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, static library (filesystem tests) + # Release build, shared library ############################################################################ - os: linux - dist: trusty # gcc8 is not available in `precise` + dist: trusty # gcc7 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install g++-8 valgrind + - travis_retry sudo apt-get install cmake cmake-data g++-8 valgrind env: - SET_ENV="CC=gcc-8 && CXX=g++-8" - CMAKE_VERSION=3.8.2 # supports c++17 - CXX_STANDARD=17 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" + - BUILD_TYPE="Release" + + ############################################################################ + # GCC 9 + ############################################################################ ############################################################################ - # Release build, static library (remaining tests) + # Debug build, static library ############################################################################ - os: linux - dist: trusty # gcc8 is not available in `precise` + dist: bionic # gcc9 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install 
g++-8 valgrind + - travis_retry sudo apt-get install g++-9 valgrind env: - - SET_ENV="CC=gcc-8 && CXX=g++-8" + - SET_ENV="CC=gcc-9 && CXX=g++-9" - CMAKE_VERSION=3.8.2 # supports c++17 - CXX_STANDARD=17 - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Debug build, shared library + # Release build, static library ############################################################################ - os: linux - dist: trusty # gcc8 is not available in `precise` + dist: bionic # gcc9 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - - travis_retry sudo apt-get install g++-8 valgrind + - travis_retry sudo apt-get install g++-9 valgrind env: - - SET_ENV="CC=gcc-8 && CXX=g++-8" + - SET_ENV="CC=gcc-9 && CXX=g++-9" - CMAKE_VERSION=3.8.2 # supports c++17 - CXX_STANDARD=17 - - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" + - BUILD_TYPE="Release" STATIC_LIB="true" ############################################################################ - # Release build, shared library (filesystem tests) + # Debug build, shared library ############################################################################ - os: linux - dist: trusty # gcc8 is not available in `precise` + dist: bionic # gcc9 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - travis_retry sudo apt-get install g++-8 valgrind env: - - SET_ENV="CC=gcc-8 && CXX=g++-8" + - SET_ENV="CC=gcc-9 && CXX=g++-9" - CMAKE_VERSION=3.8.2 # supports c++17 - CXX_STANDARD=17 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" + - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, shared library (remaining tests) + # Release build, shared library ############################################################################ - os: linux - dist: trusty # gcc8 is not available in `precise` + dist: bionic # gcc9 is not available in `precise` before_install: - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - for i in 1 2 3; do travis_retry sudo apt-get update; done # 3 times since download failure is not an error - travis_retry sudo apt-get install g++-8 valgrind env: - - SET_ENV="CC=gcc-8 && CXX=g++-8" + - SET_ENV="CC=gcc-9 && CXX=g++-9" - CMAKE_VERSION=3.8.2 # supports c++17 - CXX_STANDARD=17 - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" ############################################################################ # OSX @@ -488,7 +412,7 @@ matrix: - BUILD_TYPE="Debug" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, static library (filesystem tests) + # Release build, static library ############################################################################ - os: osx osx_image: xcode9 @@ -500,22 +424,7 @@ matrix: - SET_ENV="PATH=/usr/local/opt/bison/bin:/usr/local/opt/bison@2.7/bin:$PATH" - CXX_STANDARD=11 - BOOST_B2_ARGS="boost.locale.iconv=off" - - 
BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - ############################################################################ - # Release build, static library (remaining tests) - ############################################################################ - - os: osx - osx_image: xcode9 - compiler: clang - before_install: - - brew update || true; - brew install bison@2.7 || true; - env: - - SET_ENV="PATH=/usr/local/opt/bison/bin:/usr/local/opt/bison@2.7/bin:$PATH" - - CXX_STANDARD=11 - - BOOST_B2_ARGS="boost.locale.iconv=off" - - BUILD_TYPE="Release" STATIC_LIB="true" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" STATIC_LIB="true" ############################################################################ # Debug build, shared library @@ -533,22 +442,7 @@ matrix: - BUILD_TYPE="Debug" TEST_PARAMS="--gtest_filter=*:-:*" ############################################################################ - # Release build, shared library (filesystem tests) - ############################################################################ - - os: osx - osx_image: xcode9 - compiler: clang - before_install: - - brew update || true; - brew install bison@2.7 || true; - env: - - SET_ENV="PATH=/usr/local/opt/bison/bin:/usr/local/opt/bison@2.7/bin:$PATH" - - CXX_STANDARD=11 - - BOOST_B2_ARGS="boost.locale.iconv=off" - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*/fs___*:*/mmap___*" - - ############################################################################ - # Release build, shared library (remaining tests) + # Release build, shared library ############################################################################ - os: osx osx_image: xcode9 @@ -560,7 +454,7 @@ matrix: - SET_ENV="PATH=/usr/local/opt/bison/bin:/usr/local/opt/bison@2.7/bin:$PATH" - CXX_STANDARD=11 - BOOST_B2_ARGS="boost.locale.iconv=off" - - BUILD_TYPE="Release" TEST_PARAMS="--gtest_filter=*:-:*/fs___*:-:*/mmap___*" + - BUILD_TYPE="Release" before_install: - eval "${SET_ENV}" @@ -717,7 +611,7 @@ script: exit 1 fi else - make iresearch-tests${BUILD_TYPE_SUFFIX} + make -j4 iresearch-tests${BUILD_TYPE_SUFFIX} fi # build utils if [[ "${STATIC_LIB}" == "true" ]]; then @@ -727,7 +621,7 @@ script: # execute tests ulimit -n 5120 # required for MacOS (max-open-files = 16 threads * (100000/10000) commits * 8 segments-per-commit * 4 open-files-per-segment) ulimit -a - if ! travis_wait 90 ./bin/iresearch-tests${TEST_EXECUTABLE_SUFFIX} ${TEST_PARAMS}; then + if ! 
travis_wait 120 python ../scripts/gtest-parallel/gtest_parallel.py ${TEST_PARAMS} ./bin/iresearch-tests${TEST_EXECUTABLE_SUFFIX} ; then exit 1 fi # archive artifacts diff --git a/3rdParty/iresearch/CMakeLists.txt b/3rdParty/iresearch/CMakeLists.txt index 05b2f6f86c7b..12145c74284d 100644 --- a/3rdParty/iresearch/CMakeLists.txt +++ b/3rdParty/iresearch/CMakeLists.txt @@ -73,6 +73,10 @@ option(USE_TESTS "Build tests" OFF) option(USE_PYRESEARCH "Build iresearch python bridge" OFF) option(USE_VALGRIND "Use workarounds to avoid false positives in valgrind" OFF) option(USE_SIMDCOMP "Use architecture specific low-level optimizations" OFF) +option(USE_CCACHE "Use CCACHE if present" ON) + +set(SUPPRESS_EXTERNAL_WARNINGS OFF CACHE INTERNAL "Suppress warnings originating in 3rd party code.") + add_option_gprof(FALSE) if (USE_VALGRIND) @@ -118,8 +122,6 @@ endif() ### setup ccache ################################################################################ -option(USE_CCACHE "Use CCACHE if present" ON) - if (USE_CCACHE) find_program(CCACHE_FOUND ccache) @@ -128,7 +130,11 @@ if (USE_CCACHE) endif(CCACHE_FOUND) endif() -if (USE_OPTIMIZE_FOR_ARCHITECTURE) +################################################################################ +### setup platform dependent optimizations +################################################################################ + +if (USE_SIMDCOMP) include(OptimizeForArchitecture) OptimizeForArchitecture() @@ -265,6 +271,7 @@ if (USE_PVS_STUDIO) ANALYZE ${PVS_STUDIO_ANALYZE} MODE GA:1,2 OP LOG ${IResearch_TARGET_NAME}.err + CONFIG "${CMAKE_SOURCE_DIR}/PVSIResearch.cfg" ) pvs_studio_add_target( @@ -273,6 +280,16 @@ if (USE_PVS_STUDIO) ANALYZE ${PVS_STUDIO_ANALYZE} MODE GA:1,2 OP LOG ${IResearch_TARGET_NAME}-html.err + CONFIG "${CMAKE_SOURCE_DIR}/PVSIResearch.cfg" + ) + + pvs_studio_add_target( + TARGET ${PVS_STUDIO_TARGET_NAME}-xml ALL + FORMAT xml + ANALYZE ${PVS_STUDIO_ANALYZE} + MODE GA:1,2 OP + LOG ${IResearch_TARGET_NAME}-xml.err + CONFIG "${CMAKE_SOURCE_DIR}/PVSIResearch.cfg" + ) endif() diff --git a/3rdParty/iresearch/PVSIResearch.cfg b/3rdParty/iresearch/PVSIResearch.cfg new file mode 100644 index 000000000000..4920dfe69aa2 --- /dev/null +++ b/3rdParty/iresearch/PVSIResearch.cfg @@ -0,0 +1,2 @@ +analysis-mode=31 +exclude-path=iql diff --git a/3rdParty/iresearch/README.md b/3rdParty/iresearch/README.md index 1c7654b3f89a..79bb9937644c 100644 --- a/3rdParty/iresearch/README.md +++ b/3rdParty/iresearch/README.md @@ -16,6 +16,7 @@ - [Overview](#overview) - [High level architecture and main concepts](#high-level-architecture-and-main-concepts) - [Build](#build) +- [Pyresearch](#pyresearch) - [Included 3rd party dependencies](#included-3rd-party-dependencies) - [External 3rd party dependencies](#external-3rd-party-dependencies) - [Query filter building blocks](#query-filter-building-blocks) @@ -385,6 +386,20 @@ code coverage: cmake --build . --target iresearch-coverage ``` +## Pyresearch +There is a Python wrapper for IResearch. The wrapper gives access to the directory reader object. +For a usage example see /python/scripts. +### Build +To build Pyresearch, the SWIG generator should be available. +Add -DUSE_PYRESEARCH=ON to the cmake command line to generate the Pyresearch targets. +### Install +Run the target pyresearch-install. +#### win32 install notes: +Some versions of the ICU installers seem to fail to make all ICU DLLs available through the +PATH environment variable; manual adjustment may be needed. +#### (*nix) install notes: +The shared version of libiresearch is used.
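For example, a minimal out-of-source configure/build sketch (illustrative only; the checkout path, build directory, and default generator are assumptions rather than part of the original instructions):
```
cd /path/to/iresearch && mkdir build && cd build
cmake -DUSE_PYRESEARCH=ON ..
cmake --build . --target pyresearch-install
```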
Install IResearch before running Pyresearch. + ## Included 3rd party dependencies Code for all included 3rd party dependencies is located in the "external" directory. #### [MurMurHash](https://sites.google.com/site/murmurhash) @@ -435,12 +450,15 @@ the first whitespace is ignored), in the directory corresponding to its language ## Query filter building blocks | Filter | Description | |-----------|----------------------| +|iresearch::by_edit_distance|for filtering of values based on Levenshtein distance |iresearch::by_granular_range|for faster filtering of numeric values within a given range, with the possibility of specifying open/closed ranges +|iresearch::by_ngram_similarity|for filtering of values based on NGram model |iresearch::by_phrase|for word-position-sensitive filtering of values, with the possibility of skipping selected positions |iresearch::by_prefix|for filtering of exact value prefixes |iresearch::by_range|for filtering of values within a given range, with the possibility of specifying open/closed ranges |iresearch::by_same_position|for term-insertion-order sensitive filtering of exact values |iresearch::by_term|for filtering of exact values +|iresearch::by_wildcard|for filtering of values based on matching pattern |iresearch::And|boolean conjunction of multiple filters, influencing document ranks/scores as appropriate |iresearch::Or|boolean disjunction of multiple filters, influencing document ranks/scores as appropriate (including "minimum match" functionality) |iresearch::Not|boolean negation of multiple filters @@ -560,7 +578,7 @@ The following grammar is currently defined via Bison (the root is ): - Apple Clang: 9 ## License -Copyright (c) 2017-2019 ArangoDB GmbH +Copyright (c) 2017-2020 ArangoDB GmbH Copyright (c) 2016-2017 EMC Corporation diff --git a/3rdParty/iresearch/THIRD_PARTY_README.md b/3rdParty/iresearch/THIRD_PARTY_README.md index 1255e854f80a..659f268812ae 100644 --- a/3rdParty/iresearch/THIRD_PARTY_README.md +++ b/3rdParty/iresearch/THIRD_PARTY_README.md @@ -2,7 +2,7 @@ IResearch search engine Third Party Software Read Me -Copyright© 2017 ArangoDB GmbH. All rights reserved. +Copyright© 2017-2020 ArangoDB GmbH. All rights reserved. Published October, 2017 Copyright© 2016-2017 EMC Corporation. All rights reserved. @@ -289,7 +289,7 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-### Boost +### Boost, utfcpp Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization diff --git a/3rdParty/iresearch/appveyor.yml b/3rdParty/iresearch/appveyor.yml index 208d1f8b5b8e..7a33518e9dcb 100644 --- a/3rdParty/iresearch/appveyor.yml +++ b/3rdParty/iresearch/appveyor.yml @@ -11,7 +11,7 @@ build: environment: BOOST_VERSION: default - CMAKE_OPTIONS: -DUSE_TESTS=On -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DMSVC_BUILD_THREADS=16 + USE_SIMDCOMP: On CMAKE_BUILD_OPTIONS: --config %CONFIGURATION% matrix: @@ -24,8 +24,9 @@ environment: TEST_OPTIONS: --gtest_filter=*type_utils* VSINSTALL: "Microsoft Visual Studio 14.0\\VC" BOOST_ROOT: C:/Libraries/boost_1_60_0 + USE_SIMDCOMP: Off CONFIGURATION: Debug - + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 BUILD_TYPE: static TEST_OPTIONS: --gtest_filter=*type_utils* @@ -38,6 +39,7 @@ environment: TEST_OPTIONS: --gtest_filter=*:-:*europarl*/fs* VSINSTALL: "Microsoft Visual Studio 14.0\\VC" BOOST_ROOT: C:/Libraries/boost_1_60_0 + USE_SIMDCOMP: Off CONFIGURATION: Release - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 @@ -195,7 +197,7 @@ build_script: - cd %APPVEYOR_BUILD_FOLDER% - mkdir build - cd build - - cmake -g %APPVEYOR_BUILD_WORKER_IMAGE% -Ax64 %CMAKE_OPTIONS% .. + - cmake -g %APPVEYOR_BUILD_WORKER_IMAGE% -Ax64 -DUSE_TESTS=On -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DMSVC_BUILD_THREADS=16 -DUSE_SIMDCOMP=%USE_SIMDCOMP% .. - cmake --build . %CMAKE_BUILD_OPTIONS% --target iresearch-tests-%BUILD_TYPE% - if "%BUILD_TYPE%" == "static" ( cmake --build . %CMAKE_BUILD_OPTIONS% --target iresearch-benchmarks ) - if "%BUILD_TYPE%" == "static" ( cmake --build . %CMAKE_BUILD_OPTIONS% --target iresearch-index-util ) @@ -204,7 +206,7 @@ test_script: ############################################################################ # Execute tests ############################################################################ - - cmd: bin\%CONFIGURATION%\iresearch-tests%EXECUTABLE_SUFFIX%.exe --gtest_output=xml:test_results.xml %TEST_OPTIONS% + - cmd: python ..\scripts\gtest-parallel\gtest_parallel.py %TEST_OPTIONS% bin\%CONFIGURATION%\iresearch-tests%EXECUTABLE_SUFFIX%.exe -- --gtest_output=xml:test_results.xml on_finish: ############################################################################ diff --git a/3rdParty/iresearch/cmake/PVS-Studio.cmake b/3rdParty/iresearch/cmake/PVS-Studio.cmake index 3cc5c441704d..ee860fc21ae2 100644 --- a/3rdParty/iresearch/cmake/PVS-Studio.cmake +++ b/3rdParty/iresearch/cmake/PVS-Studio.cmake @@ -1,546 +1,546 @@ -# 2006-2008 (c) Viva64.com Team -# 2008-2018 (c) OOO "Program Verification Systems" -# -# Version 12 - -cmake_minimum_required(VERSION 2.8.12) -cmake_policy(SET CMP0054 NEW) - -if (PVS_STUDIO_AS_SCRIPT) - # This code runs at build time. - # It executes pvs-studio-analyzer and propagates its return value. - - set(in_cl_params FALSE) - set(additional_args) - - foreach (arg ${PVS_STUDIO_COMMAND}) - if (NOT in_cl_params) - if ("${arg}" STREQUAL "--cl-params") - set(in_cl_params TRUE) - endif () - else () - # A workaround for macOS frameworks (e.g. 
QtWidgets.framework) - # You can test this workaround on this project: https://github.com/easyaspi314/MidiEditor/tree/gba - if (APPLE AND "${arg}" MATCHES "^-I(.*)\\.framework$") - STRING(REGEX REPLACE "^-I(.*)\\.framework$" "\\1.framework" framework "${arg}") - if (IS_ABSOLUTE "${framework}") - get_filename_component(framework "${framework}" DIRECTORY) - list(APPEND additional_args "-iframework") - list(APPEND additional_args "${framework}") - endif () - endif () - endif () - endforeach () - - execute_process(COMMAND ${PVS_STUDIO_COMMAND} ${additional_args} - ERROR_VARIABLE error - RESULT_VARIABLE result) - - set(stderr_type "") - - if (result) - set(stderr_type FATAL_ERROR) - endif () - - if (result OR error) - message(${stderr_type} "${error}") - endif () - - return() -endif () - -if(__PVS_STUDIO_INCLUDED) - return() -endif() -set(__PVS_STUDIO_INCLUDED TRUE) - -set(PVS_STUDIO_SCRIPT "${CMAKE_CURRENT_LIST_FILE}") - -function (pvs_studio_log TEXT) - if (PVS_STUDIO_DEBUG) - message("PVS-Studio: ${TEXT}") - endif () -endfunction () - -function (pvs_studio_relative_path VAR ROOT FILEPATH) - set("${VAR}" "${FILEPATH}" PARENT_SCOPE) - if ("${FILEPATH}" MATCHES "^/.*$" OR "${FILEPATH}" MATCHES "^.:/.*$") - file(RELATIVE_PATH RPATH "${ROOT}" "${FILEPATH}") - if (NOT "${RPATH}" MATCHES "^\\.\\..*$") - set("${VAR}" "${RPATH}" PARENT_SCOPE) - endif () - endif () -endfunction () - -function (pvs_studio_join_path VAR DIR1 DIR2) - if ("${DIR2}" MATCHES "^(/|~|.:/).*$" OR "${DIR1}" STREQUAL "") - set("${VAR}" "${DIR2}" PARENT_SCOPE) - else () - set("${VAR}" "${DIR1}/${DIR2}" PARENT_SCOPE) - endif () -endfunction () - -macro (pvs_studio_append_flags_from_property CXX C DIR PREFIX) - if (NOT "${PROPERTY}" STREQUAL "NOTFOUND" AND NOT "${PROPERTY}" STREQUAL "PROPERTY-NOTFOUND") - foreach (PROP ${PROPERTY}) - pvs_studio_join_path(PROP "${DIR}" "${PROP}") - - if (APPLE AND "${PREFIX}" STREQUAL "-I" AND IS_ABSOLUTE "${PROP}" AND "${PROP}" MATCHES "\\.framework$") - get_filename_component(FRAMEWORK "${PROP}" DIRECTORY) - list(APPEND "${CXX}" "-iframework") - list(APPEND "${CXX}" "${FRAMEWORK}") - list(APPEND "${C}" "-iframework") - list(APPEND "${C}" "${FRAMEWORK}") - pvs_studio_log("framework: ${FRAMEWORK}") - elseif (NOT "${PROP}" STREQUAL "") - list(APPEND "${CXX}" "${PREFIX}${PROP}") - list(APPEND "${C}" "${PREFIX}${PROP}") - endif() - endforeach () - endif () -endmacro () - -macro (pvs_studio_append_standard_flag FLAGS STANDARD) - if ("${STANDARD}" MATCHES "^(99|11|14|17)$") - if ("${PVS_STUDIO_PREPROCESSOR}" MATCHES "gcc|clang") - list(APPEND "${FLAGS}" "-std=c++${STANDARD}") - endif () - endif () -endmacro () - -function (pvs_studio_set_directory_flags DIRECTORY CXX C) - set(CXX_FLAGS "${${CXX}}") - set(C_FLAGS "${${C}}") - - get_directory_property(PROPERTY DIRECTORY "${DIRECTORY}" INCLUDE_DIRECTORIES) - pvs_studio_append_flags_from_property(CXX_FLAGS C_FLAGS "${DIRECTORY}" "-I") - - get_directory_property(PROPERTY DIRECTORY "${DIRECTORY}" COMPILE_DEFINITIONS) - pvs_studio_append_flags_from_property(CXX_FLAGS C_FLAGS "" "-D") - - set("${CXX}" "${CXX_FLAGS}" PARENT_SCOPE) - set("${C}" "${C_FLAGS}" PARENT_SCOPE) -endfunction () - -function (pvs_studio_set_target_flags TARGET CXX C) - set(CXX_FLAGS "${${CXX}}") - set(C_FLAGS "${${C}}") - - set(prop_incdirs "$") - list(APPEND CXX_FLAGS "$<$:-I$-I>>") - list(APPEND C_FLAGS "$<$:-I$-I>>") - - set(prop_compdefs "$") - list(APPEND CXX_FLAGS "$<$:-D$-D>>") - list(APPEND C_FLAGS "$<$:-D$-D>>") - - set("${CXX}" "${CXX_FLAGS}" PARENT_SCOPE) - set("${C}" 
"${C_FLAGS}" PARENT_SCOPE) -endfunction () - -function (pvs_studio_set_source_file_flags SOURCE) - set(LANGUAGE "") - - string(TOLOWER "${SOURCE}" SOURCE_LOWER) - if ("${LANGUAGE}" STREQUAL "" AND "${SOURCE_LOWER}" MATCHES "^.*\\.(c|cpp|cc|cx|cxx|cp|c\\+\\+)$") - if ("${SOURCE}" MATCHES "^.*\\.c$") - set(LANGUAGE C) - else () - set(LANGUAGE CXX) - endif () - endif () - - if ("${LANGUAGE}" STREQUAL "C") - set(CL_PARAMS ${PVS_STUDIO_C_FLAGS} ${PVS_STUDIO_TARGET_C_FLAGS} -DPVS_STUDIO) - elseif ("${LANGUAGE}" STREQUAL "CXX") - set(CL_PARAMS ${PVS_STUDIO_CXX_FLAGS} ${PVS_STUDIO_TARGET_CXX_FLAGS} -DPVS_STUDIO) - endif () - - set(PVS_STUDIO_LANGUAGE "${LANGUAGE}" PARENT_SCOPE) - set(PVS_STUDIO_CL_PARAMS "${CL_PARAMS}" PARENT_SCOPE) -endfunction () - -function (pvs_studio_analyze_file SOURCE SOURCE_DIR BINARY_DIR) - set(PLOGS ${PVS_STUDIO_PLOGS}) - pvs_studio_set_source_file_flags("${SOURCE}") - - get_filename_component(SOURCE "${SOURCE}" REALPATH) - - get_source_file_property(PROPERTY "${SOURCE}" HEADER_FILE_ONLY) - if (PROPERTY) - return() - endif () - - pvs_studio_relative_path(SOURCE_RELATIVE "${SOURCE_DIR}" "${SOURCE}") - pvs_studio_join_path(SOURCE "${SOURCE_DIR}" "${SOURCE}") - - set(LOG "${BINARY_DIR}/PVS-Studio/${SOURCE_RELATIVE}.plog") - get_filename_component(LOG "${LOG}" REALPATH) - get_filename_component(PARENT_DIR "${LOG}" DIRECTORY) - - if (EXISTS "${SOURCE}" AND NOT TARGET "${LOG}" AND NOT "${PVS_STUDIO_LANGUAGE}" STREQUAL "") - # A workaround to support implicit dependencies for ninja generators. - set(depPvsArg) - set(depCommandArg) - if (CMAKE_VERSION VERSION_GREATER 3.6 AND "${CMAKE_GENERATOR}" STREQUAL "Ninja") - pvs_studio_relative_path(relLog "${CMAKE_BINARY_DIR}" "${LOG}") - set(depPvsArg --dep-file "${LOG}.d" --dep-file-target "${relLog}") - set(depCommandArg DEPFILE "${LOG}.d") - endif () - - # https://public.kitware.com/Bug/print_bug_page.php?bug_id=14353 - # https://public.kitware.com/Bug/file/5436/expand_command.cmake - # - # It is a workaround to expand generator expressions. 
- set(cmdline "${PVS_STUDIO_BIN}" analyze - --output-file "${LOG}" - --source-file "${SOURCE}" - ${depPvsArg} - ${PVS_STUDIO_ARGS} - --cl-params "${PVS_STUDIO_CL_PARAMS}" "${SOURCE}") - - string(REPLACE ";" "$" cmdline "${cmdline}") - set(pvscmd "${CMAKE_COMMAND}" - -D PVS_STUDIO_AS_SCRIPT=TRUE - -D "PVS_STUDIO_COMMAND=${cmdline}" - -P "${PVS_STUDIO_SCRIPT}" - ) - - add_custom_command(OUTPUT "${LOG}" - COMMAND "${CMAKE_COMMAND}" -E make_directory "${PARENT_DIR}" - COMMAND "${CMAKE_COMMAND}" -E remove_directory "${LOG}" - COMMAND ${pvscmd} - WORKING_DIRECTORY "${BINARY_DIR}" - DEPENDS "${SOURCE}" "${PVS_STUDIO_CONFIG}" - IMPLICIT_DEPENDS "${PVS_STUDIO_LANGUAGE}" "${SOURCE}" - ${depCommandArg} - VERBATIM - COMMENT "Analyzing ${PVS_STUDIO_LANGUAGE} file ${SOURCE_RELATIVE}") - list(APPEND PLOGS "${LOG}") - endif () - set(PVS_STUDIO_PLOGS "${PLOGS}" PARENT_SCOPE) -endfunction () - -function (pvs_studio_analyze_target TARGET DIR) - set(PVS_STUDIO_PLOGS "${PVS_STUDIO_PLOGS}") - set(PVS_STUDIO_TARGET_CXX_FLAGS "") - set(PVS_STUDIO_TARGET_C_FLAGS "") - - get_target_property(PROPERTY "${TARGET}" SOURCES) - pvs_studio_relative_path(BINARY_DIR "${CMAKE_SOURCE_DIR}" "${DIR}") - if ("${BINARY_DIR}" MATCHES "^/.*$") - pvs_studio_join_path(BINARY_DIR "${CMAKE_BINARY_DIR}" "PVS-Studio/__${BINARY_DIR}") - else () - pvs_studio_join_path(BINARY_DIR "${CMAKE_BINARY_DIR}" "${BINARY_DIR}") - endif () - - file(MAKE_DIRECTORY "${BINARY_DIR}") - - pvs_studio_set_directory_flags("${DIR}" PVS_STUDIO_TARGET_CXX_FLAGS PVS_STUDIO_TARGET_C_FLAGS) - pvs_studio_set_target_flags("${TARGET}" PVS_STUDIO_TARGET_CXX_FLAGS PVS_STUDIO_TARGET_C_FLAGS) - - if (NOT "${PROPERTY}" STREQUAL "NOTFOUND" AND NOT "${PROPERTY}" STREQUAL "PROPERTY-NOTFOUND") - foreach (SOURCE ${PROPERTY}) - pvs_studio_join_path(SOURCE "${DIR}" "${SOURCE}") - pvs_studio_analyze_file("${SOURCE}" "${DIR}" "${BINARY_DIR}") - endforeach () - endif () - - set(PVS_STUDIO_PLOGS "${PVS_STUDIO_PLOGS}" PARENT_SCOPE) -endfunction () - -set(PVS_STUDIO_RECURSIVE_TARGETS) -set(PVS_STUDIO_RECURSIVE_TARGETS_NEW) - -macro(pvs_studio_get_recursive_targets TARGET) - get_target_property(libs "${TARGET}" LINK_LIBRARIES) - foreach (lib IN LISTS libs) - list(FIND PVS_STUDIO_RECURSIVE_TARGETS "${lib}" index) - if (TARGET "${lib}" AND "${index}" STREQUAL -1) - get_target_property(target_type "${lib}" TYPE) - if (NOT "${target_type}" STREQUAL "INTERFACE_LIBRARY") - list(APPEND PVS_STUDIO_RECURSIVE_TARGETS "${lib}") - list(APPEND PVS_STUDIO_RECURSIVE_TARGETS_NEW "${lib}") - pvs_studio_get_recursive_targets("${lib}") - endif () - endif () - endforeach () -endmacro() - -option(PVS_STUDIO_DISABLE OFF "Disable PVS-Studio targets") -option(PVS_STUDIO_DEBUG OFF "Add debug info") - -# pvs_studio_add_target -# Target options: -# ALL add PVS-Studio target to default build (default: off) -# TARGET target name of analysis target (default: pvs) -# ANALYZE targets... 
targets to analyze -# RECURSIVE analyze target's dependencies (requires CMake 3.5+) -# COMPILE_COMMANDS use compile_commands.json instead of targets (specified by the 'ANALYZE' option) to determine files for analysis -# (set CMAKE_EXPORT_COMPILE_COMMANDS, available only for Makefile and Ninja generators) -# -# Output options: -# OUTPUT prints report to stdout -# LOG path path to report (default: ${CMAKE_CURRENT_BINARY_DIR}/PVS-Studio.log) -# FORMAT format format of report -# MODE mode analyzers/levels filter (default: GA:1,2) -# HIDE_HELP do not print help message -# -# Analyzer options: -# PLATFORM name linux32/linux64 (default: linux64) -# PREPROCESSOR name preprocessor type: gcc/clang (default: auto detected) -# LICENSE path path to PVS-Studio.lic (default: ~/.config/PVS-Studio/PVS-Studio.lic) -# CONFIG path path to PVS-Studio.cfg -# CFG_TEXT text embedded PVS-Studio.cfg -# KEEP_COMBINED_PLOG do not delete combined plog file *.pvs.raw for further processing with plog-converter -# -# Misc options: -# DEPENDS targets.. additional target dependencies -# SOURCES path... list of source files to analyze -# BIN path path to pvs-studio-analyzer (Unix) or CompilerCommandsAnalyzer.exe (Windows) -# CONVERTER path path to plog-converter (Unix) or HtmlGenerator.exe (Windows) -# C_FLAGS flags... additional C_FLAGS -# CXX_FLAGS flags... additional CXX_FLAGS -# ARGS args... additional pvs-studio-analyzer/CompilerCommandsAnalyzer.exe flags -function (pvs_studio_add_target) - macro (default VAR VALUE) - if ("${${VAR}}" STREQUAL "") - set("${VAR}" "${VALUE}") - endif () - endmacro () - - set(PVS_STUDIO_SUPPORTED_PREPROCESSORS "gcc|clang|visualcpp") - if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(DEFAULT_PREPROCESSOR "clang") - elseif (MSVC) - set(DEFAULT_PREPROCESSOR "visualcpp") - else () - set(DEFAULT_PREPROCESSOR "gcc") - endif () - - set(OPTIONAL OUTPUT ALL RECURSIVE HIDE_HELP KEEP_COMBINED_PLOG COMPILE_COMMANDS) - set(SINGLE LICENSE CONFIG TARGET LOG FORMAT BIN CONVERTER PLATFORM PREPROCESSOR CFG_TEXT) - set(MULTI SOURCES C_FLAGS CXX_FLAGS ARGS DEPENDS ANALYZE MODE) - cmake_parse_arguments(PVS_STUDIO "${OPTIONAL}" "${SINGLE}" "${MULTI}" ${ARGN}) - - if ("${PVS_STUDIO_CONFIG}" STREQUAL "" OR NOT "${PVS_STUDIO_CFG_TEXT}" STREQUAL "") - set(PVS_STUDIO_EMPTY_CONFIG ON) - else () - set(PVS_STUDIO_EMPTY_CONFIG OFF) - endif () - - default(PVS_STUDIO_CFG_TEXT "analysis-mode=31") - default(PVS_STUDIO_CONFIG "${CMAKE_BINARY_DIR}/PVS-Studio.cfg") - default(PVS_STUDIO_C_FLAGS "") - default(PVS_STUDIO_CXX_FLAGS "") - default(PVS_STUDIO_TARGET "pvs") - default(PVS_STUDIO_LOG "PVS-Studio.log") - - set(PATHS) - if (WIN32) - set(ROOT "PROGRAMFILES(X86)") - set(ROOT "$ENV{${ROOT}}/PVS-Studio") - string(REPLACE \\ / ROOT "${ROOT}") - - if (EXISTS "${ROOT}") - set(PATHS "${ROOT}") - endif () - - default(PVS_STUDIO_BIN "CompilerCommandsAnalyzer.exe") - default(PVS_STUDIO_CONVERTER "HtmlGenerator.exe") - else () - default(PVS_STUDIO_BIN "pvs-studio-analyzer") - default(PVS_STUDIO_CONVERTER "plog-converter") - endif () - - find_program(PVS_STUDIO_BIN_PATH "${PVS_STUDIO_BIN}" ${PATHS}) - set(PVS_STUDIO_BIN "${PVS_STUDIO_BIN_PATH}") - - if (NOT EXISTS "${PVS_STUDIO_BIN}") - message(FATAL_ERROR "pvs-studio-analyzer is not found") - endif () - - find_program(PVS_STUDIO_CONVERTER_PATH "${PVS_STUDIO_CONVERTER}" ${PATHS}) - set(PVS_STUDIO_CONVERTER "${PVS_STUDIO_CONVERTER_PATH}") - - if (NOT EXISTS "${PVS_STUDIO_CONVERTER}") - message(FATAL_ERROR "plog-converter is not found") - endif () - - default(PVS_STUDIO_MODE 
"GA:1,2") - default(PVS_STUDIO_PREPROCESSOR "${DEFAULT_PREPROCESSOR}") - if (WIN32) - default(PVS_STUDIO_PLATFORM "x64") - else () - default(PVS_STUDIO_PLATFORM "linux64") - endif () - - string(REPLACE ";" "+" PVS_STUDIO_MODE "${PVS_STUDIO_MODE}") - - if (PVS_STUDIO_EMPTY_CONFIG) - set(PVS_STUDIO_CONFIG_COMMAND "${CMAKE_COMMAND}" -E echo "${PVS_STUDIO_CFG_TEXT}" > "${PVS_STUDIO_CONFIG}") - else () - set(PVS_STUDIO_CONFIG_COMMAND "${CMAKE_COMMAND}" -E touch "${PVS_STUDIO_CONFIG}") - endif () - - add_custom_command(OUTPUT "${PVS_STUDIO_CONFIG}" - COMMAND ${PVS_STUDIO_CONFIG_COMMAND} - WORKING_DIRECTORY "${BINARY_DIR}" - COMMENT "Generating PVS-Studio.cfg") - - if (NOT "${PVS_STUDIO_PREPROCESSOR}" MATCHES "^${PVS_STUDIO_SUPPORTED_PREPROCESSORS}$") - message(FATAL_ERROR "Preprocessor ${PVS_STUDIO_PREPROCESSOR} isn't supported. Available options: ${PVS_STUDIO_SUPPORTED_PREPROCESSORS}.") - endif () - - pvs_studio_append_standard_flag(PVS_STUDIO_CXX_FLAGS "${CMAKE_CXX_STANDARD}") - pvs_studio_set_directory_flags("${CMAKE_CURRENT_SOURCE_DIR}" PVS_STUDIO_CXX_FLAGS PVS_STUDIO_C_FLAGS) - - if (NOT "${PVS_STUDIO_LICENSE}" STREQUAL "") - pvs_studio_join_path(PVS_STUDIO_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}" "${PVS_STUDIO_LICENSE}") - list(APPEND PVS_STUDIO_ARGS --lic-file "${PVS_STUDIO_LICENSE}") - endif () - - list(APPEND PVS_STUDIO_ARGS --cfg "${PVS_STUDIO_CONFIG}" - --platform "${PVS_STUDIO_PLATFORM}" - --preprocessor "${PVS_STUDIO_PREPROCESSOR}") - - if (NOT "${CMAKE_CXX_COMPILER}" STREQUAL "") - list(APPEND PVS_STUDIO_ARGS --cxx "${CMAKE_CXX_COMPILER}") - endif () - - if (NOT "${CMAKE_C_COMPILER}" STREQUAL "") - list(APPEND PVS_STUDIO_ARGS --cc "${CMAKE_C_COMPILER}") - endif () - - set(PVS_STUDIO_PLOGS "") - - set(PVS_STUDIO_RECURSIVE_TARGETS_NEW) - if (${PVS_STUDIO_RECURSIVE}) - foreach (TARGET IN LISTS PVS_STUDIO_ANALYZE) - list(APPEND PVS_STUDIO_RECURSIVE_TARGETS_NEW "${TARGET}") - pvs_studio_get_recursive_targets("${TARGET}") - endforeach () - endif () - - set(inc_path) - - foreach (TARGET ${PVS_STUDIO_ANALYZE}) - set(DIR "${CMAKE_CURRENT_SOURCE_DIR}") - string(FIND "${TARGET}" ":" DELIM) - if ("${DELIM}" GREATER "-1") - math(EXPR DELIMI "${DELIM}+1") - string(SUBSTRING "${TARGET}" "${DELIMI}" "-1" DIR) - string(SUBSTRING "${TARGET}" "0" "${DELIM}" TARGET) - pvs_studio_join_path(DIR "${CMAKE_CURRENT_SOURCE_DIR}" "${DIR}") - else () - get_target_property(TARGET_SOURCE_DIR "${TARGET}" SOURCE_DIR) - if (EXISTS "${TARGET_SOURCE_DIR}") - set(DIR "${TARGET_SOURCE_DIR}") - endif () - endif () - pvs_studio_analyze_target("${TARGET}" "${DIR}") - list(APPEND PVS_STUDIO_DEPENDS "${TARGET}") - - if ("${inc_path}" STREQUAL "") - set(inc_path "$") - else () - set(inc_path "${inc_path}$$") - endif () - endforeach () - - foreach (TARGET ${PVS_STUDIO_RECURSIVE_TARGETS_NEW}) - set(DIR "${CMAKE_CURRENT_SOURCE_DIR}") - get_target_property(TARGET_SOURCE_DIR "${TARGET}" SOURCE_DIR) - if (EXISTS "${TARGET_SOURCE_DIR}") - set(DIR "${TARGET_SOURCE_DIR}") - endif () - pvs_studio_analyze_target("${TARGET}" "${DIR}") - list(APPEND PVS_STUDIO_DEPENDS "${TARGET}") - endforeach () - - set(PVS_STUDIO_TARGET_CXX_FLAGS "") - set(PVS_STUDIO_TARGET_C_FLAGS "") - foreach (SOURCE ${PVS_STUDIO_SOURCES}) - pvs_studio_analyze_file("${SOURCE}" "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") - endforeach () - - if (PVS_STUDIO_COMPILE_COMMANDS) - set(COMPILE_COMMANDS_LOG "${PVS_STUDIO_LOG}.pvs.analyzer.raw") - if (NOT CMAKE_EXPORT_COMPILE_COMMANDS) - message(FATAL_ERROR "You should set CMAKE_EXPORT_COMPILE_COMMANDS to TRUE") 
- endif () - add_custom_command( - OUTPUT "${COMPILE_COMMANDS_LOG}" - COMMAND "${PVS_STUDIO_BIN}" analyze -i - --output-file "${COMPILE_COMMANDS_LOG}.always" - ${PVS_STUDIO_ARGS} - COMMENT "Analyzing with PVS-Studio" - WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" - DEPENDS "${PVS_STUDIO_CONFIG}" - ) - list(APPEND PVS_STUDIO_PLOGS_LOGS "${COMPILE_COMMANDS_LOG}.always") - list(APPEND PVS_STUDIO_PLOGS_DEPENDENCIES "${COMPILE_COMMANDS_LOG}") - endif () - - pvs_studio_relative_path(LOG_RELATIVE "${CMAKE_BINARY_DIR}" "${PVS_STUDIO_LOG}") - if (PVS_STUDIO_PLOGS OR PVS_STUDIO_COMPILE_COMMANDS) - if (WIN32) - string(REPLACE / \\ PVS_STUDIO_PLOGS "${PVS_STUDIO_PLOGS}") - endif () - if (WIN32) - set(COMMANDS COMMAND type ${PVS_STUDIO_PLOGS} ${PVS_STUDIO_PLOGS_LOGS} > "${PVS_STUDIO_LOG}" 2>nul) - else () - set(COMMANDS COMMAND cat ${PVS_STUDIO_PLOGS} ${PVS_STUDIO_PLOGS_LOGS} > "${PVS_STUDIO_LOG}") - endif () - set(COMMENT "Generating ${LOG_RELATIVE}") - if (NOT "${PVS_STUDIO_FORMAT}" STREQUAL "" OR PVS_STUDIO_OUTPUT) - if ("${PVS_STUDIO_FORMAT}" STREQUAL "") - set(PVS_STUDIO_FORMAT "errorfile") - endif () - list(APPEND COMMANDS - COMMAND "${CMAKE_COMMAND}" -E remove -f "${PVS_STUDIO_LOG}.pvs.raw" - COMMAND "${CMAKE_COMMAND}" -E rename "${PVS_STUDIO_LOG}" "${PVS_STUDIO_LOG}.pvs.raw" - COMMAND "${PVS_STUDIO_CONVERTER}" -t "${PVS_STUDIO_FORMAT}" "${PVS_STUDIO_LOG}.pvs.raw" -o "${PVS_STUDIO_LOG}" -a "${PVS_STUDIO_MODE}" - ) - if(NOT PVS_STUDIO_KEEP_COMBINED_PLOG) - list(APPEND COMMANDS COMMAND "${CMAKE_COMMAND}" -E remove -f "${PVS_STUDIO_LOG}.pvs.raw") - endif() - endif () - else () - set(COMMANDS COMMAND "${CMAKE_COMMAND}" -E touch "${PVS_STUDIO_LOG}") - set(COMMENT "Generating ${LOG_RELATIVE}: no sources found") - endif () - - if (WIN32) - string(REPLACE / \\ PVS_STUDIO_LOG "${PVS_STUDIO_LOG}") - endif () - - add_custom_command(OUTPUT "${PVS_STUDIO_LOG}" - ${COMMANDS} - COMMENT "${COMMENT}" - DEPENDS ${PVS_STUDIO_PLOGS} ${PVS_STUDIO_PLOGS_DEPENDENCIES} - WORKING_DIRECTORY "${CMAKE_BINARY_DIR}") - - if (PVS_STUDIO_ALL) - set(ALL "ALL") - else () - set(ALL "") - endif () - - if (PVS_STUDIO_OUTPUT) - if (PVS_STUDIO_HIDE_HELP AND NOT WIN32) - set(COMMANDS COMMAND grep -v " error: Help:" ${PVS_STUDIO_LOG} 1>&2 || exit 0) - elseif (WIN32) - set(COMMANDS COMMAND type "${PVS_STUDIO_LOG}" 1>&2) - else () - set(COMMANDS COMMAND cat "${PVS_STUDIO_LOG}" 1>&2) - endif() - else () - set(COMMANDS "") - endif () - - add_custom_target("${PVS_STUDIO_TARGET}" ${ALL} ${COMMANDS} WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" DEPENDS ${PVS_STUDIO_DEPENDS} "${PVS_STUDIO_LOG}") - - # A workaround to add implicit dependencies of source files from include directories - set_target_properties("${PVS_STUDIO_TARGET}" PROPERTIES INCLUDE_DIRECTORIES "${inc_path}") -endfunction () +# 2006-2008 (c) Viva64.com Team +# 2008-2018 (c) OOO "Program Verification Systems" +# +# Version 12 + +cmake_minimum_required(VERSION 2.8.12) +cmake_policy(SET CMP0054 NEW) + +if (PVS_STUDIO_AS_SCRIPT) + # This code runs at build time. + # It executes pvs-studio-analyzer and propagates its return value. + + set(in_cl_params FALSE) + set(additional_args) + + foreach (arg ${PVS_STUDIO_COMMAND}) + if (NOT in_cl_params) + if ("${arg}" STREQUAL "--cl-params") + set(in_cl_params TRUE) + endif () + else () + # A workaround for macOS frameworks (e.g. 
QtWidgets.framework) + # You can test this workaround on this project: https://github.com/easyaspi314/MidiEditor/tree/gba + if (APPLE AND "${arg}" MATCHES "^-I(.*)\\.framework$") + STRING(REGEX REPLACE "^-I(.*)\\.framework$" "\\1.framework" framework "${arg}") + if (IS_ABSOLUTE "${framework}") + get_filename_component(framework "${framework}" DIRECTORY) + list(APPEND additional_args "-iframework") + list(APPEND additional_args "${framework}") + endif () + endif () + endif () + endforeach () + + execute_process(COMMAND ${PVS_STUDIO_COMMAND} ${additional_args} + ERROR_VARIABLE error + RESULT_VARIABLE result) + + set(stderr_type "") + + if (result) + set(stderr_type FATAL_ERROR) + endif () + + if (result OR error) + message(${stderr_type} "${error}") + endif () + + return() +endif () + +if(__PVS_STUDIO_INCLUDED) + return() +endif() +set(__PVS_STUDIO_INCLUDED TRUE) + +set(PVS_STUDIO_SCRIPT "${CMAKE_CURRENT_LIST_FILE}") + +function (pvs_studio_log TEXT) + if (PVS_STUDIO_DEBUG) + message("PVS-Studio: ${TEXT}") + endif () +endfunction () + +function (pvs_studio_relative_path VAR ROOT FILEPATH) + set("${VAR}" "${FILEPATH}" PARENT_SCOPE) + if ("${FILEPATH}" MATCHES "^/.*$" OR "${FILEPATH}" MATCHES "^.:/.*$") + file(RELATIVE_PATH RPATH "${ROOT}" "${FILEPATH}") + if (NOT "${RPATH}" MATCHES "^\\.\\..*$") + set("${VAR}" "${RPATH}" PARENT_SCOPE) + endif () + endif () +endfunction () + +function (pvs_studio_join_path VAR DIR1 DIR2) + if ("${DIR2}" MATCHES "^(/|~|.:/).*$" OR "${DIR1}" STREQUAL "") + set("${VAR}" "${DIR2}" PARENT_SCOPE) + else () + set("${VAR}" "${DIR1}/${DIR2}" PARENT_SCOPE) + endif () +endfunction () + +macro (pvs_studio_append_flags_from_property CXX C DIR PREFIX) + if (NOT "${PROPERTY}" STREQUAL "NOTFOUND" AND NOT "${PROPERTY}" STREQUAL "PROPERTY-NOTFOUND") + foreach (PROP ${PROPERTY}) + pvs_studio_join_path(PROP "${DIR}" "${PROP}") + + if (APPLE AND "${PREFIX}" STREQUAL "-I" AND IS_ABSOLUTE "${PROP}" AND "${PROP}" MATCHES "\\.framework$") + get_filename_component(FRAMEWORK "${PROP}" DIRECTORY) + list(APPEND "${CXX}" "-iframework") + list(APPEND "${CXX}" "${FRAMEWORK}") + list(APPEND "${C}" "-iframework") + list(APPEND "${C}" "${FRAMEWORK}") + pvs_studio_log("framework: ${FRAMEWORK}") + elseif (NOT "${PROP}" STREQUAL "") + list(APPEND "${CXX}" "${PREFIX}${PROP}") + list(APPEND "${C}" "${PREFIX}${PROP}") + endif() + endforeach () + endif () +endmacro () + +macro (pvs_studio_append_standard_flag FLAGS STANDARD) + if ("${STANDARD}" MATCHES "^(99|11|14|17)$") + if ("${PVS_STUDIO_PREPROCESSOR}" MATCHES "gcc|clang") + list(APPEND "${FLAGS}" "-std=c++${STANDARD}") + endif () + endif () +endmacro () + +function (pvs_studio_set_directory_flags DIRECTORY CXX C) + set(CXX_FLAGS "${${CXX}}") + set(C_FLAGS "${${C}}") + + get_directory_property(PROPERTY DIRECTORY "${DIRECTORY}" INCLUDE_DIRECTORIES) + pvs_studio_append_flags_from_property(CXX_FLAGS C_FLAGS "${DIRECTORY}" "-I") + + get_directory_property(PROPERTY DIRECTORY "${DIRECTORY}" COMPILE_DEFINITIONS) + pvs_studio_append_flags_from_property(CXX_FLAGS C_FLAGS "" "-D") + + set("${CXX}" "${CXX_FLAGS}" PARENT_SCOPE) + set("${C}" "${C_FLAGS}" PARENT_SCOPE) +endfunction () + +function (pvs_studio_set_target_flags TARGET CXX C) + set(CXX_FLAGS "${${CXX}}") + set(C_FLAGS "${${C}}") + + set(prop_incdirs "$") + list(APPEND CXX_FLAGS "$<$:-I$-I>>") + list(APPEND C_FLAGS "$<$:-I$-I>>") + + set(prop_compdefs "$") + list(APPEND CXX_FLAGS "$<$:-D$-D>>") + list(APPEND C_FLAGS "$<$:-D$-D>>") + + set("${CXX}" "${CXX_FLAGS}" PARENT_SCOPE) + set("${C}" 
"${C_FLAGS}" PARENT_SCOPE) +endfunction () + +function (pvs_studio_set_source_file_flags SOURCE) + set(LANGUAGE "") + + string(TOLOWER "${SOURCE}" SOURCE_LOWER) + if ("${LANGUAGE}" STREQUAL "" AND "${SOURCE_LOWER}" MATCHES "^.*\\.(c|cpp|cc|cx|cxx|cp|c\\+\\+)$") + if ("${SOURCE}" MATCHES "^.*\\.c$") + set(LANGUAGE C) + else () + set(LANGUAGE CXX) + endif () + endif () + + if ("${LANGUAGE}" STREQUAL "C") + set(CL_PARAMS ${PVS_STUDIO_C_FLAGS} ${PVS_STUDIO_TARGET_C_FLAGS} -DPVS_STUDIO) + elseif ("${LANGUAGE}" STREQUAL "CXX") + set(CL_PARAMS ${PVS_STUDIO_CXX_FLAGS} ${PVS_STUDIO_TARGET_CXX_FLAGS} -DPVS_STUDIO) + endif () + + set(PVS_STUDIO_LANGUAGE "${LANGUAGE}" PARENT_SCOPE) + set(PVS_STUDIO_CL_PARAMS "${CL_PARAMS}" PARENT_SCOPE) +endfunction () + +function (pvs_studio_analyze_file SOURCE SOURCE_DIR BINARY_DIR) + set(PLOGS ${PVS_STUDIO_PLOGS}) + pvs_studio_set_source_file_flags("${SOURCE}") + + get_filename_component(SOURCE "${SOURCE}" REALPATH) + + get_source_file_property(PROPERTY "${SOURCE}" HEADER_FILE_ONLY) + if (PROPERTY) + return() + endif () + + pvs_studio_relative_path(SOURCE_RELATIVE "${SOURCE_DIR}" "${SOURCE}") + pvs_studio_join_path(SOURCE "${SOURCE_DIR}" "${SOURCE}") + + set(LOG "${BINARY_DIR}/PVS-Studio/${SOURCE_RELATIVE}.plog") + get_filename_component(LOG "${LOG}" REALPATH) + get_filename_component(PARENT_DIR "${LOG}" DIRECTORY) + + if (EXISTS "${SOURCE}" AND NOT TARGET "${LOG}" AND NOT "${PVS_STUDIO_LANGUAGE}" STREQUAL "") + # A workaround to support implicit dependencies for ninja generators. + set(depPvsArg) + set(depCommandArg) + if (CMAKE_VERSION VERSION_GREATER 3.6 AND "${CMAKE_GENERATOR}" STREQUAL "Ninja") + pvs_studio_relative_path(relLog "${CMAKE_BINARY_DIR}" "${LOG}") + set(depPvsArg --dep-file "${LOG}.d" --dep-file-target "${relLog}") + set(depCommandArg DEPFILE "${LOG}.d") + endif () + + # https://public.kitware.com/Bug/print_bug_page.php?bug_id=14353 + # https://public.kitware.com/Bug/file/5436/expand_command.cmake + # + # It is a workaround to expand generator expressions. 
+ set(cmdline "${PVS_STUDIO_BIN}" analyze + --output-file "${LOG}" + --source-file "${SOURCE}" + ${depPvsArg} + ${PVS_STUDIO_ARGS} + --cl-params "${PVS_STUDIO_CL_PARAMS}" "${SOURCE}") + + string(REPLACE ";" "$" cmdline "${cmdline}") + set(pvscmd "${CMAKE_COMMAND}" + -D PVS_STUDIO_AS_SCRIPT=TRUE + -D "PVS_STUDIO_COMMAND=${cmdline}" + -P "${PVS_STUDIO_SCRIPT}" + ) + + add_custom_command(OUTPUT "${LOG}" + COMMAND "${CMAKE_COMMAND}" -E make_directory "${PARENT_DIR}" + COMMAND "${CMAKE_COMMAND}" -E remove_directory "${LOG}" + COMMAND ${pvscmd} + WORKING_DIRECTORY "${BINARY_DIR}" + DEPENDS "${SOURCE}" "${PVS_STUDIO_CONFIG}" + IMPLICIT_DEPENDS "${PVS_STUDIO_LANGUAGE}" "${SOURCE}" + ${depCommandArg} + VERBATIM + COMMENT "Analyzing ${PVS_STUDIO_LANGUAGE} file ${SOURCE_RELATIVE}") + list(APPEND PLOGS "${LOG}") + endif () + set(PVS_STUDIO_PLOGS "${PLOGS}" PARENT_SCOPE) +endfunction () + +function (pvs_studio_analyze_target TARGET DIR) + set(PVS_STUDIO_PLOGS "${PVS_STUDIO_PLOGS}") + set(PVS_STUDIO_TARGET_CXX_FLAGS "") + set(PVS_STUDIO_TARGET_C_FLAGS "") + + get_target_property(PROPERTY "${TARGET}" SOURCES) + pvs_studio_relative_path(BINARY_DIR "${CMAKE_SOURCE_DIR}" "${DIR}") + if ("${BINARY_DIR}" MATCHES "^/.*$") + pvs_studio_join_path(BINARY_DIR "${CMAKE_BINARY_DIR}" "PVS-Studio/__${BINARY_DIR}") + else () + pvs_studio_join_path(BINARY_DIR "${CMAKE_BINARY_DIR}" "${BINARY_DIR}") + endif () + + file(MAKE_DIRECTORY "${BINARY_DIR}") + + pvs_studio_set_directory_flags("${DIR}" PVS_STUDIO_TARGET_CXX_FLAGS PVS_STUDIO_TARGET_C_FLAGS) + pvs_studio_set_target_flags("${TARGET}" PVS_STUDIO_TARGET_CXX_FLAGS PVS_STUDIO_TARGET_C_FLAGS) + + if (NOT "${PROPERTY}" STREQUAL "NOTFOUND" AND NOT "${PROPERTY}" STREQUAL "PROPERTY-NOTFOUND") + foreach (SOURCE ${PROPERTY}) + pvs_studio_join_path(SOURCE "${DIR}" "${SOURCE}") + pvs_studio_analyze_file("${SOURCE}" "${DIR}" "${BINARY_DIR}") + endforeach () + endif () + + set(PVS_STUDIO_PLOGS "${PVS_STUDIO_PLOGS}" PARENT_SCOPE) +endfunction () + +set(PVS_STUDIO_RECURSIVE_TARGETS) +set(PVS_STUDIO_RECURSIVE_TARGETS_NEW) + +macro(pvs_studio_get_recursive_targets TARGET) + get_target_property(libs "${TARGET}" LINK_LIBRARIES) + foreach (lib IN LISTS libs) + list(FIND PVS_STUDIO_RECURSIVE_TARGETS "${lib}" index) + if (TARGET "${lib}" AND "${index}" STREQUAL -1) + get_target_property(target_type "${lib}" TYPE) + if (NOT "${target_type}" STREQUAL "INTERFACE_LIBRARY") + list(APPEND PVS_STUDIO_RECURSIVE_TARGETS "${lib}") + list(APPEND PVS_STUDIO_RECURSIVE_TARGETS_NEW "${lib}") + pvs_studio_get_recursive_targets("${lib}") + endif () + endif () + endforeach () +endmacro() + +option(PVS_STUDIO_DISABLE OFF "Disable PVS-Studio targets") +option(PVS_STUDIO_DEBUG OFF "Add debug info") + +# pvs_studio_add_target +# Target options: +# ALL add PVS-Studio target to default build (default: off) +# TARGET target name of analysis target (default: pvs) +# ANALYZE targets... 
targets to analyze +# RECURSIVE analyze target's dependencies (requires CMake 3.5+) +# COMPILE_COMMANDS use compile_commands.json instead of targets (specified by the 'ANALYZE' option) to determine files for analysis +# (set CMAKE_EXPORT_COMPILE_COMMANDS, available only for Makefile and Ninja generators) +# +# Output options: +# OUTPUT prints report to stdout +# LOG path path to report (default: ${CMAKE_CURRENT_BINARY_DIR}/PVS-Studio.log) +# FORMAT format format of report +# MODE mode analyzers/levels filter (default: GA:1,2) +# HIDE_HELP do not print help message +# +# Analyzer options: +# PLATFORM name linux32/linux64 (default: linux64) +# PREPROCESSOR name preprocessor type: gcc/clang (default: auto detected) +# LICENSE path path to PVS-Studio.lic (default: ~/.config/PVS-Studio/PVS-Studio.lic) +# CONFIG path path to PVS-Studio.cfg +# CFG_TEXT text embedded PVS-Studio.cfg +# KEEP_COMBINED_PLOG do not delete combined plog file *.pvs.raw for further processing with plog-converter +# +# Misc options: +# DEPENDS targets.. additional target dependencies +# SOURCES path... list of source files to analyze +# BIN path path to pvs-studio-analyzer (Unix) or CompilerCommandsAnalyzer.exe (Windows) +# CONVERTER path path to plog-converter (Unix) or HtmlGenerator.exe (Windows) +# C_FLAGS flags... additional C_FLAGS +# CXX_FLAGS flags... additional CXX_FLAGS +# ARGS args... additional pvs-studio-analyzer/CompilerCommandsAnalyzer.exe flags +function (pvs_studio_add_target) + macro (default VAR VALUE) + if ("${${VAR}}" STREQUAL "") + set("${VAR}" "${VALUE}") + endif () + endmacro () + + set(PVS_STUDIO_SUPPORTED_PREPROCESSORS "gcc|clang|visualcpp") + if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(DEFAULT_PREPROCESSOR "clang") + elseif (MSVC) + set(DEFAULT_PREPROCESSOR "visualcpp") + else () + set(DEFAULT_PREPROCESSOR "gcc") + endif () + + set(OPTIONAL OUTPUT ALL RECURSIVE HIDE_HELP KEEP_COMBINED_PLOG COMPILE_COMMANDS) + set(SINGLE LICENSE CONFIG TARGET LOG FORMAT BIN CONVERTER PLATFORM PREPROCESSOR CFG_TEXT) + set(MULTI SOURCES C_FLAGS CXX_FLAGS ARGS DEPENDS ANALYZE MODE) + cmake_parse_arguments(PVS_STUDIO "${OPTIONAL}" "${SINGLE}" "${MULTI}" ${ARGN}) + + if ("${PVS_STUDIO_CONFIG}" STREQUAL "" OR NOT "${PVS_STUDIO_CFG_TEXT}" STREQUAL "") + set(PVS_STUDIO_EMPTY_CONFIG ON) + else () + set(PVS_STUDIO_EMPTY_CONFIG OFF) + endif () + + default(PVS_STUDIO_CFG_TEXT "analysis-mode=31") + default(PVS_STUDIO_CONFIG "${CMAKE_BINARY_DIR}/PVS-Studio.cfg") + default(PVS_STUDIO_C_FLAGS "") + default(PVS_STUDIO_CXX_FLAGS "") + default(PVS_STUDIO_TARGET "pvs") + default(PVS_STUDIO_LOG "PVS-Studio.log") + + set(PATHS) + if (WIN32) + set(ROOT "PROGRAMFILES(X86)") + set(ROOT "$ENV{${ROOT}}/PVS-Studio") + string(REPLACE \\ / ROOT "${ROOT}") + + if (EXISTS "${ROOT}") + set(PATHS "${ROOT}") + endif () + + default(PVS_STUDIO_BIN "CompilerCommandsAnalyzer.exe") + default(PVS_STUDIO_CONVERTER "HtmlGenerator.exe") + else () + default(PVS_STUDIO_BIN "pvs-studio-analyzer") + default(PVS_STUDIO_CONVERTER "plog-converter") + endif () + + find_program(PVS_STUDIO_BIN_PATH "${PVS_STUDIO_BIN}" ${PATHS}) + set(PVS_STUDIO_BIN "${PVS_STUDIO_BIN_PATH}") + + if (NOT EXISTS "${PVS_STUDIO_BIN}") + message(FATAL_ERROR "pvs-studio-analyzer is not found") + endif () + + find_program(PVS_STUDIO_CONVERTER_PATH "${PVS_STUDIO_CONVERTER}" ${PATHS}) + set(PVS_STUDIO_CONVERTER "${PVS_STUDIO_CONVERTER_PATH}") + + if (NOT EXISTS "${PVS_STUDIO_CONVERTER}") + message(FATAL_ERROR "plog-converter is not found") + endif () + + default(PVS_STUDIO_MODE 
"GA:1,2") + default(PVS_STUDIO_PREPROCESSOR "${DEFAULT_PREPROCESSOR}") + if (WIN32) + default(PVS_STUDIO_PLATFORM "x64") + else () + default(PVS_STUDIO_PLATFORM "linux64") + endif () + + string(REPLACE ";" "+" PVS_STUDIO_MODE "${PVS_STUDIO_MODE}") + + if (PVS_STUDIO_EMPTY_CONFIG) + set(PVS_STUDIO_CONFIG_COMMAND "${CMAKE_COMMAND}" -E echo "${PVS_STUDIO_CFG_TEXT}" > "${PVS_STUDIO_CONFIG}") + else () + set(PVS_STUDIO_CONFIG_COMMAND "${CMAKE_COMMAND}" -E touch "${PVS_STUDIO_CONFIG}") + endif () + + add_custom_command(OUTPUT "${PVS_STUDIO_CONFIG}" + COMMAND ${PVS_STUDIO_CONFIG_COMMAND} + WORKING_DIRECTORY "${BINARY_DIR}" + COMMENT "Generating PVS-Studio.cfg") + + if (NOT "${PVS_STUDIO_PREPROCESSOR}" MATCHES "^${PVS_STUDIO_SUPPORTED_PREPROCESSORS}$") + message(FATAL_ERROR "Preprocessor ${PVS_STUDIO_PREPROCESSOR} isn't supported. Available options: ${PVS_STUDIO_SUPPORTED_PREPROCESSORS}.") + endif () + + pvs_studio_append_standard_flag(PVS_STUDIO_CXX_FLAGS "${CMAKE_CXX_STANDARD}") + pvs_studio_set_directory_flags("${CMAKE_CURRENT_SOURCE_DIR}" PVS_STUDIO_CXX_FLAGS PVS_STUDIO_C_FLAGS) + + if (NOT "${PVS_STUDIO_LICENSE}" STREQUAL "") + pvs_studio_join_path(PVS_STUDIO_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}" "${PVS_STUDIO_LICENSE}") + list(APPEND PVS_STUDIO_ARGS --lic-file "${PVS_STUDIO_LICENSE}") + endif () + + list(APPEND PVS_STUDIO_ARGS --cfg "${PVS_STUDIO_CONFIG}" + --platform "${PVS_STUDIO_PLATFORM}" + --preprocessor "${PVS_STUDIO_PREPROCESSOR}") + + if (NOT "${CMAKE_CXX_COMPILER}" STREQUAL "") + list(APPEND PVS_STUDIO_ARGS --cxx "${CMAKE_CXX_COMPILER}") + endif () + + if (NOT "${CMAKE_C_COMPILER}" STREQUAL "") + list(APPEND PVS_STUDIO_ARGS --cc "${CMAKE_C_COMPILER}") + endif () + + set(PVS_STUDIO_PLOGS "") + + set(PVS_STUDIO_RECURSIVE_TARGETS_NEW) + if (${PVS_STUDIO_RECURSIVE}) + foreach (TARGET IN LISTS PVS_STUDIO_ANALYZE) + list(APPEND PVS_STUDIO_RECURSIVE_TARGETS_NEW "${TARGET}") + pvs_studio_get_recursive_targets("${TARGET}") + endforeach () + endif () + + set(inc_path) + + foreach (TARGET ${PVS_STUDIO_ANALYZE}) + set(DIR "${CMAKE_CURRENT_SOURCE_DIR}") + string(FIND "${TARGET}" ":" DELIM) + if ("${DELIM}" GREATER "-1") + math(EXPR DELIMI "${DELIM}+1") + string(SUBSTRING "${TARGET}" "${DELIMI}" "-1" DIR) + string(SUBSTRING "${TARGET}" "0" "${DELIM}" TARGET) + pvs_studio_join_path(DIR "${CMAKE_CURRENT_SOURCE_DIR}" "${DIR}") + else () + get_target_property(TARGET_SOURCE_DIR "${TARGET}" SOURCE_DIR) + if (EXISTS "${TARGET_SOURCE_DIR}") + set(DIR "${TARGET_SOURCE_DIR}") + endif () + endif () + pvs_studio_analyze_target("${TARGET}" "${DIR}") + list(APPEND PVS_STUDIO_DEPENDS "${TARGET}") + + if ("${inc_path}" STREQUAL "") + set(inc_path "$") + else () + set(inc_path "${inc_path}$$") + endif () + endforeach () + + foreach (TARGET ${PVS_STUDIO_RECURSIVE_TARGETS_NEW}) + set(DIR "${CMAKE_CURRENT_SOURCE_DIR}") + get_target_property(TARGET_SOURCE_DIR "${TARGET}" SOURCE_DIR) + if (EXISTS "${TARGET_SOURCE_DIR}") + set(DIR "${TARGET_SOURCE_DIR}") + endif () + pvs_studio_analyze_target("${TARGET}" "${DIR}") + list(APPEND PVS_STUDIO_DEPENDS "${TARGET}") + endforeach () + + set(PVS_STUDIO_TARGET_CXX_FLAGS "") + set(PVS_STUDIO_TARGET_C_FLAGS "") + foreach (SOURCE ${PVS_STUDIO_SOURCES}) + pvs_studio_analyze_file("${SOURCE}" "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") + endforeach () + + if (PVS_STUDIO_COMPILE_COMMANDS) + set(COMPILE_COMMANDS_LOG "${PVS_STUDIO_LOG}.pvs.analyzer.raw") + if (NOT CMAKE_EXPORT_COMPILE_COMMANDS) + message(FATAL_ERROR "You should set CMAKE_EXPORT_COMPILE_COMMANDS to TRUE") 
+ endif () + add_custom_command( + OUTPUT "${COMPILE_COMMANDS_LOG}" + COMMAND "${PVS_STUDIO_BIN}" analyze -i + --output-file "${COMPILE_COMMANDS_LOG}.always" + ${PVS_STUDIO_ARGS} + COMMENT "Analyzing with PVS-Studio" + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" + DEPENDS "${PVS_STUDIO_CONFIG}" + ) + list(APPEND PVS_STUDIO_PLOGS_LOGS "${COMPILE_COMMANDS_LOG}.always") + list(APPEND PVS_STUDIO_PLOGS_DEPENDENCIES "${COMPILE_COMMANDS_LOG}") + endif () + + pvs_studio_relative_path(LOG_RELATIVE "${CMAKE_BINARY_DIR}" "${PVS_STUDIO_LOG}") + if (PVS_STUDIO_PLOGS OR PVS_STUDIO_COMPILE_COMMANDS) + if (WIN32) + string(REPLACE / \\ PVS_STUDIO_PLOGS "${PVS_STUDIO_PLOGS}") + endif () + if (WIN32) + set(COMMANDS COMMAND type ${PVS_STUDIO_PLOGS} ${PVS_STUDIO_PLOGS_LOGS} > "${PVS_STUDIO_LOG}" 2>nul) + else () + set(COMMANDS COMMAND cat ${PVS_STUDIO_PLOGS} ${PVS_STUDIO_PLOGS_LOGS} > "${PVS_STUDIO_LOG}") + endif () + set(COMMENT "Generating ${LOG_RELATIVE}") + if (NOT "${PVS_STUDIO_FORMAT}" STREQUAL "" OR PVS_STUDIO_OUTPUT) + if ("${PVS_STUDIO_FORMAT}" STREQUAL "") + set(PVS_STUDIO_FORMAT "errorfile") + endif () + list(APPEND COMMANDS + COMMAND "${CMAKE_COMMAND}" -E remove -f "${PVS_STUDIO_LOG}.pvs.raw" + COMMAND "${CMAKE_COMMAND}" -E rename "${PVS_STUDIO_LOG}" "${PVS_STUDIO_LOG}.pvs.raw" + COMMAND "${PVS_STUDIO_CONVERTER}" -t "${PVS_STUDIO_FORMAT}" "${PVS_STUDIO_LOG}.pvs.raw" -o "${PVS_STUDIO_LOG}" -a "${PVS_STUDIO_MODE}" + ) + if(NOT PVS_STUDIO_KEEP_COMBINED_PLOG) + list(APPEND COMMANDS COMMAND "${CMAKE_COMMAND}" -E remove -f "${PVS_STUDIO_LOG}.pvs.raw") + endif() + endif () + else () + set(COMMANDS COMMAND "${CMAKE_COMMAND}" -E touch "${PVS_STUDIO_LOG}") + set(COMMENT "Generating ${LOG_RELATIVE}: no sources found") + endif () + + if (WIN32) + string(REPLACE / \\ PVS_STUDIO_LOG "${PVS_STUDIO_LOG}") + endif () + + add_custom_command(OUTPUT "${PVS_STUDIO_LOG}" + ${COMMANDS} + COMMENT "${COMMENT}" + DEPENDS ${PVS_STUDIO_PLOGS} ${PVS_STUDIO_PLOGS_DEPENDENCIES} + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}") + + if (PVS_STUDIO_ALL) + set(ALL "ALL") + else () + set(ALL "") + endif () + + if (PVS_STUDIO_OUTPUT) + if (PVS_STUDIO_HIDE_HELP AND NOT WIN32) + set(COMMANDS COMMAND grep -v " error: Help:" ${PVS_STUDIO_LOG} 1>&2 || exit 0) + elseif (WIN32) + set(COMMANDS COMMAND type "${PVS_STUDIO_LOG}" 1>&2) + else () + set(COMMANDS COMMAND cat "${PVS_STUDIO_LOG}" 1>&2) + endif() + else () + set(COMMANDS "") + endif () + + add_custom_target("${PVS_STUDIO_TARGET}" ${ALL} ${COMMANDS} WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" DEPENDS ${PVS_STUDIO_DEPENDS} "${PVS_STUDIO_LOG}") + + # A workaround to add implicit dependencies of source files from include directories + set_target_properties("${PVS_STUDIO_TARGET}" PROPERTIES INCLUDE_DIRECTORIES "${inc_path}") +endfunction () diff --git a/3rdParty/iresearch/core/CMakeLists.txt b/3rdParty/iresearch/core/CMakeLists.txt index f4497a86a843..8ba6230ae172 100644 --- a/3rdParty/iresearch/core/CMakeLists.txt +++ b/3rdParty/iresearch/core/CMakeLists.txt @@ -125,6 +125,7 @@ set(IResearch_core_sources ./search/multiterm_query.cpp ./search/term_query.cpp ./search/boolean_filter.cpp + ./search/ngram_similarity_filter.cpp ./store/data_input.cpp ./store/data_output.cpp ./store/directory.cpp @@ -151,6 +152,7 @@ set(IResearch_core_sources ./utils/index_utils.cpp ./utils/math_utils.cpp ./utils/levenshtein_utils.cpp + ./utils/wildcard_utils.cpp ./utils/levenshtein_default_pdp.cpp ./utils/memory.cpp ./utils/text_format.cpp @@ -165,6 +167,7 @@ set(IResearch_core_sources ./utils/network_utils.cpp 
./utils/cpuinfo.cpp ./utils/numeric_utils.cpp + ./utils/attribute_range.cpp ${IResearch_core_os_specific_sources} ${IResearch_core_optimized_sources} ) @@ -212,6 +215,7 @@ set(IResearch_core_headers ./search/disjunction.hpp ./search/conjunction.hpp ./search/exclusion.hpp + ./search/ngram_similarity_filter.hpp ./store/data_input.hpp ./store/data_output.hpp ./store/directory.hpp @@ -256,6 +260,7 @@ set(IResearch_core_headers ./utils/bitset.hpp ./utils/bitvector.hpp ./utils/type_id.hpp + ./utils/attribute_range.hpp ./shared.hpp ./types.hpp ) @@ -589,6 +594,7 @@ if(MSVC) ${Unwind_STATIC_LIBS} ${DL_LIBRARY} ${MSVC_ONLY_LIBRARIES} + ${SIMD_LIBRARY_STATIC} ) endif() diff --git a/3rdParty/iresearch/core/analysis/token_attributes.hpp b/3rdParty/iresearch/core/analysis/token_attributes.hpp index 61aaf0a1e800..d252c5aedd0c 100644 --- a/3rdParty/iresearch/core/analysis/token_attributes.hpp +++ b/3rdParty/iresearch/core/analysis/token_attributes.hpp @@ -55,7 +55,7 @@ struct IRESEARCH_API offset : attribute { }; ////////////////////////////////////////////////////////////////////////////// -/// @class offset +/// @class increment /// @brief represents token increment in a stream ////////////////////////////////////////////////////////////////////////////// struct IRESEARCH_API increment : basic_attribute { @@ -173,6 +173,10 @@ class IRESEARCH_API position DECLARE_REFERENCE(position); DECLARE_TYPE_ID(attribute::type_id); + static irs::position* extract(const attribute_view& attrs) noexcept { + return attrs.get().get(); + } + const irs::attribute_view& attributes() const noexcept override { return attrs_; } @@ -186,6 +190,8 @@ class IRESEARCH_API position return value_; } + virtual void reset() = 0; + virtual bool next() = 0; protected: diff --git a/3rdParty/iresearch/core/formats/formats_10.cpp b/3rdParty/iresearch/core/formats/formats_10.cpp index b12893d42cf7..4115d77b906d 100644 --- a/3rdParty/iresearch/core/formats/formats_10.cpp +++ b/3rdParty/iresearch/core/formats/formats_10.cpp @@ -253,8 +253,24 @@ class postings_writer_base : public irs::postings_writer { static const int32_t TERMS_FORMAT_MIN = 0; static const int32_t TERMS_FORMAT_MAX = TERMS_FORMAT_MIN; - static const int32_t FORMAT_MIN = 0; - static const int32_t FORMAT_MAX = 1; // sse + static constexpr int32_t FORMAT_MIN = 0; + // positions are stored one based (if first osition is 1 first offset is 0) + // This forces reader to adjust first read position of every document additionally to + // stored increment. Or incorrect positions will be read - 1 2 3 will be stored + // (offsets 0 1 1) but 0 1 2 will be read. 
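// (illustrative example, not part of the original comment) for positions {1, 2, 3}:
// one-based storage writes the deltas {0, 1, 1}, so a reader that simply accumulates deltas
// starting from 0 reconstructs {0, 1, 2} unless it bumps the first position of each document;
// zero-based storage writes the deltas {1, 1, 1}, and plain accumulation from 0 already yields {1, 2, 3}.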
At least this will lead to incorrect results in + // by_same_positions filter if searching for position 1 + static constexpr int32_t FORMAT_POSITIONS_ONEBASED = FORMAT_MIN; + // positions are stored one based, sse used + static constexpr int32_t FORMAT_SSE_POSITIONS_ONEBASED = FORMAT_POSITIONS_ONEBASED + 1; + + // positions are stored zero based + // if first position is 1 first offset is also 1 + // so no need to adjust position while reading first + // position for document, always just increment from previous pos + static constexpr int32_t FORMAT_POSITIONS_ZEROBASED = FORMAT_SSE_POSITIONS_ONEBASED + 1; + // positions are stored zero based, sse used + static constexpr int32_t FORMAT_SSE_POSITIONS_ZEROBASED = FORMAT_POSITIONS_ZEROBASED + 1; + static constexpr int32_t FORMAT_MAX = FORMAT_SSE_POSITIONS_ZEROBASED; static const uint32_t MAX_SKIP_LEVELS = 10; static const uint32_t BLOCK_SIZE = 128; @@ -272,7 +288,9 @@ class postings_writer_base : public irs::postings_writer { postings_writer_base(int32_t postings_format_version, int32_t terms_format_version) : skip_(BLOCK_SIZE, SKIP_N), postings_format_version_(postings_format_version), - terms_format_version_(terms_format_version) { + terms_format_version_(terms_format_version), + pos_min_(postings_format_version_ >= FORMAT_POSITIONS_ZEROBASED ? // first position offsets now is format dependent + pos_limits::invalid(): pos_limits::min()) { assert(postings_format_version >= FORMAT_MIN && postings_format_version <= FORMAT_MAX); assert(terms_format_version >= TERMS_FORMAT_MIN && terms_format_version <= TERMS_FORMAT_MAX); attrs_.emplace(docs_); @@ -368,7 +386,9 @@ class postings_writer_base : public irs::postings_writer { bool full() const { return BLOCK_SIZE == size; } void next(uint32_t pos) { last = pos, ++size; } - void pos(uint32_t pos) { buf[size] = pos; } + void pos(uint32_t pos) { + buf[size] = pos; + } void reset() noexcept { stream::reset(); @@ -444,6 +464,7 @@ class postings_writer_base : public irs::postings_writer { size_t docs_count_{}; // number of processed documents const int32_t postings_format_version_; const int32_t terms_format_version_; + uint32_t pos_min_; // initial base value for writing positions offsets }; MSVC2015_ONLY(__pragma(warning(push))) @@ -782,9 +803,8 @@ void postings_writer_base::begin_doc(doc_id_t id, const frequency* freq) { FormatTraits::write_block(*doc_out_, doc_.freqs, BLOCK_SIZE, buf_); } } - if (pos_) { - pos_->last = 0; + pos_->last = pos_min_; } if (pay_) { @@ -889,15 +909,14 @@ irs::postings_writer::state postings_writer::w assert(doc_limits::valid(did)); begin_doc(did, freq.get()); docs_.value.set(did - doc_limits::min()); - if (pos) { if (VolatileAttributes) { auto& attrs = pos->attributes(); offs = attrs.get().get(); pay = attrs.get().get(); } - while (pos->next()) { + assert(pos_limits::valid(pos->value())); add_position(pos->value(), offs, pay); } } @@ -937,8 +956,8 @@ struct skip_context : skip_state { struct doc_state { const index_input* pos_in; const index_input* pay_in; - version10::term_meta* term_state; - uint32_t* freq; + const version10::term_meta* term_state; + const uint32_t* freq; uint32_t* enc_buf; uint64_t tail_start; size_t tail_length; @@ -1117,7 +1136,6 @@ struct position_impl } pay_in_->seek(state.term_state->pay_start); - } void prepare(const skip_state& state) { @@ -1252,7 +1270,6 @@ struct position_impl } pay_in_->seek(state.term_state->pay_start); - } void prepare(const skip_state& state) { @@ -1351,6 +1368,7 @@ struct position_impl { throw io_error("failed to reopen 
positions input"); } + cookie_.file_pointer_ = state.term_state->pos_start; pos_in_->seek(state.term_state->pos_start); freq_ = state.freq; features_ = state.features; @@ -1363,6 +1381,15 @@ struct position_impl { pos_in_->seek(state.pos_ptr); pend_pos_ = state.pend_pos; buf_pos_ = postings_writer_base::BLOCK_SIZE; + cookie_.pend_pos_ = pend_pos_; + } + + void reset() { + if (std::numeric_limits::max() != cookie_.file_pointer_) { + buf_pos_ = postings_writer_base::BLOCK_SIZE; + pend_pos_ = cookie_.pend_pos_; + pos_in_->seek(cookie_.file_pointer_); + } } void read_attributes() { } @@ -1411,9 +1438,14 @@ struct position_impl { uint32_t pend_pos_{}; // how many positions "behind" we are uint64_t tail_start_; // file pointer where the last (vInt encoded) pos delta block is size_t tail_length_; // number of positions in the last (vInt encoded) pos delta block - uint32_t buf_pos_{ postings_writer_base::BLOCK_SIZE } ; // current position in pos_deltas_ buffer + uint32_t buf_pos_{ postings_writer_base::BLOCK_SIZE }; // current position in pos_deltas_ buffer index_input::ptr pos_in_; features features_; + + struct cookie { + uint32_t pend_pos_{}; + size_t file_pointer_ = std::numeric_limits::max(); + } cookie_; }; // position_impl template @@ -1445,9 +1477,11 @@ class position final : public irs::position, refill(); this->buf_pos_ = 0; } - + if /*constexpr*/ (IteratorTraits::one_based_position_storage()) { + value_ += (uint32_t)(!pos_limits::valid(value_)); + } value_ += this->pos_deltas_[this->buf_pos_]; - + assert(irs::pos_limits::valid(value_)); this->read_attributes(); ++this->buf_pos_; @@ -1455,6 +1489,11 @@ class position final : public irs::position, return true; } + virtual void reset() override { + value_ = pos_limits::invalid(); + impl::reset(); + } + // prepares iterator to work // or notifies iterator that doc iterator has skipped to a new block using impl::prepare; @@ -1462,6 +1501,7 @@ class position final : public irs::position, // notify iterator that corresponding doc_iterator has moved forward void notify(uint32_t n) { this->pend_pos_ += n; + this->cookie_.pend_pos_ += n; } void clear() noexcept { @@ -1495,9 +1535,7 @@ class position final : public irs::position, if (count < left) { impl::skip(count); } - clear(); - value_ = 0; } }; // position @@ -1601,6 +1639,13 @@ class doc_iterator final : public irs::doc_iterator_base { attrs_.emplace(pos_); } } + + if (1 == term_state_.docs_count) { + *docs_ = (doc_limits::min)() + term_state_.e_single_doc; + *doc_freqs_ = term_freq_; + doc_freq_ = doc_freqs_; + ++end_; + } } virtual doc_id_t seek(doc_id_t target) override { @@ -1746,6 +1791,8 @@ class doc_iterator final : public irs::doc_iterator_base { } void refill() { + // should never call refill for singleton documents + assert(1 != term_state_.docs_count); const auto left = term_state_.docs_count - cur_pos_; if (left >= postings_writer_base::BLOCK_SIZE) { @@ -1772,12 +1819,6 @@ class doc_iterator final : public irs::doc_iterator_base { } end_ = docs_ + postings_writer_base::BLOCK_SIZE; - } else if (1 == term_state_.docs_count) { - docs_[0] = term_state_.e_single_doc; - if (term_freq_) { - doc_freqs_[0] = term_freq_; - } - end_ = docs_ + 1; } else { read_end_block(left); end_ = docs_ + left; @@ -1789,7 +1830,7 @@ class doc_iterator final : public irs::doc_iterator_base { } begin_ = docs_; - doc_freq_ = docs_ + postings_writer_base::BLOCK_SIZE; + doc_freq_ = doc_freqs_; } std::vector skip_levels_; @@ -1915,7 +1956,7 @@ struct index_meta_writer final: public irs::index_meta_writer { 
template<> std::string file_name(const index_meta& meta) { return file_name(index_meta_writer::FORMAT_PREFIX_TMP, meta.generation()); -}; +} struct index_meta_reader final: public irs::index_meta_reader { virtual bool last_segments_file( @@ -1932,7 +1973,7 @@ struct index_meta_reader final: public irs::index_meta_reader { template<> std::string file_name(const index_meta& meta) { return file_name(index_meta_writer::FORMAT_PREFIX, meta.generation()); -}; +} MSVC2015_ONLY(__pragma(warning(push))) MSVC2015_ONLY(__pragma(warning(disable: 4592))) // symbol will be dynamically initialized (implementation limitation) false positive bug in VS2015.1 @@ -2366,7 +2407,7 @@ class document_mask_writer final: public irs::document_mask_writer { template<> std::string file_name(const segment_meta& meta) { return file_name(meta.name, meta.version, document_mask_writer::FORMAT_EXT); -}; +} MSVC2015_ONLY(__pragma(warning(push))) MSVC2015_ONLY(__pragma(warning(disable: 4592))) // symbol will be dynamically initialized (implementation limitation) false positive bug in VS2015.1 @@ -2527,9 +2568,9 @@ std::string file_name( const segment_meta& meta ) { return irs::file_name(meta.name, columns::meta_writer::FORMAT_EXT); -}; +} -void meta_writer::prepare(directory& dir, const segment_meta& meta) { +void meta_writer::prepare(directory& dir, const segment_meta& meta) { auto filename = file_name(meta); out_ = dir.create(filename); @@ -2673,7 +2714,7 @@ bool meta_reader::prepare( if (irs::decrypt(filename, *in_, enc, in_cipher_)) { assert(in_cipher_ && in_cipher_->block_size()); - const auto blocks_in_buffer= math::div_ceil64( + const auto blocks_in_buffer = math::div_ceil64( buffered_index_input::DEFAULT_BUFFER_SIZE, in_cipher_->block_size() ); @@ -3185,7 +3226,7 @@ class writer final : public irs::columnstore_writer { template<> std::string file_name(const segment_meta& meta) { return file_name(meta.name, columns::writer::FORMAT_EXT); -}; +} MSVC2015_ONLY(__pragma(warning(push))) MSVC2015_ONLY(__pragma(warning(disable: 4592))) // symbol will be dynamically initialized (implementation limitation) false positive bug in VS2015.1 @@ -3746,7 +3787,7 @@ class dense_fixed_offset_block : util::noncopyable { const auto offset = (value_ - value_min_)*avg_length_; assert(payload_ != &DUMMY); - *payload_= bytes_ref( + *payload_ = bytes_ref( data_.c_str() + offset, value_ == value_back_ ? 
data_.size() - offset : avg_length_ ); @@ -4514,7 +4555,7 @@ class sparse_column final : public column { const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), *it); return cached.value(key, value); - }; + } virtual bool visit( const columnstore_reader::values_visitor_f& visitor @@ -5266,7 +5307,7 @@ size_t postings_reader_base::decode( return size_t(std::distance(in, p)); } -template +template class postings_reader final: public postings_reader_base { public: template @@ -5275,6 +5316,7 @@ class postings_reader final: public postings_reader_base { static constexpr bool position() { return Freq && Pos; } static constexpr bool offset() { return position() && Offset; } static constexpr bool payload() { return position() && Payload; } + static constexpr bool one_based_position_storage() { return OneBasedPositionStorage; } }; virtual irs::doc_iterator::ptr iterator( @@ -5290,8 +5332,8 @@ class postings_reader final: public postings_reader_base { #pragma GCC diagnostic ignored "-Wswitch" #endif -template -irs::doc_iterator::ptr postings_reader::iterator( +template +irs::doc_iterator::ptr postings_reader::iterator( const flags& field, const attribute_view& attrs, const flags& req) { @@ -5468,7 +5510,7 @@ irs::postings_writer::ptr format10::get_postings_writer(bool volatile_state) con } irs::postings_reader::ptr format10::get_postings_reader() const { - return irs::postings_reader::make<::postings_reader>(); + return irs::postings_reader::make<::postings_reader>(); } /*static*/ irs::format::ptr format10::make() { @@ -5578,6 +5620,51 @@ columnstore_writer::ptr format12::get_columnstore_writer() const { DEFINE_FORMAT_TYPE_NAMED(::format12, "1_2"); REGISTER_FORMAT_MODULE(::format12, MODULE_NAME); +// ---------------------------------------------------------------------------- +// --SECTION-- format13 +// ---------------------------------------------------------------------------- + +class format13 : public format12 { + public: + DECLARE_FORMAT_TYPE(); + DECLARE_FACTORY(); + + format13() noexcept : format12(format13::type()) { } + + virtual irs::postings_writer::ptr get_postings_writer(bool volatile_state) const override; + virtual irs::postings_reader::ptr get_postings_reader() const override; + + protected: + explicit format13(const irs::format::type_id& type) noexcept + : format12(type) { + } +}; + +irs::postings_writer::ptr format13::get_postings_writer(bool volatile_state) const { + constexpr const auto VERSION = postings_writer_base::FORMAT_POSITIONS_ZEROBASED; + + if (volatile_state) { + return memory::make_unique<::postings_writer>(VERSION); + } + + return memory::make_unique<::postings_writer>(VERSION); +} + +irs::postings_reader::ptr format13::get_postings_reader() const { + return irs::postings_reader::make<::postings_reader>(); +} + +/*static*/ irs::format::ptr format13::make() { + static const ::format13 INSTANCE; + + // aliasing constructor + return irs::format::ptr(irs::format::ptr(), &INSTANCE); +} + + +DEFINE_FORMAT_TYPE_NAMED(::format13, "1_3"); +REGISTER_FORMAT_MODULE(::format13, MODULE_NAME); + // ---------------------------------------------------------------------------- // --SECTION-- format12sse // ---------------------------------------------------------------------------- @@ -5618,7 +5705,7 @@ class format12simd final : public format12 { }; // format12simd irs::postings_writer::ptr format12simd::get_postings_writer(bool volatile_state) const { - constexpr const auto VERSION = postings_writer_base::FORMAT_MAX; + constexpr const auto VERSION = 
postings_writer_base::FORMAT_SSE_POSITIONS_ONEBASED; if (volatile_state) { return memory::make_unique<::postings_writer>(VERSION); @@ -5628,7 +5715,7 @@ irs::postings_writer::ptr format12simd::get_postings_writer(bool volatile_state) } irs::postings_reader::ptr format12simd::get_postings_reader() const { - return irs::postings_reader::make<::postings_reader>(); + return irs::postings_reader::make<::postings_reader>(); } /*static*/ irs::format::ptr format12simd::make() { @@ -5641,6 +5728,46 @@ irs::postings_reader::ptr format12simd::get_postings_reader() const { DEFINE_FORMAT_TYPE_NAMED(::format12simd, "1_2simd"); REGISTER_FORMAT_MODULE(::format12simd, MODULE_NAME); + +// ---------------------------------------------------------------------------- +// --SECTION-- format13sse +// ---------------------------------------------------------------------------- + +class format13simd final : public format13 { + public: + DECLARE_FORMAT_TYPE(); + DECLARE_FACTORY(); + + format13simd() noexcept : format13(format13simd::type()) { } + + virtual irs::postings_writer::ptr get_postings_writer(bool volatile_state) const override; + virtual irs::postings_reader::ptr get_postings_reader() const override; +}; // format13simd + +irs::postings_writer::ptr format13simd::get_postings_writer(bool volatile_state) const { + constexpr const auto VERSION = postings_writer_base::FORMAT_SSE_POSITIONS_ZEROBASED; + + if (volatile_state) { + return memory::make_unique<::postings_writer>(VERSION); + } + + return memory::make_unique<::postings_writer>(VERSION); +} + +irs::postings_reader::ptr format13simd::get_postings_reader() const { + return irs::postings_reader::make<::postings_reader>(); +} + +/*static*/ irs::format::ptr format13simd::make() { + static const ::format13simd INSTANCE; + + // aliasing constructor + return irs::format::ptr(irs::format::ptr(), &INSTANCE); +} + +DEFINE_FORMAT_TYPE_NAMED(::format13simd, "1_3simd"); +REGISTER_FORMAT_MODULE(::format13simd, MODULE_NAME); + #endif // IRESEARCH_SSE2 NS_END diff --git a/3rdParty/iresearch/core/formats/formats_burst_trie.cpp b/3rdParty/iresearch/core/formats/formats_burst_trie.cpp index 7114742ac918..85d786ec5c14 100644 --- a/3rdParty/iresearch/core/formats/formats_burst_trie.cpp +++ b/3rdParty/iresearch/core/formats/formats_burst_trie.cpp @@ -786,6 +786,7 @@ class automaton_term_iterator final : public term_iterator_base { : term_iterator_base(owner), acceptor_(&matcher.GetFst()), matcher_(&matcher) { + attrs_.emplace(payload_); } virtual bool next() override; @@ -893,6 +894,14 @@ class automaton_term_iterator final : public term_iterator_base { automaton::StateId state_; // state to which current block belongs }; // block_iterator + struct payload : irs::payload { + payload() noexcept { + irs::payload::value = bytes_ref(&value, sizeof(value)); + } + + automaton::Weight::PayloadType value; + }; // payload + typedef std::deque block_stack_t; block_iterator* pop_block() noexcept { @@ -924,6 +933,7 @@ class automaton_term_iterator final : public term_iterator_base { automaton_table_matcher* matcher_; block_stack_t block_stack_; block_iterator* cur_block_{}; + payload payload_{}; // payload of the matched automaton state }; // automaton_term_iterator bool automaton_term_iterator::next() { @@ -999,7 +1009,9 @@ bool automaton_term_iterator::next() { switch (cur_block_->type()) { case ET_TERM: { - if (acceptor_->Final(state)) { + const auto weight = acceptor_->Final(state); + if (weight) { + payload_.value = weight.Payload(); copy(suffix, cur_block_->prefix(), suffix_size); 
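The make() factories above hand out a pointer to a function-local static through the shared_ptr aliasing constructor, so the returned handle never owns or deletes the format instance. A minimal sketch of that idiom with illustrative types, not the iresearch classes:

#include <iostream>
#include <memory>

struct format { int version; };

std::shared_ptr<const format> make_format() {
  static const format INSTANCE{13};
  // empty owner + raw pointer: the returned handle never deletes INSTANCE
  return std::shared_ptr<const format>(std::shared_ptr<const format>(), &INSTANCE);
}

int main() {
  auto f = make_format();
  std::cout << f->version << " use_count=" << f.use_count() << "\n";  // 13 use_count=0
}

Because the owner is empty, no control block is allocated and use_count() stays 0, which is why the comment in the patch calls this the aliasing constructor.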
match = MATCH; } @@ -2437,7 +2449,7 @@ irs::field_iterator::ptr field_reader::iterator() const { return memory::make_managed(it.release()); } -size_t field_reader::size() const { +size_t field_reader::size() const noexcept { return fields_.size(); } diff --git a/3rdParty/iresearch/core/formats/formats_burst_trie.hpp b/3rdParty/iresearch/core/formats/formats_burst_trie.hpp index 46fd23687f82..e32270e97d22 100644 --- a/3rdParty/iresearch/core/formats/formats_burst_trie.hpp +++ b/3rdParty/iresearch/core/formats/formats_burst_trie.hpp @@ -396,7 +396,7 @@ class field_reader final : public irs::field_reader { virtual const irs::term_reader* field(const string_ref& field) const override; virtual irs::field_iterator::ptr iterator() const override; - virtual size_t size() const override; + virtual size_t size() const noexcept override; private: friend class detail::term_iterator_base; diff --git a/3rdParty/iresearch/core/index/field_data.cpp b/3rdParty/iresearch/core/index/field_data.cpp index 9638310d8609..07e48e812e23 100644 --- a/3rdParty/iresearch/core/index/field_data.cpp +++ b/3rdParty/iresearch/core/index/field_data.cpp @@ -224,8 +224,9 @@ class pos_iterator final: public irs::position { pay_.resize(size); prox_in_.read(pay_.data(), size); } - + value_ += pos; + assert(pos_limits::valid(value_)); if (has_offs_) { offs_.start += irs::vread(prox_in_); @@ -237,6 +238,10 @@ class pos_iterator final: public irs::position { return true; } + virtual void reset() override { + assert(false); // unsupported + } + private: Reader prox_in_; const frequency* freq_{}; // number of term positions in a document @@ -778,7 +783,7 @@ void field_data::reset(doc_id_t doc_id) { return; // nothing to do } - pos_ = integer_traits::const_max; + pos_ = pos_limits::invalid(); last_pos_ = 0; len_ = 0; num_overlap_ = 0; diff --git a/3rdParty/iresearch/core/index/index_reader.hpp b/3rdParty/iresearch/core/index/index_reader.hpp index 16363ba6c3d1..33f04e90af1d 100644 --- a/3rdParty/iresearch/core/index/index_reader.hpp +++ b/3rdParty/iresearch/core/index/index_reader.hpp @@ -107,7 +107,7 @@ struct IRESEARCH_API index_reader { virtual size_t size() const = 0; // first sub-segment - reader_iterator begin() const { + reader_iterator begin() const noexcept { return reader_iterator(*this, 0); } diff --git a/3rdParty/iresearch/core/iql/query_builder.cpp b/3rdParty/iresearch/core/iql/query_builder.cpp index 70a803eb12fb..9561055e5bbc 100644 --- a/3rdParty/iresearch/core/iql/query_builder.cpp +++ b/3rdParty/iresearch/core/iql/query_builder.cpp @@ -223,7 +223,7 @@ const irs::iql::query_builder::branch_builder_function_t SIMILAR_BRANCH_BUILDER auto& node = root.proxy().field(field); for (auto& term = tokens->attributes().get(); tokens->next();) { - node.push_back(term->value()); + node.push_back(irs::by_phrase::simple_term{term->value()}); } return true; diff --git a/3rdParty/iresearch/core/search/bm25.cpp b/3rdParty/iresearch/core/search/bm25.cpp index b16ee0dbbfd1..c15ebd6c03aa 100644 --- a/3rdParty/iresearch/core/search/bm25.cpp +++ b/3rdParty/iresearch/core/search/bm25.cpp @@ -304,14 +304,16 @@ struct score_ctx : public irs::score_ctx { float_t k, irs::boost_t boost, const bm25::stats& stats, - const frequency* freq) noexcept - : freq_(freq ? freq : &EMPTY_FREQ), + const frequency* freq, + const filter_boost* fb = nullptr) noexcept + : freq_(freq ? 
freq : &EMPTY_FREQ), filter_boost_(fb), num_(boost * (k + 1) * stats.idf), - norm_const_(k) { + norm_const_(k) { assert(freq_); } const frequency* freq_; // document frequency + const filter_boost* filter_boost_; float_t num_; // partially precomputed numerator : boost * (k + 1) * idf float_t norm_const_; // 'k' factor }; // score_ctx @@ -323,8 +325,9 @@ struct norm_score_ctx final : public score_ctx { irs::boost_t boost, const bm25::stats& stats, const frequency* freq, - irs::norm&& norm) noexcept - : score_ctx(k, boost, stats, freq), + irs::norm&& norm, + const filter_boost* fb = nullptr) noexcept + : score_ctx(k, boost, stats, freq, fb), norm_(std::move(norm)) { // if there is no norms, assume that b==0 if (!norm_.empty()) { @@ -417,6 +420,7 @@ class sort final : public irs::sort::prepared_basic } auto& stats = stats_cast(query_stats); + auto& filter_boost = doc_attrs.get(); if (b_ != 0.f) { irs::norm norm; @@ -429,28 +433,55 @@ class sort final : public irs::sort::prepared_basic } if (norm.reset(segment, field.meta().norm, *doc)) { - return { - memory::make_unique(k_, boost, stats, freq.get(), std::move(norm)), - [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + if (filter_boost) { + return { + memory::make_unique(k_, boost, stats, freq.get(), std::move(norm), filter_boost.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { auto& state = *static_cast(ctx); - + assert(state.filter_boost_); const float_t tf = ::SQRT(state.freq_->value); - irs::sort::score_cast(score_buf) = state.num_ * tf / (state.norm_const_ + state.norm_length_ * state.norm_.read() + tf); - } - }; + irs::sort::score_cast(score_buf) = state.filter_boost_->value * + state.num_ * + tf / + (state.norm_const_ + state.norm_length_ * state.norm_.read() + tf); + } + }; + } else { + return { + memory::make_unique(k_, boost, stats, freq.get(), std::move(norm)), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + auto& state = *static_cast(ctx); + + const float_t tf = ::SQRT(state.freq_->value); + irs::sort::score_cast(score_buf) = state.num_ * tf / (state.norm_const_ + state.norm_length_ * state.norm_.read() + tf); + } + }; + } } } - // BM11 - return { - memory::make_unique(k_, boost, stats, freq.get()), - [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + // BM15 + if (filter_boost) { + return { + memory::make_unique(k_, boost, stats, freq.get(), filter_boost.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { auto& state = *static_cast(ctx); - + assert(state.filter_boost_); const float_t tf = ::SQRT(state.freq_->value); - irs::sort::score_cast(score_buf) = state.num_ * tf / (state.norm_const_ + tf); - } - }; + irs::sort::score_cast(score_buf) = state.filter_boost_->value * state.num_ * tf / (state.norm_const_ + tf); + } + }; + } else { + return { + memory::make_unique(k_, boost, stats, freq.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + auto& state = *static_cast(ctx); + + const float_t tf = ::SQRT(state.freq_->value); + irs::sort::score_cast(score_buf) = state.num_ * tf / (state.norm_const_ + tf); + } + }; + } } virtual irs::sort::term_collector::ptr prepare_term_collector() const override { diff --git a/3rdParty/iresearch/core/search/disjunction.hpp b/3rdParty/iresearch/core/search/disjunction.hpp index bcf0c06f6fab..bd7ff509d664 100644 --- a/3rdParty/iresearch/core/search/disjunction.hpp +++ b/3rdParty/iresearch/core/search/disjunction.hpp @@ -29,6 +29,7 @@ 
#include "utils/std.hpp" #include "utils/type_limits.hpp" #include "index/iterators.hpp" +#include "utils/attribute_range.hpp" NS_ROOT NS_BEGIN(detail) @@ -67,12 +68,126 @@ void evaluate_score_iter(const irs::byte_type**& pVal, DocIterator& src) { NS_END // detail //////////////////////////////////////////////////////////////////////////////// -/// @class basic_disjunction +/// @class position_score_iterator_adapter +/// @brief adapter to use doc_iterator with positions for disjunction //////////////////////////////////////////////////////////////////////////////// template -class basic_disjunction final : public doc_iterator_base, score_ctx { +struct position_score_iterator_adapter : score_iterator_adapter { + position_score_iterator_adapter(typename position_score_iterator_adapter::doc_iterator_t&& it) noexcept + : score_iterator_adapter(std::move(it)) { + auto& attrs = this->it->attributes(); + position = irs::position::extract(attrs); + } + + position_score_iterator_adapter(const position_score_iterator_adapter&) = default; + position_score_iterator_adapter& operator=(const position_score_iterator_adapter&) = default; + + position_score_iterator_adapter(position_score_iterator_adapter&& rhs) noexcept + : score_iterator_adapter(std::move(rhs)), + position(std::move(rhs.position)) { + } + + position_score_iterator_adapter& operator=(position_score_iterator_adapter&& rhs) noexcept { + if (this != &rhs) { + score_iterator_adapter::operator=(std::move(rhs)); + position = rhs.position; + } + return *this; + } + + irs::position* position; +}; // position_score_iterator_adapter + +template +class attribute_range_adapter { + public: + attribute_range_adapter(typename attribute_view::ref>::type& map_attribute_range) { + map_attribute_range = &attribute_range_; + } + + protected: + attribute_range attribute_range_; +}; + +template +class unary_disjunction_state : protected attribute_range_state { + protected: + bool state_finished_; // is all iterators exhausted +}; + +template +class basic_disjunction_state : protected attribute_range_state { + protected: + bool state_finished_; // is all iterators exhausted + bool state_is_min_; // is current iterator has a minimal value + bool state_is_new_document_; // is a document value updated +}; + +template +class small_disjunction_state : protected attribute_range_state { + protected: + bool state_finished_; // is all iterators exhausted + size_t state_idx_; // current index + bool state_is_new_document_; // is a document value updated +}; + +template +class disjunction_state : protected attribute_range_state { + protected: + bool state_finished_; // is all iterators exhausted + size_t state_idx_; // current heap index + bool state_is_new_document_; // is a document value updated +}; + +//////////////////////////////////////////////////////////////////////////////// +/// @class unary_disjunction +//////////////////////////////////////////////////////////////////////////////// +template> +class unary_disjunction final : public doc_iterator_base, attribute_range_adapter, unary_disjunction_state { + public: + typedef Adapter doc_iterator_t; + + unary_disjunction(doc_iterator_t&& it) + : attribute_range_adapter(attrs_.emplace>()), + doc_(doc_limits::invalid()), + it_(std::move(it)) { + attrs_.emplace(*it_->attributes().template get()); + this->attribute_range_.set_state(this); + } + + virtual doc_id_t value() const noexcept override { + return it_.value(); + } + + virtual bool next() override { + return it_->next(); + } + + virtual doc_id_t seek(doc_id_t 
target) override { + return it_->seek(target); + } + + private: + virtual Adapter* get_next_iterator() override { + return this->state_finished_ ? nullptr : (this->state_finished_ = true, &it_); + } + + virtual void reset_next_iterator_state() override { + this->state_finished_ = false; + } + + document doc_; + doc_iterator_t it_; +}; // unary_disjunction + +//////////////////////////////////////////////////////////////////////////////// +/// @class basic_disjunction +/// @brief use for special adapters only +//////////////////////////////////////////////////////////////////////////////// +template> +class basic_disjunction final : public doc_iterator_base, score_ctx, attribute_range_adapter, basic_disjunction_state { public: - typedef score_iterator_adapter doc_iterator_t; + typedef Adapter doc_iterator_t; basic_disjunction( doc_iterator_t&& lhs, @@ -103,12 +218,14 @@ class basic_disjunction final : public doc_iterator_base, score_ctx { } virtual bool next() override { + this->state_is_new_document_ = true; next_iterator_impl(lhs_); next_iterator_impl(rhs_); return !doc_limits::eof(doc_.value = std::min(lhs_.value(), rhs_.value())); } virtual doc_id_t seek(doc_id_t target) override { + this->state_is_new_document_ = true; if (target <= doc_.value) { return doc_.value; } @@ -128,10 +245,12 @@ class basic_disjunction final : public doc_iterator_base, score_ctx { doc_iterator_t&& rhs, const order::prepared& ord, resolve_overload_tag) - : lhs_(std::move(lhs)), + : attribute_range_adapter(attrs_.emplace>()), + lhs_(std::move(lhs)), rhs_(std::move(rhs)), doc_(doc_limits::invalid()), ord_(&ord) { + this->attribute_range_.set_state(this); // make 'document' attribute accessible from outside attrs_.emplace(doc_); // prepare score @@ -203,6 +322,43 @@ class basic_disjunction final : public doc_iterator_base, score_ctx { return false; } + virtual Adapter* get_next_iterator() override { + if (this->state_finished_) { + return nullptr; + } + + auto l_value = lhs_.value(); + auto r_value = rhs_.value(); + if (this->state_is_min_) { + if (l_value == doc_.value && r_value != doc_.value) { + this->state_finished_ = true; + return &lhs_; + } + + if (r_value == doc_.value && l_value != doc_.value) { + this->state_finished_ = true; + return &rhs_; + } + + this->state_is_min_ = false; + return r_value < l_value ? &rhs_ : &lhs_; + } + + this->state_finished_ = true; + return r_value < l_value ? 
&lhs_ : &rhs_; + } + + virtual void reset_next_iterator_state() override { + // call after success next() or seek() + assert(!doc_limits::eof(doc_.value)); + if (this->state_is_new_document_) { + seek_iterator_impl(rhs_, doc_.value); + this->state_is_new_document_ = false; + } + this->state_is_min_ = true; + this->state_finished_ = false; + } + mutable doc_iterator_t lhs_; mutable doc_iterator_t rhs_; mutable const irs::byte_type* scores_vals_[2]; @@ -214,10 +370,10 @@ class basic_disjunction final : public doc_iterator_base, score_ctx { /// @class small_disjunction /// @brief linear search based disjunction //////////////////////////////////////////////////////////////////////////////// -template -class small_disjunction : public doc_iterator_base, score_ctx { +template> +class small_disjunction : public doc_iterator_base, score_ctx, attribute_range_adapter, small_disjunction_state { public: - typedef score_iterator_adapter doc_iterator_t; + typedef Adapter doc_iterator_t; typedef std::vector doc_iterators_t; small_disjunction( @@ -260,6 +416,7 @@ class small_disjunction : public doc_iterator_base, score_ctx { } virtual bool next() override { + this->state_is_new_document_ = true; if (doc_limits::eof(doc_.value)) { return false; } @@ -288,6 +445,7 @@ class small_disjunction : public doc_iterator_base, score_ctx { } virtual doc_id_t seek(doc_id_t target) override { + this->state_is_new_document_ = true; if (doc_limits::eof(doc_.value)) { return doc_.value; } @@ -329,11 +487,13 @@ class small_disjunction : public doc_iterator_base, score_ctx { doc_iterators_t&& itrs, const order::prepared& ord, resolve_overload_tag) - : itrs_(std::move(itrs)), + : attribute_range_adapter(attrs_.emplace>()), + itrs_(std::move(itrs)), doc_(itrs_.empty() ? doc_limits::eof() : doc_limits::invalid()), ord_(&ord) { + this->attribute_range_.set_state(this); // copy iterators with scores into separate container // to avoid extra checks scored_itrs_.reserve(itrs_.size()); @@ -376,6 +536,43 @@ class small_disjunction : public doc_iterator_base, score_ctx { return !itrs_.empty(); } + void hitch_all_iterators() { + for (auto rbegin = itrs_.rbegin(); rbegin != itrs_.rend();) { + auto& it = *rbegin; + ++rbegin; + if (it.value() < doc_.value && doc_limits::eof(it->seek(doc_.value))) { + #ifdef IRESEARCH_DEBUG + assert(remove_iterator(it)); + #else + remove_iterator(it); + #endif + } + } + } + + virtual Adapter* get_next_iterator() override { + if (this->state_finished_) { + return nullptr; + } + auto size = itrs_.size(); + for (; this->state_idx_ < size && itrs_[this->state_idx_].value() != doc_.value; ++this->state_idx_) {} + + if (size == this->state_idx_) { + this->state_finished_ = true; + return nullptr; + } + return &itrs_[this->state_idx_++]; + } + + virtual void reset_next_iterator_state() override { + if (this->state_is_new_document_) { + hitch_all_iterators(); + this->state_is_new_document_ = false; + } + this->state_finished_ = false; + this->state_idx_ = 0; + } + doc_iterators_t itrs_; doc_iterators_t scored_itrs_; // iterators with scores document doc_; @@ -395,14 +592,20 @@ class small_disjunction : public doc_iterator_base, score_ctx { /// [n] <-- lead (accepted iterator) /// ---------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// -template -class disjunction : public doc_iterator_base, score_ctx { +template, bool EnableUnary = false> +class disjunction : public doc_iterator_base, score_ctx, 
attribute_range_adapter, disjunction_state { public: - typedef small_disjunction small_disjunction_t; - typedef basic_disjunction basic_disjunction_t; - typedef score_iterator_adapter doc_iterator_t; + typedef small_disjunction small_disjunction_t; + typedef basic_disjunction basic_disjunction_t; + typedef unary_disjunction unary_disjunction_t; + typedef Adapter doc_iterator_t; typedef std::vector doc_iterators_t; + typedef std::vector heap_container; + typedef heap_container::iterator heap_iterator; + + static const bool kEnableUnary = EnableUnary; + disjunction( doc_iterators_t&& itrs, const order::prepared& ord, @@ -431,6 +634,7 @@ class disjunction : public doc_iterator_base, score_ctx { } virtual bool next() override { + this->state_is_new_document_ = true; if (doc_limits::eof(doc_.value)) { return false; } @@ -449,10 +653,12 @@ class disjunction : public doc_iterator_base, score_ctx { } doc_.value = lead().value(); + return true; } virtual doc_id_t seek(doc_id_t target) override { + this->state_is_new_document_ = true; if (doc_limits::eof(doc_.value)) { return doc_.value; } @@ -477,11 +683,14 @@ class disjunction : public doc_iterator_base, score_ctx { doc_iterators_t&& itrs, const order::prepared& ord, resolve_overload_tag) - : itrs_(std::move(itrs)), + : attribute_range_adapter(attrs_.emplace>()), + itrs_(std::move(itrs)), doc_(itrs_.empty() ? doc_limits::eof() : doc_limits::invalid()), ord_(&ord) { + this->attribute_range_.set_state(this); + // since we are using heap in order to determine next document, // in order to avoid useless make_heap call we expect that all // iterators are equal here */ @@ -556,9 +765,7 @@ class disjunction : public doc_iterator_base, score_ctx { return itrs_[heap_.front()]; } - void score_impl(byte_type* lhs) { - assert(!heap_.empty()); - + std::pair hitch_all_iterators() { // hitch all iterators in head to the lead (current doc_) auto begin = heap_.begin(), end = heap_.end()-1; @@ -576,16 +783,23 @@ class disjunction : public doc_iterator_base, score_ctx { push(begin,end); } } + return {begin, end}; + } + + void score_impl(byte_type* lhs) { + assert(!heap_.empty()); + + auto its = hitch_all_iterators(); const irs::byte_type** pVal = scores_vals_.data(); detail::evaluate_score_iter(pVal, lead()); if (top().value() == doc_.value) { irstd::heap::for_each_if( - begin, end, + its.first, its.second, [this](const size_t it) { assert(it < itrs_.size()); return itrs_[it].value() == doc_.value; }, - [this, lhs, &pVal](size_t it) { + [this, &pVal](size_t it) { assert(it < itrs_.size()); detail::evaluate_score_iter(pVal, itrs_[it]); }); @@ -593,8 +807,68 @@ class disjunction : public doc_iterator_base, score_ctx { ord_->merge(lhs, scores_vals_.data(), std::distance(scores_vals_.data(), pVal)); } + Adapter* get_next_iterator() override { + // if exhausted + if (this->state_finished_) { + return nullptr; + } + const auto size = heap_.size(); + // if the first time + if (std::numeric_limits::max() == this->state_idx_) { + this->state_idx_ = 0; + if (1 == size) { + this->state_finished_ = true; + } + assert(heap_.back() < itrs_.size()); + return &itrs_[heap_.back()]; + } + assert(size > 1); + const auto bottom = size - 1; + do { + if (this->state_idx_ < bottom) { + assert(heap_[this->state_idx_] < itrs_.size()); + auto& itr = itrs_[heap_[this->state_idx_]]; + if (itr.value() == doc_.value) { + this->state_idx_ = (this->state_idx_ << 1) + 1; + return &itr; + } + } + do { + if (0 == this->state_idx_) { + this->state_finished_ = true; + return nullptr; + } + const auto 
up_idx = (this->state_idx_ - 1) >> 1; + if ((this->state_idx_ & 1) == 0) { + assert((up_idx << 1) + 2 == this->state_idx_); + this->state_idx_ = up_idx; + continue; + } + assert((up_idx << 1) + 1 == this->state_idx_); + this->state_idx_ += 1; + break; + } while (true); + } while (true); + + assert(false); + this->state_finished_ = true; + return nullptr; + } + + virtual void reset_next_iterator_state() override { + // call after success next() or seek() + assert(!doc_limits::eof(doc_.value)); + assert(!heap_.empty()); + if (this->state_is_new_document_) { + hitch_all_iterators(); + this->state_is_new_document_ = false; + } + this->state_finished_ = false; + this->state_idx_ = std::numeric_limits::max(); + } + doc_iterators_t itrs_; - std::vector heap_; + heap_container heap_; mutable std::vector scores_vals_; document doc_; const order::prepared* ord_; @@ -614,6 +888,12 @@ doc_iterator::ptr make_disjunction( // empty or unreachable search criteria return doc_iterator::empty(); case 1: + if /*constexpr*/ (Disjunction::kEnableUnary) { + typedef typename Disjunction::unary_disjunction_t unary_disjunction_t; + return doc_iterator::make( + std::move(itrs.front()) + ); + } // single sub-query return std::move(itrs.front()); case 2: { diff --git a/3rdParty/iresearch/core/search/granular_range_filter.cpp b/3rdParty/iresearch/core/search/granular_range_filter.cpp index 2176fe084ad5..b89377f39263 100644 --- a/3rdParty/iresearch/core/search/granular_range_filter.cpp +++ b/3rdParty/iresearch/core/search/granular_range_filter.cpp @@ -161,11 +161,10 @@ void collect_terms_between( return; // skipped current term and no more terms in segment } - const auto& masked_begin_term = mask_value(terms.value(), prefix_size); // the starting term for range collection const auto& masked_end_term = mask_value(end_term, prefix_size); // the ending term for range collection collect_terms( - states, sr, tr, terms, scorer, [&prefix_size, &masked_begin_level, &masked_begin_term, &masked_end_term, include_end_term]( + states, sr, tr, terms, scorer, [&prefix_size, &masked_begin_level, &masked_end_term, include_end_term]( const irs::term_iterator& itr )->bool { const auto& masked_current_level = mask_granularity(itr.value(), prefix_size); diff --git a/3rdParty/iresearch/core/search/levenshtein_filter.cpp b/3rdParty/iresearch/core/search/levenshtein_filter.cpp index c2be0bf81bd9..8900fdc1d1cd 100644 --- a/3rdParty/iresearch/core/search/levenshtein_filter.cpp +++ b/3rdParty/iresearch/core/search/levenshtein_filter.cpp @@ -24,6 +24,7 @@ #include "shared.hpp" #include "limited_sample_scorer.hpp" +#include "term_query.hpp" #include "index/index_reader.hpp" #include "utils/automaton_utils.hpp" #include "utils/levenshtein_utils.hpp" @@ -37,27 +38,29 @@ NS_ROOT DEFINE_FILTER_TYPE(by_edit_distance) DEFINE_FACTORY_DEFAULT(by_edit_distance) -filter::prepared::ptr by_edit_distance::prepare( +/*static*/ filter::prepared::ptr by_edit_distance::prepare( const index_reader& index, const order::prepared& order, boost_t boost, - const attribute_view& ctx) const { - if (0 == max_distance_) { - return by_term::prepare(index, order, boost, ctx); + const string_ref& field, + const bytes_ref& term, + size_t scored_terms_limit, + byte_type max_distance, + pdp_f provider, + bool with_transpositions) { + if (0 == max_distance) { + return term_query::make(index, order, boost, field, term); } - assert(provider_); - const auto& d = (*provider_)(max_distance_, with_transpositions_); + assert(provider); + const auto& d = (*provider)(max_distance, 
with_transpositions); if (!d) { return prepared::empty(); } - boost *= this->boost(); - const string_ref field = this->field(); - - return prepare_automaton_filter(field, make_levenshtein_automaton(d, term()), - scored_terms_limit(), index, order, boost); + return prepare_automaton_filter(field, make_levenshtein_automaton(d, term), + scored_terms_limit, index, order, boost); } by_edit_distance::by_edit_distance() noexcept diff --git a/3rdParty/iresearch/core/search/levenshtein_filter.hpp b/3rdParty/iresearch/core/search/levenshtein_filter.hpp index 8b6b1300b418..03f9ff9e693f 100644 --- a/3rdParty/iresearch/core/search/levenshtein_filter.hpp +++ b/3rdParty/iresearch/core/search/levenshtein_filter.hpp @@ -45,6 +45,17 @@ class IRESEARCH_API by_edit_distance final : public by_prefix { ////////////////////////////////////////////////////////////////////////////// using pdp_f = const parametric_description&(*)(byte_type, bool); + static prepared::ptr prepare( + const index_reader& index, + const order::prepared& order, + boost_t boost, + const string_ref& field, + const bytes_ref& term, + size_t scored_terms_limit, + byte_type max_distance, + pdp_f provider, + bool with_transpositions); + explicit by_edit_distance() noexcept; using by_prefix::field; @@ -57,11 +68,27 @@ class IRESEARCH_API by_edit_distance final : public by_prefix { using filter::prepare; virtual filter::prepared::ptr prepare( - const index_reader& rdr, - const order::prepared& ord, - boost_t boost, - const attribute_view& ctx - ) const override; + const index_reader& index, + const order::prepared& order, + boost_t boost, + const attribute_view& /*ctx*/) const override { + return prepare(index, order, this->boost()*boost, + field(), term(), scored_terms_limit(), + max_distance_, provider_, with_transpositions_); + } + + + using by_prefix::scored_terms_limit; + + ////////////////////////////////////////////////////////////////////////////// + /// @brief the maximum number of most frequent terms to consider for scoring + ////////////////////////////////////////////////////////////////////////////// + by_edit_distance& scored_terms_limit(size_t limit) noexcept { + by_prefix::scored_terms_limit(limit); + return *this; + } + + ////////////////////////////////////////////////////////////////////////////// /// @brief sets maximum allowed edit distance @@ -119,6 +146,6 @@ class IRESEARCH_API by_edit_distance final : public by_prefix { bool with_transpositions_{false}; }; // by_edit_distance -#endif // IRESEARCH_LEVENSHTEIN_FILTER_H - NS_END + +#endif // IRESEARCH_LEVENSHTEIN_FILTER_H diff --git a/3rdParty/iresearch/core/search/limited_sample_scorer-heap.cpp b/3rdParty/iresearch/core/search/limited_sample_scorer-heap.cpp deleted file mode 100644 index 66e95051ff98..000000000000 --- a/3rdParty/iresearch/core/search/limited_sample_scorer-heap.cpp +++ /dev/null @@ -1,204 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// DISCLAIMER -/// -/// Copyright 2019 ArangoDB GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is ArangoDB GmbH, Cologne, Germany -/// -/// @author Andrey Abramov -//////////////////////////////////////////////////////////////////////////////// - -#include "limited_sample_scorer.hpp" - -#include "analysis/token_attributes.hpp" -#include "index/index_reader.hpp" -#include "utils/hash_utils.hpp" - -NS_LOCAL - -struct state { - explicit state(const irs::index_reader& index, - const irs::term_reader& field, - const irs::order::prepared& order, - size_t& state_offset) - : collectors(order.prepare_collectors(1)) { // 1 term per bstring because a range is treated as a disjunction - - // once per every 'state' collect field statistics over the entire index - for (auto& segment: index) { - collectors.collect(segment, field); // collect field statistics once per segment - } - - stats_offset = state_offset++; - } - - irs::order::prepared::collectors collectors; - size_t stats_offset; -}; - -void set_doc_ids(irs::bitset& buf, const irs::term_iterator& term, size_t docs_count) { - docs_count += (irs::doc_limits::min)(); - if (buf.size() < docs_count) { - buf.reset(docs_count); // ensure we have enough space - } - - auto itr = term.postings(irs::flags::empty_instance()); - - if (!itr) { - return; // no doc_ids in iterator - } - -//FIXME use doc attribute -// auto* doc = itr->attributes().get().get(); -// -// if (!doc) { -// return; // no doc value -// } - - while (itr->next()) { - buf.set(itr->value()); - } -}; - -bool less(size_t lhs_priority, size_t rhs_priority, - size_t lhs_state_offset, size_t rhs_state_offset) noexcept { - return rhs_priority < lhs_priority - || (rhs_priority == lhs_priority && lhs_state_offset < rhs_state_offset); -} - -NS_END - -NS_ROOT - -limited_sample_scorer::limited_sample_scorer(size_t scored_terms_limit) - : scored_terms_limit_(scored_terms_limit) { - scored_states_.reserve(scored_terms_limit); - scored_states_heap_.reserve(scored_terms_limit); -} - -void limited_sample_scorer::score(const index_reader& index, - const order::prepared& order, - std::vector& stats) { - if (!scored_terms_limit_) { - return; // nothing to score (optimization) - } - - // stats for a specific term - std::unordered_map term_stats; - - // iterate over all the states from which statistcis should be collected - size_t stats_offset = 0; - for (auto& scored_state : scored_states_) { - assert(scored_state.cookie); - auto& field = *scored_state.state->reader; - auto term_itr = field.iterator(); // FIXME - assert(term_itr); - - // find the stats for the current term - const auto res = map_utils::try_emplace( - term_stats, - make_hashed_ref(bytes_ref(scored_state.term), std::hash()), - index, field, order, stats_offset); - - // find term attributes using cached state - if (!term_itr->seek(bytes_ref::NIL, *(scored_state.cookie))) { - continue; // some internal error that caused the term to disappear - } - - auto& stats_entry = res.first->second; - - // collect statistics, 0 because only 1 term - stats_entry.collectors.collect(*scored_state.segment, field, 0, term_itr->attributes()); - - scored_state.state->scored_states.emplace_back( - std::move(scored_state.cookie), - stats_entry.stats_offset); - } - - // iterate over all stats and apply/store order stats - stats.resize(stats_offset); - for (auto& entry : term_stats) { - auto& stats_entry = stats[entry.second.stats_offset]; - stats_entry.resize(order.stats_size()); - auto* stats_buf = 
const_cast(stats_entry.data()); - - order.prepare_stats(stats_buf); - entry.second.collectors.finish(stats_buf, index); - } -} - -void limited_sample_scorer::collect( - size_t priority, - size_t scored_state_id, - limited_sample_state& scored_state, - const sub_reader& segment, - const seek_term_iterator& term_itr) { - if (!scored_terms_limit_) { - // state will not be scored - // add all doc_ids from the doc_iterator to the unscored_docs - set_doc_ids(scored_state.unscored_docs, term_itr, segment.docs_count()); - - return; // nothing to collect (optimization) - } - - auto less = [this](const size_t lhs, const size_t rhs) noexcept { - const auto& lhs_state = scored_states_[lhs]; - const auto& rhs_state = scored_states_[rhs]; - - return ::less(lhs_state.priority, rhs_state.priority, - lhs_state.state_offset, rhs_state.state_offset); - }; - - if (scored_states_.size() < scored_terms_limit_) { - // have not reached the scored state limit yet - scored_states_heap_.push_back(scored_states_.size()); - scored_states_.emplace_back(priority, segment, scored_state, - scored_state_id, term_itr.value(), term_itr.cookie()); - - std::push_heap(scored_states_heap_.begin(), scored_states_heap_.end(), less); - return; - } - - auto& min_state = scored_states_[scored_states_heap_.front()]; - - if (::less(priority, min_state.priority, min_state.state_offset, scored_state_id)) { // FIXME - std::pop_heap(scored_states_heap_.begin(), scored_states_heap_.end(), less); - - auto& state = scored_states_[scored_states_heap_.back()]; - auto state_term_it = state.state->reader->iterator(); // FIXME cache iterator??? - - assert(state.cookie); - if (state_term_it->seek(bytes_ref::NIL, *state.cookie)) { - // state will not be scored - // add all doc_ids from the doc_iterator to the unscored_docs - set_doc_ids(state.state->unscored_docs, *state_term_it, state.segment->docs_count()); - } - - // update min state - state.priority = priority; - state.state = &scored_state; - state.cookie = term_itr.cookie(); - state.term = term_itr.value(); - state.segment = &segment; - state.state_offset = scored_state_id; - - std::push_heap(scored_states_heap_.begin(), scored_states_heap_.end(), less); - } else { - // state will not be scored - // add all doc_ids from the doc_iterator to the unscored_docs - set_doc_ids(scored_state.unscored_docs, term_itr, segment.docs_count()); - } -} - -NS_END diff --git a/3rdParty/iresearch/core/search/limited_sample_scorer-heap.hpp b/3rdParty/iresearch/core/search/limited_sample_scorer-heap.hpp deleted file mode 100644 index 2c5d0464f33f..000000000000 --- a/3rdParty/iresearch/core/search/limited_sample_scorer-heap.hpp +++ /dev/null @@ -1,132 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// DISCLAIMER -/// -/// Copyright 2019 ArangoDB GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. 
-/// -/// Copyright holder is ArangoDB GmbH, Cologne, Germany -/// -/// @author Andrey Abramov -//////////////////////////////////////////////////////////////////////////////// - -#ifndef IRESEARCH_LIMITED_SAMPLE_SCORER_H -#define IRESEARCH_LIMITED_SAMPLE_SCORER_H - -#include "shared.hpp" -#include "sort.hpp" -#include "index/iterators.hpp" -#include "utils/string.hpp" -#include "utils/bitset.hpp" - -NS_ROOT - -struct sub_reader; -struct index_reader; - -////////////////////////////////////////////////////////////////////////////// -/// @struct limited_sample_state -////////////////////////////////////////////////////////////////////////////// -struct limited_sample_state { - limited_sample_state() = default; - limited_sample_state(limited_sample_state&& rhs) noexcept - : reader(rhs.reader), - scored_states(std::move(rhs.scored_states)), - unscored_docs(std::move(rhs.unscored_docs)) { - rhs.reader = nullptr; - } - limited_sample_state& operator=(limited_sample_state&& rhs) noexcept { - if (this != &rhs) { - scored_states = std::move(rhs.scored_states); - unscored_docs = std::move(rhs.unscored_docs); - reader = rhs.reader; - rhs.reader = nullptr; - } - return *this; - } - - // reader using for iterate over the terms - const term_reader* reader{}; - - // scored term states (cookie + stat offset) - std::vector> scored_states; - - // matching doc_ids that may have been skipped - // while collecting statistics and should not be scored by the disjunction - bitset unscored_docs; -}; // limited_sample_state - -////////////////////////////////////////////////////////////////////////////// -/// @class limited_sample_score -/// @brief object to collect and track a limited number of scorers -////////////////////////////////////////////////////////////////////////////// -class limited_sample_scorer : util::noncopyable { - public: - explicit limited_sample_scorer(size_t scored_terms_limit); - - ////////////////////////////////////////////////////////////////////////////// - /// @param priority priority of this entry, lowest priority removed first - /// @param scored_state_id state identifier used for querying of attributes - /// @param scored_state state containing this scored term - /// @param reader segment reader for the current term - /// @param term_itr segment term-iterator positioned at the current term - ////////////////////////////////////////////////////////////////////////////// - void collect(size_t priority, - size_t scored_state_id, - limited_sample_state& scored_state, - const sub_reader& reader, - const seek_term_iterator& term_itr); - - void score(const index_reader& index, - const order::prepared& order, - std::vector& stats); - - private: - ////////////////////////////////////////////////////////////////////////////// - /// @brief a representation of a term cookie with its associated range_state - ////////////////////////////////////////////////////////////////////////////// - struct scored_term_state { - scored_term_state( - size_t priority, - const sub_reader& segment, - limited_sample_state& scored_state, - size_t scored_state_offset, - const bytes_ref& term, - seek_term_iterator::seek_cookie::ptr&& cookie) - : cookie(std::move(cookie)), - state(&scored_state), - state_offset(scored_state_offset), - priority(priority), - segment(&segment), - term(term) { - assert(this->cookie); - } - - seek_term_iterator::cookie_ptr cookie; // term offset cache - limited_sample_state* state; // state containing this scored term - size_t state_offset; - size_t priority; - const irs::sub_reader* 
segment; // segment reader for the current term - bstring term; // actual term value this state is for - }; // scored_term_state_t - - typedef std::vector scored_term_states_t; - - scored_term_states_t scored_states_; - std::vector scored_states_heap_; // use external heap as states are big - size_t scored_terms_limit_; -}; // limited_sample_scorer - -NS_END - -#endif // IRESEARCH_LIMITED_SAMPLE_SCORER_H diff --git a/3rdParty/iresearch/core/search/limited_sample_scorer.cpp b/3rdParty/iresearch/core/search/limited_sample_scorer.cpp index 04eea1a97aee..7c52eace2232 100644 --- a/3rdParty/iresearch/core/search/limited_sample_scorer.cpp +++ b/3rdParty/iresearch/core/search/limited_sample_scorer.cpp @@ -30,10 +30,10 @@ NS_LOCAL struct state { explicit state(const irs::index_reader& index, - const irs::term_reader& field, - const irs::order::prepared& order, - size_t& state_offset) - : collectors(order.prepare_collectors(1)) { // 1 term per bstring because a range is treated as a disjunction + const irs::term_reader& field, + const irs::order::prepared& order, + size_t& state_offset) + : collectors(order.fixed_prepare_collectors(1)) { // 1 term per bstring because a range is treated as a disjunction // once per every 'state' collect field statistics over the entire index for (auto& segment: index) { @@ -43,13 +43,12 @@ struct state { stats_offset = state_offset++; } - irs::order::prepared::collectors collectors; + irs::order::prepared::fixed_terms_collectors collectors; size_t stats_offset; }; void set_doc_ids(irs::bitset& buf, const irs::term_iterator& term, size_t docs_count) { docs_count += (irs::doc_limits::min)(); - if (buf.size() < docs_count) { buf.reset(docs_count); // ensure we have enough space } @@ -60,24 +59,31 @@ void set_doc_ids(irs::bitset& buf, const irs::term_iterator& term, size_t docs_c return; // no doc_ids in iterator } -//FIXME use doc attribute -// auto* doc = itr->attributes().get().get(); -// -// if (!doc) { -// return; // no doc value -// } + auto* doc = itr->attributes().get().get(); + + if (!doc) { + return; // no doc value + } while (itr->next()) { - buf.set(itr->value()); + buf.set(doc->value); } }; +bool less(size_t lhs_priority, size_t rhs_priority, + size_t lhs_state_offset, size_t rhs_state_offset) noexcept { + return rhs_priority < lhs_priority + || (rhs_priority == lhs_priority && lhs_state_offset < rhs_state_offset); +} + NS_END NS_ROOT limited_sample_scorer::limited_sample_scorer(size_t scored_terms_limit) : scored_terms_limit_(scored_terms_limit) { + scored_states_.reserve(scored_terms_limit); + scored_states_heap_.reserve(scored_terms_limit); } void limited_sample_scorer::score(const index_reader& index, @@ -92,8 +98,7 @@ void limited_sample_scorer::score(const index_reader& index, // iterate over all the states from which statistcis should be collected size_t stats_offset = 0; - for (auto& entry : scored_states_) { - auto& scored_state = entry.second; + for (auto& scored_state : scored_states_) { assert(scored_state.cookie); auto& field = *scored_state.state->reader; auto term_itr = field.iterator(); // FIXME @@ -146,28 +151,53 @@ void limited_sample_scorer::collect( return; // nothing to collect (optimization) } - scored_states_.emplace( - std::piecewise_construct, - std::forward_as_tuple(priority), - std::forward_as_tuple(priority, segment, scored_state, scored_state_id, term_itr) - ); + auto less = [this](const size_t lhs, const size_t rhs) noexcept { + const auto& lhs_state = scored_states_[lhs]; + const auto& rhs_state = scored_states_[rhs]; - if 
(scored_states_.size() <= scored_terms_limit_) { - return; // have not reached the scored state limit yet - } + return ::less(lhs_state.priority, rhs_state.priority, + lhs_state.state_offset, rhs_state.state_offset); + }; - auto itr = scored_states_.begin(); // least significant state to be removed - auto& entry = itr->second; - auto state_term_itr = entry.state->reader->iterator(); + if (scored_states_.size() < scored_terms_limit_) { + // have not reached the scored state limit yet + scored_states_heap_.push_back(scored_states_.size()); + scored_states_.emplace_back(priority, segment, scored_state, + scored_state_id, term_itr.value(), term_itr.cookie()); - // add all doc_ids from the doc_iterator to the unscored_docs - if (state_term_itr - && entry.cookie - && state_term_itr->seek(bytes_ref::NIL, *(entry.cookie))) { - set_doc_ids(entry.state->unscored_docs, *state_term_itr, entry.segment->docs_count()); + std::push_heap(scored_states_heap_.begin(), scored_states_heap_.end(), less); + return; } - scored_states_.erase(itr); + auto& min_state = scored_states_[scored_states_heap_.front()]; + + if (::less(priority, min_state.priority, min_state.state_offset, scored_state_id)) { // FIXME + std::pop_heap(scored_states_heap_.begin(), scored_states_heap_.end(), less); + + auto& state = scored_states_[scored_states_heap_.back()]; + auto state_term_it = state.state->reader->iterator(); // FIXME cache iterator??? + + assert(state.cookie); + if (state_term_it->seek(bytes_ref::NIL, *state.cookie)) { + // state will not be scored + // add all doc_ids from the doc_iterator to the unscored_docs + set_doc_ids(state.state->unscored_docs, *state_term_it, state.segment->docs_count()); + } + + // update min state + state.priority = priority; + state.state = &scored_state; + state.cookie = term_itr.cookie(); + state.term = term_itr.value(); + state.segment = &segment; + state.state_offset = scored_state_id; + + std::push_heap(scored_states_heap_.begin(), scored_states_heap_.end(), less); + } else { + // state will not be scored + // add all doc_ids from the doc_iterator to the unscored_docs + set_doc_ids(scored_state.unscored_docs, term_itr, segment.docs_count()); + } } NS_END diff --git a/3rdParty/iresearch/core/search/limited_sample_scorer.hpp b/3rdParty/iresearch/core/search/limited_sample_scorer.hpp index 902d0e228c71..2c5d0464f33f 100644 --- a/3rdParty/iresearch/core/search/limited_sample_scorer.hpp +++ b/3rdParty/iresearch/core/search/limited_sample_scorer.hpp @@ -101,13 +101,14 @@ class limited_sample_scorer : util::noncopyable { const sub_reader& segment, limited_sample_state& scored_state, size_t scored_state_offset, - const seek_term_iterator& term) - : cookie(term.cookie()), + const bytes_ref& term, + seek_term_iterator::seek_cookie::ptr&& cookie) + : cookie(std::move(cookie)), state(&scored_state), state_offset(scored_state_offset), priority(priority), segment(&segment), - term(term.value()) { + term(term) { assert(this->cookie); } @@ -119,9 +120,10 @@ class limited_sample_scorer : util::noncopyable { bstring term; // actual term value this state is for }; // scored_term_state_t - typedef std::multimap scored_term_states_t; + typedef std::vector scored_term_states_t; scored_term_states_t scored_states_; + std::vector scored_states_heap_; // use external heap as states are big size_t scored_terms_limit_; }; // limited_sample_scorer diff --git a/3rdParty/iresearch/core/search/min_match_disjunction.hpp b/3rdParty/iresearch/core/search/min_match_disjunction.hpp index b6d1ab4cc879..e25f601730d7 100644 
--- a/3rdParty/iresearch/core/search/min_match_disjunction.hpp +++ b/3rdParty/iresearch/core/search/min_match_disjunction.hpp @@ -97,8 +97,8 @@ class min_match_disjunction : public doc_iterator_base, score_ctx { itrs_.begin(), itrs_.end(), cost::cost_t(0), [](cost::cost_t lhs, const doc_iterator_t& rhs) { return lhs + cost::extract(rhs->attributes(), 0); + }); }); - }); // prepare external heap heap_.resize(itrs_.size()); @@ -239,7 +239,40 @@ class min_match_disjunction : public doc_iterator_base, score_ctx { } } + ////////////////////////////////////////////////////////////////////////////// + /// @brief calculates total count of matched iterators. This value could be + /// greater than required min_match. All matched iterators points + /// to current matched document after this call. + /// @returns total matched iterators count + ////////////////////////////////////////////////////////////////////////////// + size_t count_matched() { + push_valid_to_lead(); + return lead_; + } + private: + ////////////////////////////////////////////////////////////////////////////// + /// @brief push all valid iterators to lead + ////////////////////////////////////////////////////////////////////////////// + inline void push_valid_to_lead() { + for(auto lead = this->lead(), begin = heap_.begin(); + lead != begin && top().value() <= doc_.value;) { + // hitch head + if (top().value() == doc_.value) { + // got hit here + add_lead(); + --lead; + } else { + if (doc_limits::eof(top()->seek(doc_.value))) { + // iterator exhausted + remove_top(); + } else { + refresh_top(); + } + } + } + } + template inline void push(Iterator begin, Iterator end) { // lambda here gives ~20% speedup on GCC @@ -383,34 +416,17 @@ class min_match_disjunction : public doc_iterator_base, score_ctx { ++lead_; } + inline void score_impl(byte_type* lhs) { assert(!heap_.empty()); - // push all valid iterators to lead - { - for(auto lead = this->lead(), begin = heap_.begin(); - lead != begin && top().value() <= doc_.value;) { - // hitch head - if (top().value() == doc_.value) { - // got hit here - add_lead(); - --lead; - } else { - if (doc_limits::eof(top()->seek(doc_.value))) { - // iterator exhausted - remove_top(); - } else { - refresh_top(); - } - } - } - } + push_valid_to_lead(); // score lead iterators const irs::byte_type** pVal = scores_vals_.data(); std::for_each( lead(), heap_.end(), - [this, lhs, &pVal](size_t it) { + [this, &pVal](size_t it) { assert(it < itrs_.size()); detail::evaluate_score_iter(pVal, itrs_[it]); }); diff --git a/3rdParty/iresearch/core/search/ngram_similarity_filter.cpp b/3rdParty/iresearch/core/search/ngram_similarity_filter.cpp new file mode 100644 index 000000000000..fdd89301a272 --- /dev/null +++ b/3rdParty/iresearch/core/search/ngram_similarity_filter.cpp @@ -0,0 +1,551 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
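The count_matched() method added above reports how many sub-iterators can sit on the current document, which the ngram scorer below uses as an upper bound ("potential") on the longest matchable sequence. A much-simplified sketch of what that count means; the real implementation hitches a heap of live iterators rather than scanning and re-seeking immutable lists.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

using doc_id_t = uint32_t;
constexpr doc_id_t kEof = std::numeric_limits<doc_id_t>::max();

struct sorted_docs {
  std::vector<doc_id_t> docs;  // ascending doc ids
  doc_id_t seek(doc_id_t target) const {
    auto it = std::lower_bound(docs.begin(), docs.end(), target);
    return it == docs.end() ? kEof : *it;
  }
};

// How many of the sub-iterators match exactly the given document.
size_t count_matched(const std::vector<sorted_docs>& iterators, doc_id_t doc) {
  size_t matched = 0;
  for (const auto& it : iterators) {
    matched += static_cast<size_t>(it.seek(doc) == doc);
  }
  return matched;
}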
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrei Lobov +//////////////////////////////////////////////////////////////////////////////// + +#include "ngram_similarity_filter.hpp" +#include "min_match_disjunction.hpp" +#include "disjunction.hpp" +#include +#include "shared.hpp" +#include "cost.hpp" +#include "analysis/token_attributes.hpp" +#include "index/index_reader.hpp" +#include "index/field_meta.hpp" +#include "utils/misc.hpp" +#include "utils/map_utils.hpp" + + +NS_LOCAL + +struct ngram_segment_state_t { + const irs::term_reader* field{}; + std::vector terms; +}; + +typedef irs::states_cache states_t; +typedef std::vector stats_t; + +NS_END + +NS_ROOT + +////////////////////////////////////////////////////////////////////////////// +///@class ngram_similarity_doc_iterator +///@brief adapter for min_match_disjunction with honor of terms orderings +////////////////////////////////////////////////////////////////////////////// +template +class ngram_similarity_doc_iterator : public doc_iterator_base, score_ctx { + public: + struct position_t { + position_t(position* p, document* d, score* s) + : pos(p), doc(d), scr(s) {} + position* pos; + document* doc; + score* scr; + }; + + using positions_t = std::vector; + using doc_iterators_t = typename min_match_disjunction::doc_iterators_t; + + static positions_t extract_positions(const doc_iterators_t& itrs) { + positions_t pos; + pos.reserve(itrs.size()); + for (const auto& itr : itrs) { + auto& attrs = itr->attributes(); + // get needed positions for iterators + auto p = attrs.template get().get(); + auto d = attrs.template get().get(); + auto s = attrs.template get().get(); + pos.emplace_back(p, d, s); + } + return pos; + } + + ngram_similarity_doc_iterator(doc_iterators_t&& itrs, + const states_t& states, + const sub_reader& segment, + const term_reader& field, + boost_t boost, + const byte_type* stats, + size_t total_terms_count, + size_t min_match_count = 1, + const order::prepared& ord = order::prepared::unordered()) + : pos_(extract_positions(itrs)), + min_match_count_(min_match_count), + disjunction_(std::forward(itrs), min_match_count, + order::prepared::unordered()),// we are not interested in disjunction`s scoring + states_(states), total_terms_count_(total_terms_count) { + scores_vals_.resize(pos_.size()); + + attrs_.emplace(seq_freq_); + doc_ = (attrs_.emplace() = disjunction_.attributes().template get()).get(); + attrs_.emplace(filter_boost_); + + prepare_score(ord, ord.prepare_scorers(segment, field, stats, attrs_, boost)); + empty_order_ = ord.empty(); + } + + virtual bool next() override { + bool next = false; + while ((next = disjunction_.next()) && !check_serial_positions()) {} + return next; + } + + virtual doc_id_t value() const override { + return doc_->value; + } + + virtual doc_id_t seek(doc_id_t target) override { + const auto doc = disjunction_.seek(target); + + if (doc_limits::eof(doc) || check_serial_positions()) { + return doc; + } + + next(); + return doc_->value; + } + + private: + struct search_state { + search_state(size_t p, const score* s) : parent{nullptr}, scr{s}, pos{p}, len(1) {} + search_state(search_state&&) = default; + search_state(const search_state&) = default; + search_state& operator=(const search_state&) = default; + + // appending constructor + search_state(std::shared_ptr& other, size_t p, const score* s) + : parent{other}, scr{s}, pos{p}, len(other->len + 1) {} + + std::shared_ptr parent; + const score* scr; + size_t pos; + size_t len; + }; + + using 
search_states_t = std::map, std::greater>; + using pos_temp_t = std::vector>>; + + bool check_serial_positions() { + size_t potential = disjunction_.count_matched(); // how long max sequence could be in the best case + search_buf_.clear(); + size_t longest_sequence_len = 0; + seq_freq_.value = 0; + for (const auto& pos_iterator : pos_) { + if (pos_iterator.doc->value == doc_->value) { + position& pos = *(pos_iterator.pos); + if (potential <= longest_sequence_len || potential < min_match_count_) { + // this term could not start largest (or long enough) sequence. + // skip it to first position to append to any existing candidates + assert(!search_buf_.empty()); + pos.seek(search_buf_.rbegin()->first + 1); + } else { + pos.next(); + } + if (!pos_limits::eof(pos.value())) { + pos_temp_t swap_cache; + auto last_found_pos = pos_limits::invalid(); + do { + auto current_pos = pos.value(); + auto found = search_buf_.lower_bound(current_pos); + if (found != search_buf_.end()) { + if (last_found_pos != found->first) { + last_found_pos = found->first; + auto current_sequence = found; + // if we hit same position - set length to 0 to force checking candidates to the left + size_t current_found_len = (found->first == current_pos || + found->second->scr == pos_iterator.scr) ? 0 : found->second->len + 1; + auto initial_found = found; + if (current_found_len > longest_sequence_len) { + longest_sequence_len = current_found_len; + } else { + // maybe some previous candidates could produce better results. + // lets go leftward and check if there are any candidates which could became longer + // if we stick this ngram to them rather than the closest one found + for (++found; found != search_buf_.end(); ++found) { + if (found->second->scr != pos_iterator.scr && + found->second->len + 1 > current_found_len) { + // we have better option. Replace this match! + current_sequence = found; + current_found_len = found->second->len + 1; + if (current_found_len > longest_sequence_len) { + longest_sequence_len = current_found_len; + break; // this match is the best - nothing to search further + } + } + } + } + if (current_found_len) { + auto new_candidate = std::make_shared(current_sequence->second, current_pos, pos_iterator.scr); + auto res = map_utils::try_emplace(search_buf_, current_pos, std::move(new_candidate)); + if (!res.second) { + // pos already used. This could be if same ngram used several times. 
+ // replace with new length through swap cache - to not spoil + // candidate for following positions of same ngram + swap_cache.emplace_back(current_pos, std::move(new_candidate)); + } + } else if (initial_found->second->scr == pos_iterator.scr && + potential > longest_sequence_len && potential >= min_match_count_) { + // we just hit same iterator and found no better place to join, + // so it will produce new candidate + search_buf_.emplace(std::piecewise_construct, + std::forward_as_tuple(current_pos), + std::forward_as_tuple(std::make_shared(current_pos, pos_iterator.scr))); + } + } + } else if (potential > longest_sequence_len && potential >= min_match_count_) { + // this ngram at this position could potentially start a long enough sequence + // so add it to candidate list + search_buf_.emplace(std::piecewise_construct, + std::forward_as_tuple(current_pos), + std::forward_as_tuple(std::make_shared(current_pos, pos_iterator.scr))); + if (!longest_sequence_len) { + longest_sequence_len = 1; + } + } + } while (pos.next()); + for (auto& p : swap_cache) { + auto res = search_buf_.find(p.first); + assert(res != search_buf_.end()); + std::swap(res->second, p.second); + } + } + --potential; // we are done with this term. + // next will have potential one less as less matches left + + if (!potential) { + break; // all further terms will not add anything + } + + if (longest_sequence_len + potential < min_match_count_) { + break; // all further terms will not let us build long enough sequence + } + + // if we have no scoring - we could stop searh once we got enough matches + if (longest_sequence_len >= min_match_count_ && empty_order_) { + break; + } + } + } + + if (longest_sequence_len >= min_match_count_ && !empty_order_) { + uint32_t freq = 0; + size_t count_longest{ 0 }; + // try to optimize case with one longest candidate + // performance profiling shows it is majority of cases + for (auto i = search_buf_.begin(), end = search_buf_.end(); i != end; ++i) { + if (i->second->len == longest_sequence_len) { + ++count_longest; + if (count_longest > 1) { + break; + } + } + } + + if (count_longest > 1) { + longest_sequence_.clear(); + used_pos_.clear(); + longest_sequence_.reserve(longest_sequence_len); + pos_sequence_.reserve(longest_sequence_len); + for (auto i = search_buf_.begin(), end = search_buf_.end(); i != end;) { + pos_sequence_.clear(); + assert(i->second->len <= longest_sequence_len); + if (i->second->len == longest_sequence_len) { + bool delete_candidate = false; + // only first longest sequence will contribute to frequency + if (longest_sequence_.empty()) { + longest_sequence_.push_back(i->second->scr); + pos_sequence_.push_back(i->second->pos); + auto cur_parent = i->second->parent; + while (cur_parent) { + longest_sequence_.push_back(cur_parent->scr); + pos_sequence_.push_back(cur_parent->pos); + cur_parent = cur_parent->parent; + } + } else { + if (used_pos_.find(i->second->pos) != used_pos_.end() || + i->second->scr != longest_sequence_[0]) { + delete_candidate = true; + } else { + pos_sequence_.push_back(i->second->pos); + auto cur_parent = i->second->parent; + size_t j = 1; + while (cur_parent) { + assert(j < longest_sequence_.size()); + if (longest_sequence_[j] != cur_parent->scr || + used_pos_.find(cur_parent->pos) != used_pos_.end()) { + delete_candidate = true; + break; + } + pos_sequence_.push_back(cur_parent->pos); + cur_parent = cur_parent->parent; + ++j; + } + } + } + if (!delete_candidate) { + ++freq; + used_pos_.insert(std::begin(pos_sequence_), + 
std::end(pos_sequence_)); + } + } + ++i; + } + } else { + freq = 1; + } + seq_freq_.value = freq; + assert(!pos_.empty()); + filter_boost_.value = (boost_t)longest_sequence_len / (boost_t)total_terms_count_; + } + return longest_sequence_len >= min_match_count_; + } + + std::vector longest_sequence_; + positions_t pos_; + frequency seq_freq_; // longest sequence frequency + filter_boost filter_boost_; + size_t min_match_count_; + min_match_disjunction disjunction_; + mutable std::vector scores_vals_; + search_states_t search_buf_; + const states_t& states_; + std::vector pos_sequence_; + size_t total_terms_count_; + const document* doc_; + bool empty_order_; + std::set used_pos_; // longest sequence positions overlaping detector +}; + +////////////////////////////////////////////////////////////////////////////// +/// @class ngram_similarity_query +/// @brief prepared ngram similarity query implementation +////////////////////////////////////////////////////////////////////////////// +class ngram_similarity_query : public filter::prepared { + public: + DECLARE_SHARED_PTR(ngram_similarity_query); + + ngram_similarity_query(size_t min_match_count, states_t&& states, bstring&& stats, boost_t boost = no_boost()) + :prepared(boost), min_match_count_(min_match_count), states_(std::move(states)), stats_(std::move(stats)) {} + + using filter::prepared::execute; + + virtual doc_iterator::ptr execute( + const sub_reader& rdr, + const order::prepared& ord, + const attribute_view&) const override { + auto query_state = states_.find(rdr); + if (!query_state || !query_state->field) { + // invalid state + return doc_iterator::empty(); + } + + if (1 == min_match_count_ && ord.empty()) { + return execute_simple_disjunction(*query_state); + } else { + return execute_ngram_similarity(rdr, *query_state, ord); + } + } + + private: + doc_iterator::ptr execute_simple_disjunction( + const ngram_segment_state_t& query_state) const { + using disjunction_t = irs::disjunction; + disjunction_t::doc_iterators_t itrs; + itrs.reserve(query_state.terms.size()); + for (auto& term_state : query_state.terms) { + if (term_state == nullptr) { + continue; + } + auto term = query_state.field->iterator(); + + // use bytes_ref::blank here since we do not need just to "jump" + // to cached state, and we are not interested in term value itself */ + if (!term->seek(bytes_ref::NIL, *term_state)) { + continue; + } + + // get postings + auto docs = term->postings(irs::flags::empty_instance()); + assert(docs); + + // add iterator + itrs.emplace_back(std::move(docs)); + } + + if (itrs.empty()) { + return doc_iterator::empty(); + } + return make_disjunction(std::move(itrs)); + } + + doc_iterator::ptr execute_ngram_similarity( + const sub_reader& rdr, + const ngram_segment_state_t& query_state, + const order::prepared& ord) const { + min_match_disjunction::doc_iterators_t itrs; + itrs.reserve(query_state.terms.size()); + auto features = ord.features() | by_ngram_similarity::features(); + for (auto& term_state : query_state.terms) { + if (term_state == nullptr) { + continue; + } + auto term = query_state.field->iterator(); + + // use bytes_ref::blank here since we do not need just to "jump" + // to cached state, and we are not interested in term value itself */ + if (!term->seek(bytes_ref::NIL, *term_state)) { + continue; + } + + // get postings + auto docs = term->postings(features); + assert(docs); + + // add iterator + itrs.emplace_back(std::move(docs)); + } + + if (itrs.size() < min_match_count_) { + return doc_iterator::empty(); + } + 
return memory::make_shared>( + std::move(itrs), states_, rdr, *query_state.field, boost(), stats_.c_str(), + query_state.terms.size(), min_match_count_, ord); + } + + size_t min_match_count_; + states_t states_; + bstring stats_; +}; + +// ----------------------------------------------------------------------------- +// --SECTION-- by_ngram_similarity implementation +// ----------------------------------------------------------------------------- + +/* static */ const flags& by_ngram_similarity::features() { + static flags req{ frequency::type(), position::type() }; + return req; +} + +DEFINE_FILTER_TYPE(by_ngram_similarity) +DEFINE_FACTORY_DEFAULT(by_ngram_similarity) + +by_ngram_similarity::by_ngram_similarity(): filter(by_ngram_similarity::type()) { +} + +bool by_ngram_similarity::equals(const filter& rhs) const noexcept { + const by_ngram_similarity& trhs = static_cast(rhs); + return filter::equals(rhs) && fld_ == trhs.fld_ && ngrams_ == trhs.ngrams_ && threshold_ == trhs.threshold_; +} + +size_t by_ngram_similarity::hash() const noexcept { + size_t seed = 0; + ::boost::hash_combine(seed, filter::hash()); + ::boost::hash_combine(seed, fld_); + std::for_each( + ngrams_.begin(), ngrams_.end(), + [&seed](const by_ngram_similarity::term_t& term) { + ::boost::hash_combine(seed, term); + }); + ::boost::hash_combine(seed, threshold_); + return seed; +} + +filter::prepared::ptr by_ngram_similarity::prepare( + const index_reader& rdr, + const order::prepared& ord, + boost_t boost, + const attribute_view& /*ctx*/) const { + if (ngrams_.empty() || fld_.empty()) { + // empty field or terms or invalid threshold + return filter::prepared::empty(); + } + + size_t min_match_count = std::max( + static_cast(std::ceil(static_cast(ngrams_.size()) * threshold_)), (size_t)1); + + states_t query_states(rdr.size()); + + // per segment terms states + ngram_segment_state_t term_states; + term_states.terms.reserve(ngrams_.size()); + + // prepare ngrams stats + auto collectors = ord.fixed_prepare_collectors(ngrams_.size()); + + for (const auto& segment : rdr) { + // get term dictionary for field + const term_reader* field = segment.field(fld_); + if (!field) { + continue; + } + + // check required features + if (!features().is_subset_of(field->meta().features)) { + continue; + } + + term_states.field = field; + collectors.collect(segment, *field); // collect field statistics once per segment + size_t term_itr = 0; + size_t count_terms = 0; + for (const auto& ngram : ngrams_) { + auto next_stats = irs::make_finally([&term_itr]()->void{ ++term_itr; }); + // find terms + seek_term_iterator::ptr term = field->iterator(); + + term_states.terms.emplace_back(); + auto& state = term_states.terms.back(); + if (term->seek(ngram)) { + term->read(); // read term attributes + collectors.collect(segment, *field, term_itr, term->attributes()); // collect statistics + state = term->cookie(); + ++count_terms; + } + } + if (count_terms < min_match_count) { + // we have not found enough terms + term_states.terms.clear(); + term_states.field = nullptr; + continue; + } + + auto& state = query_states.insert(segment); + state = std::move(term_states); + + term_states.terms.reserve(ngrams_.size()); + } + + bstring stats(ord.stats_size(), 0); + auto* stats_buf = const_cast(stats.data()); + + ord.prepare_stats(stats_buf); + collectors.finish(stats_buf, rdr); + + return memory::make_shared( + min_match_count, + std::move(query_states), + std::move(stats), + this->boost() * boost); +} + +NS_END diff --git 
a/3rdParty/iresearch/core/search/ngram_similarity_filter.hpp b/3rdParty/iresearch/core/search/ngram_similarity_filter.hpp new file mode 100644 index 000000000000..5fe3acfa3487 --- /dev/null +++ b/3rdParty/iresearch/core/search/ngram_similarity_filter.hpp @@ -0,0 +1,120 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrei Lobov +//////////////////////////////////////////////////////////////////////////////// + +#ifndef IRESEARCH_NGRAM_SIMILARITY_FILTER_H +#define IRESEARCH_NGRAM_SIMILARITY_FILTER_H + +#include "filter.hpp" +#include "utils/string.hpp" + +NS_ROOT + + +////////////////////////////////////////////////////////////////////////////// +/// @class by_ngram_similarity +////////////////////////////////////////////////////////////////////////////// +class IRESEARCH_API by_ngram_similarity : public filter { + public: + typedef bstring term_t; + typedef std::vector terms_t; + typedef terms_t::iterator iterator; + typedef terms_t::const_iterator const_iterator; + + DECLARE_FILTER_TYPE(); + DECLARE_FACTORY(); + + // returns set of features required for filter + static const flags& features(); + + by_ngram_similarity(); + + using filter::prepare; + + virtual filter::prepared::ptr prepare( + const index_reader& rdr, + const order::prepared& ord, + boost_t boost, + const attribute_view& ctx + ) const override; + + virtual size_t hash() const noexcept override; + + float_t threshold() const noexcept { return threshold_; } + + by_ngram_similarity& threshold(float_t d) noexcept { + assert(d >= 0.); + assert(d <= 1.); + threshold_ = std::max(0.f, std::min(1.f, d)); + return *this; + } + + by_ngram_similarity& field(std::string fld) noexcept { + fld_ = std::move(fld); + return *this; + } + + const std::string& field() const noexcept { return fld_; } + + by_ngram_similarity& push_back(const bstring& term) { + ngrams_.emplace_back(term); + return *this; + } + + by_ngram_similarity& push_back(bstring&& term) { + ngrams_.push_back(std::move(term)); + return *this; + } + + by_ngram_similarity& push_back(const bytes_ref& term) { + ngrams_.emplace_back(term); + return *this; + } + + by_ngram_similarity& push_back(const string_ref& term) { + ngrams_.emplace_back(ref_cast(term)); + return *this; + } + + iterator begin() noexcept { return ngrams_.begin(); } + iterator end() noexcept { return ngrams_.end(); } + + const_iterator begin() const noexcept { return ngrams_.begin(); } + const_iterator end() const noexcept { return ngrams_.end(); } + + bool empty() const noexcept { return ngrams_.empty(); } + size_t size() const noexcept { return ngrams_.size(); } + void clear() noexcept { ngrams_.clear(); } + + protected: + virtual bool equals(const filter& rhs) const noexcept override; + + private: + IRESEARCH_API_PRIVATE_VARIABLES_BEGIN + terms_t ngrams_; + 
std::string fld_; + float_t threshold_{1.f}; + IRESEARCH_API_PRIVATE_VARIABLES_END + +}; // by_ngram_similarity +NS_END // ROOT + +#endif // IRESEARCH_NGRAM_SIMILARITY_FILTER_H \ No newline at end of file diff --git a/3rdParty/iresearch/core/search/phrase_filter.cpp b/3rdParty/iresearch/core/search/phrase_filter.cpp index 88307dcac15e..105117582d93 100644 --- a/3rdParty/iresearch/core/search/phrase_filter.cpp +++ b/3rdParty/iresearch/core/search/phrase_filter.cpp @@ -20,6 +20,8 @@ /// @author Andrey Abramov //////////////////////////////////////////////////////////////////////////////// +#include "utils/automaton.hpp" // FOR FST_NO_DYNAMIC_LINKING + #include "phrase_filter.hpp" #include @@ -28,6 +30,10 @@ #include "cost.hpp" #include "term_query.hpp" #include "conjunction.hpp" +#include "disjunction.hpp" +#include "levenshtein_filter.hpp" +#include "prefix_filter.hpp" +#include "wildcard_filter.hpp" #if defined(_MSC_VER) #pragma warning( disable : 4706 ) @@ -48,7 +54,11 @@ #include "index/index_reader.hpp" #include "index/field_meta.hpp" +#include "utils/fst_table_matcher.hpp" +#include "utils/levenshtein_utils.hpp" #include "utils/misc.hpp" +#include "utils/utf8_utils.hpp" +#include "utils/wildcard_utils.hpp" NS_ROOT @@ -56,9 +66,10 @@ NS_ROOT /// @class phrase_state /// @brief cached per reader phrase state ////////////////////////////////////////////////////////////////////////////// +template class T> struct phrase_state { typedef seek_term_iterator::cookie_ptr term_state_t; - typedef std::vector terms_states_t; + typedef T terms_states_t; phrase_state() = default; @@ -78,12 +89,13 @@ struct phrase_state { /// @class phrase_query /// @brief prepared phrase query implementation ////////////////////////////////////////////////////////////////////////////// +template class T> class phrase_query : public filter::prepared { public: - typedef states_cache states_t; + typedef states_cache> states_t; typedef std::vector positions_t; - DECLARE_SHARED_PTR(phrase_query); + DECLARE_SHARED_PTR(phrase_query); phrase_query( states_t&& states, @@ -96,12 +108,31 @@ class phrase_query : public filter::prepared { stats_(std::move(stats)) { } + protected: + states_t states_; + positions_t positions_; + bstring stats_; +}; // phrase_query + +class fixed_phrase_query : public phrase_query { + public: + + DECLARE_SHARED_PTR(fixed_phrase_query); + + fixed_phrase_query( + typename phrase_query::states_t&& states, + typename phrase_query::positions_t&& positions, + bstring&& stats, + boost_t boost) noexcept + : phrase_query(std::move(states), std::move(positions), std::move(stats), boost) { + } + using filter::prepared::execute; - virtual doc_iterator::ptr execute( + doc_iterator::ptr execute( const sub_reader& rdr, const order::prepared& ord, - const attribute_view& /*ctx*/) const override { + const attribute_view& /*ctx*/) const { // get phrase state for the specified reader auto phrase_state = states_.find(rdr); @@ -114,7 +145,7 @@ class phrase_query : public filter::prepared { auto features = ord.features() | by_phrase::required(); typedef conjunction conjunction_t; - typedef phrase_iterator phrase_iterator_t; + typedef phrase_iterator phrase_iterator_t; conjunction_t::doc_iterators_t itrs; itrs.reserve(phrase_state->terms.size()); @@ -126,9 +157,9 @@ class phrase_query : public filter::prepared { auto terms = phrase_state->reader->iterator(); auto position = positions_.begin(); - for (auto& term_state : phrase_state->terms) { + for (const auto& term_state : phrase_state->terms) { // use bytes_ref::blank 
here since we do not need just to "jump" - // to cached state, and we are not interested in term value itself */ + // to cached state, and we are not interested in term value itself if (!terms->seek(bytes_ref::NIL, *term_state)) { return doc_iterator::empty(); } @@ -159,12 +190,100 @@ class phrase_query : public filter::prepared { boost() ); } +}; // fixed_phrase_query - private: - states_t states_; - positions_t positions_; - bstring stats_; -}; // phrase_query +class variadic_phrase_query : public phrase_query { + public: + + DECLARE_SHARED_PTR(variadic_phrase_query); + + variadic_phrase_query( + states_t&& states, + positions_t&& positions, + bstring&& stats, + boost_t boost) noexcept + : phrase_query(std::move(states), std::move(positions), std::move(stats), boost) { + } + + using filter::prepared::execute; + + doc_iterator::ptr execute( + const sub_reader& rdr, + const order::prepared& ord, + const attribute_view& /*ctx*/) const override { + // get phrase state for the specified reader + auto phrase_state = states_.find(rdr); + + if (!phrase_state) { + // invalid state + return doc_iterator::empty(); + } + + // get features required for query & order + auto features = ord.features() | by_phrase::required(); + + typedef conjunction conjunction_t; + typedef phrase_iterator phrase_iterator_t; + + conjunction_t::doc_iterators_t conj_itrs; + conj_itrs.reserve(phrase_state->terms.size()); + + typedef disjunction, true> disjunction_t; + + phrase_iterator_t::positions_t positions; + positions.resize(phrase_state->terms.size()); + + // find term using cached state + auto terms = phrase_state->reader->iterator(); + auto position = positions_.begin(); + + size_t i = 0; + for (const auto& term_states : phrase_state->terms) { + auto is_found = false; + auto& ps = positions[i++]; + ps.second = *position; + + disjunction_t::doc_iterators_t disj_itrs; + disj_itrs.reserve(term_states.size()); + for (const auto& term_state : term_states) { + // use bytes_ref::blank here since we do not need just to "jump" + // to cached state, and we are not interested in term value itself + if (!terms->seek(bytes_ref::NIL, *term_state)) { + continue; + } + + auto docs = terms->postings(features); // postings + auto& pos = docs->attributes().get(); // needed postings attributes + + if (!pos) { + // positions not found + continue; + } + + // add base iterator + disj_itrs.emplace_back(std::move(docs)); + + is_found = true; + } + if (!is_found) { + return doc_iterator::empty(); + } + conj_itrs.emplace_back(make_disjunction(std::move(disj_itrs))); + ps.first = &conj_itrs.back()->attributes().get>>(); + ++position; + } + + return memory::make_shared( + std::move(conj_itrs), + std::move(positions), + rdr, + *phrase_state->reader, + stats_.c_str(), + ord, + boost() + ); + } +}; // variadic_phrase_query // ----------------------------------------------------------------------------- // --SECTION-- by_phrase implementation @@ -186,6 +305,274 @@ bool by_phrase::equals(const filter& rhs) const noexcept { return filter::equals(rhs) && fld_ == trhs.fld_ && phrase_ == trhs.phrase_; } +by_phrase::info_t::info_t() : type(PhrasePartType::TERM), st() { +} + +by_phrase::info_t::info_t(const info_t& other) { + type = other.type; + allocate(); + switch (type) { + case PhrasePartType::TERM: + this->st = other.st; + break; + case PhrasePartType::PREFIX: + this->pt = other.pt; + break; + case PhrasePartType::WILDCARD: + this->wt = other.wt; + break; + case PhrasePartType::LEVENSHTEIN: + this->lt = other.lt; + break; + case 
PhrasePartType::SET: + this->ct = other.ct; + break; + default: + assert(false); + } +} + +by_phrase::info_t::info_t(info_t&& other) noexcept { + type = other.type; + allocate(); + switch (type) { + case PhrasePartType::TERM: + this->st = std::move(other.st); + break; + case PhrasePartType::PREFIX: + this->pt = std::move(other.pt); + break; + case PhrasePartType::WILDCARD: + this->wt = std::move(other.wt); + break; + case PhrasePartType::LEVENSHTEIN: + this->lt = std::move(other.lt); + break; + case PhrasePartType::SET: + this->ct = std::move(other.ct); + break; + default: + assert(false); + } +} + +by_phrase::info_t::info_t(const simple_term& st) { + type = PhrasePartType::TERM; + allocate(); + this->st = st; +} + +by_phrase::info_t::info_t(simple_term&& st) noexcept { + type = PhrasePartType::TERM; + allocate(); + this->st = std::move(st); +} + +by_phrase::info_t::info_t(const prefix_term& pt) { + type = PhrasePartType::PREFIX; + allocate(); + this->pt = pt; +} + +by_phrase::info_t::info_t(prefix_term&& pt) noexcept { + type = PhrasePartType::PREFIX; + allocate(); + this->pt = std::move(pt); +} + +by_phrase::info_t::info_t(const wildcard_term& wt) { + type = PhrasePartType::WILDCARD; + allocate(); + this->wt = wt; +} + +by_phrase::info_t::info_t(wildcard_term&& wt) noexcept { + type = PhrasePartType::WILDCARD; + allocate(); + this->wt = std::move(wt); +} + +by_phrase::info_t::info_t(const levenshtein_term& lt) { + type = PhrasePartType::LEVENSHTEIN; + allocate(); + this->lt = lt; +} + +by_phrase::info_t::info_t(levenshtein_term&& lt) noexcept { + type = PhrasePartType::LEVENSHTEIN; + allocate(); + this->lt = std::move(lt); +} + +by_phrase::info_t::info_t(const set_term& ct) { + type = PhrasePartType::SET; + allocate(); + this->ct = ct; +} + +by_phrase::info_t::info_t(set_term&& ct) noexcept { + type = PhrasePartType::SET; + allocate(); + this->ct = std::move(ct); +} + +by_phrase::info_t& by_phrase::info_t::operator=(const info_t& other) noexcept { + if (&other == this) { + return *this; + } + recreate(other.type); + switch (type) { + case PhrasePartType::TERM: + st = other.st; + break; + case PhrasePartType::PREFIX: + pt = other.pt; + break; + case PhrasePartType::WILDCARD: + wt = other.wt; + break; + case PhrasePartType::LEVENSHTEIN: + lt = other.lt; + break; + case PhrasePartType::SET: + ct = other.ct; + break; + default: + assert(false); + } + return *this; +} + +by_phrase::info_t& by_phrase::info_t::operator=(info_t&& other) noexcept { + if (&other == this) { + return *this; + } + recreate(other.type); + switch (type) { + case PhrasePartType::TERM: + st = std::move(other.st); + break; + case PhrasePartType::PREFIX: + pt = std::move(other.pt); + break; + case PhrasePartType::WILDCARD: + wt = std::move(other.wt); + break; + case PhrasePartType::LEVENSHTEIN: + lt = std::move(other.lt); + break; + case PhrasePartType::SET: + ct = std::move(other.ct); + break; + default: + assert(false); + } + return *this; +} + +bool by_phrase::info_t::operator==(const info_t& other) const noexcept { + if (type != other.type) { + return false; + } + switch (type) { + case PhrasePartType::TERM: + return st == other.st; + case PhrasePartType::PREFIX: + return pt == other.pt; + case PhrasePartType::WILDCARD: + return wt == other.wt; + case PhrasePartType::LEVENSHTEIN: + return lt == other.lt; + case PhrasePartType::SET: + return ct == other.ct; + default: + assert(false); + } + return false; +} + +void by_phrase::info_t::allocate() noexcept { + switch (type) { + case PhrasePartType::TERM: + new (&st) 
simple_term(); + break; + case PhrasePartType::PREFIX: + new (&pt) prefix_term(); + break; + case PhrasePartType::WILDCARD: + new (&wt) wildcard_term(); + break; + case PhrasePartType::LEVENSHTEIN: + new (<) levenshtein_term(); + break; + case PhrasePartType::SET: + new (&ct) set_term(); + break; + default: + assert(false); + } +} + +void by_phrase::info_t::destroy() noexcept { + switch (type) { + case PhrasePartType::TERM: + st.~simple_term(); + break; + case PhrasePartType::PREFIX: + pt.~prefix_term(); + break; + case PhrasePartType::WILDCARD: + wt.~wildcard_term(); + break; + case PhrasePartType::LEVENSHTEIN: + lt.~levenshtein_term(); + break; + case PhrasePartType::SET: + ct.~set_term(); + break; + default: + assert(false); + } +} + +void by_phrase::info_t::recreate(PhrasePartType new_type) noexcept { + if (type != new_type) { + destroy(); + type = new_type; + allocate(); + } +} + +size_t hash_value(const by_phrase::info_t& info) { + auto seed = std::hash()(static_cast(info.type)); + switch (info.type) { + case by_phrase::PhrasePartType::TERM: + break; + case by_phrase::PhrasePartType::PREFIX: + ::boost::hash_combine(seed, std::hash()(info.pt.scored_terms_limit)); + break; + case by_phrase::PhrasePartType::WILDCARD: + ::boost::hash_combine(seed, std::hash()(info.wt.scored_terms_limit)); + break; + case by_phrase::PhrasePartType::LEVENSHTEIN: + ::boost::hash_combine(seed, std::hash()(info.lt.scored_terms_limit)); + ::boost::hash_combine(seed, std::hash()(info.lt.max_distance)); + ::boost::hash_combine(seed, std::hash()(info.lt.provider)); + ::boost::hash_combine(seed, std::hash()(info.lt.with_transpositions)); + break; + case by_phrase::PhrasePartType::SET: + std::for_each( + info.ct.terms.cbegin(), info.ct.terms.cend(), + [&seed](const bstring& term) { + ::boost::hash_combine(seed, term); + }); + break; + default: + assert(false); + } + return seed; +} + size_t by_phrase::hash() const noexcept { size_t seed = 0; ::boost::hash_combine(seed, filter::hash()); @@ -208,21 +595,50 @@ filter::prepared::ptr by_phrase::prepare( return filter::prepared::empty(); } - if (1 == phrase_.size()) { - // similar to `term_query` - const irs::bytes_ref term = phrase_.begin()->second; - return term_query::make(rdr, ord, boost*this->boost(), fld_, term); + const auto phrase_size = phrase_.size(); + if (1 == phrase_size) { + const auto& term_info = phrase_.begin()->second; + switch (term_info.type) { + case PhrasePartType::TERM: // similar to `term_query` + return term_query::make(rdr, ord, boost*this->boost(), fld_, term_info.st.term); + case PhrasePartType::PREFIX: + return by_prefix::prepare(rdr, ord, boost*this->boost(), fld_, term_info.pt.term, + term_info.pt.scored_terms_limit); + case PhrasePartType::WILDCARD: + return by_wildcard::prepare(rdr, ord, boost*this->boost(), fld_, + term_info.wt.term, term_info.wt.scored_terms_limit); + case PhrasePartType::LEVENSHTEIN: + return by_edit_distance::prepare(rdr, ord, boost*this->boost(), fld_, + term_info.lt.term, term_info.lt.scored_terms_limit, + term_info.lt.max_distance, term_info.lt.provider, + term_info.lt.with_transpositions); + case PhrasePartType::SET: + // do nothing + break; + default: + assert(false); + } } - // per segment phrase states - phrase_query::states_t phrase_states(rdr.size()); + // prepare phrase stats (collector for each term) + if (is_simple_term_only_) { + return fixed_prepare_collect(rdr, ord, boost, ord.fixed_prepare_collectors(phrase_size)); + } + return variadic_prepare_collect(rdr, ord, boost, 
ord.variadic_prepare_collectors(phrase_size)); +} - // per segment phrase terms - phrase_state::terms_states_t phrase_terms; - phrase_terms.reserve(phrase_.size()); +filter::prepared::ptr by_phrase::fixed_prepare_collect( + const index_reader& rdr, + const order::prepared& ord, + boost_t boost, + order::prepared::fixed_terms_collectors collectors) const { + // per segment phrase states + fixed_phrase_query::states_t phrase_states(rdr.size()); - // prepare phrase stats (collector for each term) - auto collectors = ord.prepare_collectors(phrase_.size()); + // per segment phrase terms + phrase_state::terms_states_t phrase_terms; + auto phrase_size = phrase_.size(); + phrase_terms.reserve(phrase_size); // iterate over the segments const string_ref field = fld_; @@ -246,14 +662,14 @@ filter::prepared::ptr by_phrase::prepare( seek_term_iterator::ptr term = tr->iterator(); size_t term_itr = 0; - for(auto& word: phrase_) { + for (const auto& word : phrase_) { + assert(PhrasePartType::TERM == word.second.type); auto next_stats = irs::make_finally([&term_itr]()->void{ ++term_itr; }); - - if (!term->seek(word.second)) { + if (!term->seek(word.second.st.term)) { if (ord.empty()) { break; } else { - // continue here because we should collect + // continue here because we should collect // stats for other terms in phrase continue; } @@ -267,7 +683,7 @@ filter::prepared::ptr by_phrase::prepare( } // we have not found all needed terms - if (phrase_terms.size() != phrase_.size()) { + if (phrase_terms.size() != phrase_size) { phrase_terms.clear(); continue; } @@ -276,17 +692,329 @@ filter::prepared::ptr by_phrase::prepare( state.terms = std::move(phrase_terms); state.reader = tr; - phrase_terms.reserve(phrase_.size()); + phrase_terms.reserve(phrase_size); + } + + // offset of the first term in a phrase + size_t base_offset = first_pos(); + + // finish stats + fixed_phrase_query::positions_t positions(phrase_size); + auto pos_itr = positions.begin(); + + for (const auto& term : phrase_) { + *pos_itr = position::value_t(term.first - base_offset); + ++pos_itr; + } + + bstring stats(ord.stats_size(), 0); // aggregated phrase stats + auto* stats_buf = const_cast(stats.data()); + + ord.prepare_stats(stats_buf); + collectors.finish(stats_buf, rdr); + + return memory::make_shared( + std::move(phrase_states), + std::move(positions), + std::move(stats), + this->boost() * boost + ); +} + +filter::prepared::ptr by_phrase::variadic_prepare_collect( + const index_reader& rdr, + const order::prepared& ord, + boost_t boost, + order::prepared::variadic_terms_collectors collectors) const { + // per segment phrase states + variadic_phrase_query::states_t phrase_states(rdr.size()); + + // per segment phrase terms + phrase_state::terms_states_t phrase_terms; + auto phrase_size = phrase_.size(); + phrase_terms.resize(phrase_size); + + // iterate over the segments + const string_ref field = fld_; + + for (const auto& sr : rdr) { + // get term dictionary for field + const term_reader* tr = sr.field(field); + + if (!tr) { + continue; + } + + // check required features + if (!by_phrase::required().is_subset_of(tr->meta().features)) { + continue; + } + + collectors.collect(sr, *tr); // collect field statistics once per segment + + // find terms + seek_term_iterator::ptr term = tr->iterator(); + size_t term_itr = 0; + size_t found_words_count = 0; + + size_t i = 0; + for (const auto& word : phrase_) { + auto next_stats = irs::make_finally([&term_itr]()->void{ ++term_itr; }); + auto& pt = phrase_terms[i++]; + auto type = 
word.second.type; + bytes_ref pattern; + auto stop = false; + switch (word.second.type) { + case PhrasePartType::TERM: + pattern = word.second.st.term; + break; + case PhrasePartType::PREFIX: + pattern = word.second.pt.term; + break; + case PhrasePartType::WILDCARD: + pattern = word.second.wt.term; + switch (irs::wildcard_type(pattern)) { + case WildcardType::INVALID: + if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + case WildcardType::TERM: + type = PhrasePartType::TERM; + break; + case WildcardType::MATCH_ALL: + pattern = bytes_ref::EMPTY; // empty prefix == match all + type = PhrasePartType::PREFIX; + break; + case WildcardType::PREFIX: { + assert(!pattern.empty()); + const auto* begin = pattern.c_str(); + const auto* end = begin + pattern.size(); + + // pattern is already checked to be a valid UTF-8 sequence + const auto* pos = utf8_utils::find(begin, end, WildcardMatch::ANY_STRING); + assert(pos != end); + pattern = bytes_ref(begin, size_t(pos - begin)); // remove trailing '%' + type = PhrasePartType::PREFIX; + break; + } + case WildcardType::WILDCARD: + // do nothing + break; + default: + assert(false); + } + break; + case PhrasePartType::LEVENSHTEIN: + if (0 == word.second.lt.max_distance) { + type = PhrasePartType::TERM; + } + break; + case PhrasePartType::SET: + if (word.second.ct.terms.size() == 1) { + pattern = word.second.ct.terms.front(); + type = PhrasePartType::TERM; + } + break; + default: + assert(false); + } + if (stop) { + break; + } + + switch (type) { + case PhrasePartType::TERM: { + if (!term->seek(pattern)) { + if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + + term->read(); // read term attributes + collectors.collect(sr, *tr, term_itr, term->attributes()); // collect statistics + + // estimate phrase & term + pt.emplace_back(term->cookie()); + ++found_words_count; + break; + } + case PhrasePartType::PREFIX: { + // seek to prefix + if (SeekResult::END == term->seek_ge(pattern)) { + if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + + const auto& value = term->value(); + + #ifdef IRESEARCH_DEBUG + auto found = false; + #endif + while (starts_with(value, pattern)) { + #ifdef IRESEARCH_DEBUG + found = true; + #endif + term->read(); + collectors.collect(sr, *tr, term_itr, term->attributes()); // collect statistics + + // estimate phrase & term + pt.emplace_back(term->cookie()); + if (!term->next()) { + break; + } + } + #ifdef IRESEARCH_DEBUG + assert(found); + #endif + ++found_words_count; + break; + } + case PhrasePartType::WILDCARD: { + const auto& acceptor = from_wildcard(pattern); + automaton_table_matcher matcher(acceptor, fst::fsa::kRho); + + if (fst::kError == matcher.Properties(0)) { + IR_FRMT_ERROR("Expected deterministic, epsilon-free acceptor, " + "got the following properties " IR_UINT64_T_SPECIFIER "", + acceptor.Properties(automaton_table_matcher::FST_PROPERTIES, false)); + if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + + auto term_wildcard = tr->iterator(matcher); + + auto found = false; + while (term_wildcard->next()) { + found = true; + term_wildcard->read(); + collectors.collect(sr, *tr, term_itr, term_wildcard->attributes()); // collect statistics + + // estimate phrase & term + pt.emplace_back(term_wildcard->cookie()); + } + if (found) { + ++found_words_count; + } else if 
(ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + case PhrasePartType::LEVENSHTEIN: { + assert(word.second.lt.provider); + const auto& d = (*word.second.lt.provider)(word.second.lt.max_distance, + word.second.lt.with_transpositions); + if (!d) { + if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + const auto& acceptor = irs::make_levenshtein_automaton(d, word.second.lt.term); + automaton_table_matcher matcher(acceptor, fst::fsa::kRho); + + if (fst::kError == matcher.Properties(0)) { + IR_FRMT_ERROR("Expected deterministic, epsilon-free acceptor, " + "got the following properties " IR_UINT64_T_SPECIFIER "", + acceptor.Properties(automaton_table_matcher::FST_PROPERTIES, false)); + if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + auto term_levenshtein = tr->iterator(matcher); + + auto found = false; + while (term_levenshtein->next()) { + found = true; + term_levenshtein->read(); + collectors.collect(sr, *tr, term_itr, term_levenshtein->attributes()); // collect statistics + + // estimate phrase & term + pt.emplace_back(term_levenshtein->cookie()); + } + if (found) { + ++found_words_count; + } else if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + case PhrasePartType::SET: { + auto found = false; + for (const auto& pat : word.second.ct.terms) { + if (!term->seek(pat)) { + continue; + } + found = true; + + term->read(); // read term attributes + collectors.collect(sr, *tr, term_itr, term->attributes()); // collect statistics + + // estimate phrase & term + pt.emplace_back(term->cookie()); + } + if (found) { + ++found_words_count; + } else if (ord.empty()) { + stop = true; + // else we should collect + // stats for other terms in phrase + } + break; + } + default: + assert(false); + } + if (stop) { + break; + } + } + + // we have not found all needed terms + if (found_words_count != phrase_size) { + for (auto& pt : phrase_terms) { + pt.clear(); + } + continue; + } + + auto& state = phrase_states.insert(sr); + state.terms = std::move(phrase_terms); + state.reader = tr; + + phrase_terms.resize(phrase_size); } // offset of the first term in a phrase size_t base_offset = first_pos(); // finish stats - phrase_query::positions_t positions(phrase_.size()); + variadic_phrase_query::positions_t positions(phrase_size); auto pos_itr = positions.begin(); - for(auto& term: phrase_) { + for (const auto& term : phrase_) { *pos_itr = position::value_t(term.first - base_offset); ++pos_itr; } @@ -297,7 +1025,7 @@ filter::prepared::ptr by_phrase::prepare( ord.prepare_stats(stats_buf); collectors.finish(stats_buf, rdr); - return memory::make_shared( + return memory::make_shared( std::move(phrase_states), std::move(positions), std::move(stats), diff --git a/3rdParty/iresearch/core/search/phrase_filter.hpp b/3rdParty/iresearch/core/search/phrase_filter.hpp index c7e66b9044a4..043e52b2c71c 100644 --- a/3rdParty/iresearch/core/search/phrase_filter.hpp +++ b/3rdParty/iresearch/core/search/phrase_filter.hpp @@ -26,6 +26,8 @@ #include #include "filter.hpp" +#include "levenshtein_filter.hpp" +#include "utils/levenshtein_default_pdp.hpp" #include "utils/string.hpp" NS_ROOT @@ -36,13 +38,108 @@ NS_ROOT ////////////////////////////////////////////////////////////////////////////// class IRESEARCH_API by_phrase : public filter { public: + enum class 
PhrasePartType { + TERM, PREFIX, WILDCARD, LEVENSHTEIN, SET + }; + + struct simple_term { + bool operator==(const simple_term& other) const noexcept { + return term == other.term; + } + + bstring term; + }; + + struct prefix_term { + bool operator==(const prefix_term& other) const noexcept { + return term == other.term; + } + + size_t scored_terms_limit{1024}; + bstring term; + }; + + struct wildcard_term { + bool operator==(const wildcard_term& other) const noexcept { + return term == other.term; + } + + size_t scored_terms_limit{1024}; + bstring term; + }; + + struct levenshtein_term { + bool operator==(const levenshtein_term& other) const noexcept { + return with_transpositions == other.with_transpositions && + max_distance == other.max_distance && + provider == other.provider && + term == other.term; + } + + bool with_transpositions{false}; + byte_type max_distance{0}; + size_t scored_terms_limit{1024}; + by_edit_distance::pdp_f provider{irs::default_pdp}; + bstring term; + }; + + struct set_term { + bool operator==(const set_term& other) const noexcept { + return terms == other.terms; + } + + std::vector terms; + }; + + private: + struct IRESEARCH_API info_t { + ~info_t() { + destroy(); + } + + PhrasePartType type; + + union { + simple_term st; + prefix_term pt; + wildcard_term wt; + levenshtein_term lt; + set_term ct; + }; + + info_t(); + info_t(const info_t& other); + info_t(info_t&& other) noexcept; + info_t(const simple_term& st); + info_t(simple_term&& st) noexcept; + info_t(const prefix_term& pt); + info_t(prefix_term&& pt) noexcept; + info_t(const wildcard_term& wt); + info_t(wildcard_term&& wt) noexcept; + info_t(const levenshtein_term& lt); + info_t(levenshtein_term&& lt) noexcept; + info_t(const set_term& lt); + info_t(set_term&& lt) noexcept; + + info_t& operator=(const info_t& other) noexcept; + info_t& operator=(info_t&& other) noexcept; + + bool operator==(const info_t& other) const noexcept; + + private: + void allocate() noexcept; + void destroy() noexcept; + void recreate(PhrasePartType new_type) noexcept; + }; + // positions and terms - typedef std::map terms_t; - typedef terms_t::const_iterator const_iterator; - typedef terms_t::iterator iterator; + typedef std::map terms_t; typedef terms_t::value_type term_t; - // returns set of features required for filter + friend size_t hash_value(const by_phrase::info_t& info); + + public: + // returns set of features required for filter static const flags& required(); DECLARE_FILTER_TYPE(); @@ -50,55 +147,73 @@ class IRESEARCH_API by_phrase : public filter { by_phrase(); - by_phrase& field(std::string fld) { - fld_ = std::move(fld); + by_phrase& field(std::string fld) noexcept { + fld_ = std::move(fld); return *this; } - const std::string& field() const { return fld_; } + const std::string& field() const noexcept { return fld_; } - // inserts term to the specified position - by_phrase& insert(size_t pos, const bytes_ref& term) { - phrase_[pos] = term; + // inserts term to the specified position + template + by_phrase& insert(PhrasePart&& t, size_t pos) { + is_simple_term_only_ &= std::is_same::value; // constexpr + phrase_[pos] = std::forward(t); return *this; } - by_phrase& insert(size_t pos, const string_ref& term) { - return insert(pos, ref_cast(term)); + template + by_phrase& push_back(PhrasePart&& t, size_t offs = 0) { + return insert(std::forward(t), next_pos() + offs); } - by_phrase& insert(size_t pos, bstring&& term) { - phrase_[pos] = std::move(term); - return *this; + bool get(size_t pos, simple_term& t) { + const 
auto& inf = phrase_.at(pos); + if (PhrasePartType::TERM != inf.type) { + return false; + } + t = inf.st; + return true; } - // inserts term to the end of the phrase with - // the specified offset from the last term - by_phrase& push_back(const bytes_ref& term, size_t offs = 0) { - return insert(next_pos() + offs, term); + bool get(size_t pos, prefix_term& t) { + const auto& inf = phrase_.at(pos); + if (PhrasePartType::PREFIX != inf.type) { + return false; + } + t = inf.pt; + return true; } - by_phrase& push_back(const string_ref& term, size_t offs = 0) { - return push_back(ref_cast(term), offs); + bool get(size_t pos, wildcard_term& t) { + const auto& inf = phrase_.at(pos); + if (PhrasePartType::WILDCARD != inf.type) { + return false; + } + t = inf.wt; + return true; } - by_phrase& push_back(bstring&& term, size_t offs = 0) { - return insert(next_pos() + offs, std::move(term)); + bool get(size_t pos, levenshtein_term& t) { + const auto& inf = phrase_.at(pos); + if (PhrasePartType::LEVENSHTEIN != inf.type) { + return false; + } + t = inf.lt; + return true; } - bstring& operator[](size_t pos) { return phrase_[pos]; } - const bstring& operator[](size_t pos) const { - return phrase_.at(pos); + bool get(size_t pos, set_term& t) { + const auto& inf = phrase_.at(pos); + if (PhrasePartType::SET != inf.type) { + return false; + } + t = inf.ct; + return true; } - bool empty() const { return phrase_.empty(); } - size_t size() const { return phrase_.size(); } - - const_iterator begin() const { return phrase_.begin(); } - const_iterator end() const { return phrase_.end(); } - - iterator begin() { return phrase_.begin(); } - iterator end() { return phrase_.end(); } + bool empty() const noexcept { return phrase_.empty(); } + size_t size() const noexcept { return phrase_.size(); } using filter::prepare; @@ -113,19 +228,32 @@ class IRESEARCH_API by_phrase : public filter { protected: virtual bool equals(const filter& rhs) const noexcept override; - + private: size_t next_pos() const { return phrase_.empty() ? 0 : 1 + phrase_.rbegin()->first; } - + size_t first_pos() const { return phrase_.empty() ? 
0 : phrase_.begin()->first; } + filter::prepared::ptr fixed_prepare_collect( + const index_reader& rdr, + const order::prepared& ord, + boost_t boost, + order::prepared::fixed_terms_collectors collectors) const; + + filter::prepared::ptr variadic_prepare_collect( + const index_reader& rdr, + const order::prepared& ord, + boost_t boost, + order::prepared::variadic_terms_collectors collectors) const; + IRESEARCH_API_PRIVATE_VARIABLES_BEGIN std::string fld_; terms_t phrase_; + bool is_simple_term_only_{true}; IRESEARCH_API_PRIVATE_VARIABLES_END }; // by_phrase diff --git a/3rdParty/iresearch/core/search/phrase_iterator.hpp b/3rdParty/iresearch/core/search/phrase_iterator.hpp index 169311d0e896..b20174dfb16c 100644 --- a/3rdParty/iresearch/core/search/phrase_iterator.hpp +++ b/3rdParty/iresearch/core/search/phrase_iterator.hpp @@ -23,14 +23,14 @@ #ifndef IRESEARCH_PHRASE_ITERATOR_H #define IRESEARCH_PHRASE_ITERATOR_H +#include "disjunction.hpp" +#include "score_doc_iterators.hpp" #include "shared.hpp" +#include "utils/attribute_range.hpp" NS_ROOT -// implementation is optimized for frequency based similarity measures -// for generic implementation see a03025accd8b84a5f8ecaaba7412fc92a1636be3 -template -class phrase_iterator final : public doc_iterator_base { +class fixed_phrase_frequency { public: typedef std::pair< position::ref, // position attribute @@ -38,19 +38,173 @@ class phrase_iterator final : public doc_iterator_base { > position_t; typedef std::vector positions_t; + fixed_phrase_frequency( + positions_t&& pos, + const order::prepared& ord + ) : pos_(std::move(pos)), order_(&ord) { + assert(!pos_.empty()); // must not be empty + assert(0 == pos_.front().second); // lead offset is always 0 + } + + protected: + // returns frequency of the phrase + frequency::value_t phrase_freq() { + frequency::value_t freq = 0; + bool match; + + position& lead = pos_.front().first; + lead.next(); + + for (auto end = pos_.end(); !pos_limits::eof(lead.value());) { + const position::value_t base_position = lead.value(); + + match = true; + + for (auto it = pos_.begin() + 1; it != end; ++it) { + position& pos = it->first; + const auto term_position = base_position + it->second; + if (!pos_limits::valid(term_position)) { + return freq; + } + const auto seeked = pos.seek(term_position); + + if (pos_limits::eof(seeked)) { + // exhausted + return freq; + } else if (seeked != term_position) { + // seeked too far from the lead + match = false; + + lead.seek(seeked - it->second); + break; + } + } + + if (match) { + if (order_->empty()) { + return 1; + } + + ++freq; + lead.next(); + } + } + + return freq; + } + + private: + positions_t pos_; // list of desired positions along with corresponding attributes + const order::prepared* order_; +}; // fixed_phrase_frequency + +class variadic_phrase_frequency { + public: + typedef std::pair< + const attribute_view::ref>>::type*, // position attribute + position::value_t // desired offset in the phrase + > position_t; + typedef std::vector positions_t; + + variadic_phrase_frequency( + positions_t&& pos, + const order::prepared& ord) + : pos_(std::move(pos)), order_(&ord) { + assert(!pos_.empty()); // must not be empty + assert(0 == pos_.front().second); // lead offset is always 0 + } + + protected: + // returns frequency of the phrase + frequency::value_t phrase_freq() { + frequency::value_t freq = 0; + auto end = pos_.end(); + auto* posa = pos_.front().first->get(); + assert(posa); + posa->reset(); + while (posa->next()) { + auto* lead_adapter = posa->value(); + auto* 
lead = lead_adapter->position; + auto global_match = true; + // lead->reset(); // Do not need here. There is always a first time. + lead->next(); + + position::value_t base_position = pos_limits::eof(); + while (!pos_limits::eof(base_position = lead->value())) { + auto match = true; + for (auto it = pos_.begin() + 1; it != end; ++it) { + match = false; + const auto term_position = base_position + it->second; + if (!pos_limits::valid(term_position)) { + global_match = false; // invalid for all + break; + } + auto min_seeked = std::numeric_limits::max(); + auto* ita = it->first->get(); + assert(ita); + ita->reset(); + while (ita->next()) { + auto* it_adapter = ita->value(); + auto* p = it_adapter->position; + p->reset(); + const auto seeked = p->seek(term_position); + + if (pos_limits::eof(seeked)) { + continue; + } else if (seeked != term_position) { + if (seeked < min_seeked) { + min_seeked = seeked; + } + continue; + } + match = true; + break; + } + if (!match) { + if (min_seeked < std::numeric_limits::max()) { + lead->seek(min_seeked - it->second); + break; + } + global_match = false; // eof for all + break; + } + } + if (!global_match) { + break; + } + if (match) { + if (order_->empty()) { + return 1; + } + ++freq; + lead->next(); + } + } + } + return freq; + } + + private: + positions_t pos_; // list of desired positions along with corresponding attributes + const order::prepared* order_; +}; // variadic_phrase_frequency + +// implementation is optimized for frequency based similarity measures +// for generic implementation see a03025accd8b84a5f8ecaaba7412fc92a1636be3 +template +class phrase_iterator : public doc_iterator_base, Frequency { + public: + typedef typename Frequency::positions_t positions_t; + phrase_iterator( typename Conjunction::doc_iterators_t&& itrs, - positions_t&& pos, + typename Frequency::positions_t&& pos, const sub_reader& segment, const term_reader& field, const byte_type* stats, const order::prepared& ord, boost_t boost - ) : approx_(std::move(itrs)), - pos_(std::move(pos)), - order_(&ord) { - assert(!pos_.empty()); // must not be empty - assert(0 == pos_.front().second); // lead offset is always 0 + ) : Frequency(std::move(pos), ord), approx_(std::move(itrs)) { // FIXME find a better estimation // estimate iterator @@ -75,7 +229,7 @@ class phrase_iterator final : public doc_iterator_base { virtual bool next() override { bool next = false; - while ((next = approx_.next()) && !(phrase_freq_.value = phrase_freq())) {} + while ((next = approx_.next()) && !(phrase_freq_.value = this->phrase_freq())) {} return next; } @@ -85,7 +239,7 @@ class phrase_iterator final : public doc_iterator_base { return target; } - if (doc_limits::eof(value()) || (phrase_freq_.value = phrase_freq())) { + if (doc_limits::eof(value()) || (phrase_freq_.value = this->phrase_freq())) { return value(); } @@ -95,54 +249,9 @@ class phrase_iterator final : public doc_iterator_base { } private: - // returns frequency of the phrase - frequency::value_t phrase_freq() { - frequency::value_t freq = 0; - bool match; - - position& lead = pos_.front().first; - lead.next(); - - for (auto end = pos_.end(); !pos_limits::eof(lead.value());) { - const position::value_t base_offset = lead.value(); - - match = true; - - for (auto it = pos_.begin() + 1; it != end; ++it) { - position& pos = it->first; - const auto term_offset = base_offset + it->second; - const auto seeked = pos.seek(term_offset); - - if (pos_limits::eof(seeked)) { - // exhausted - return freq; - } else if (seeked != term_offset) { - // seeked 
too far from the lead - match = false; - - lead.seek(seeked - it->second); - break; - } - } - - if (match) { - if (order_->empty()) { - return 1; - } - - ++freq; - lead.next(); - } - } - - return freq; - } - Conjunction approx_; // first approximation (conjunction over all words in a phrase) const document* doc_{}; // document itself frequency phrase_freq_; // freqency of the phrase in a document - positions_t pos_; // list of desired positions along with corresponding attributes - const order::prepared* order_; }; // phrase_iterator NS_END // ROOT diff --git a/3rdParty/iresearch/core/search/prefix_filter.hpp b/3rdParty/iresearch/core/search/prefix_filter.hpp index 310d90f3ff19..fe8ae558a5d9 100644 --- a/3rdParty/iresearch/core/search/prefix_filter.hpp +++ b/3rdParty/iresearch/core/search/prefix_filter.hpp @@ -64,7 +64,7 @@ class IRESEARCH_API by_prefix : public by_term { ////////////////////////////////////////////////////////////////////////////// /// @brief the maximum number of most frequent terms to consider for scoring ////////////////////////////////////////////////////////////////////////////// - by_prefix& scored_terms_limit(size_t limit) { + by_prefix& scored_terms_limit(size_t limit) noexcept { scored_terms_limit_ = limit; return *this; } @@ -72,7 +72,7 @@ class IRESEARCH_API by_prefix : public by_term { ////////////////////////////////////////////////////////////////////////////// /// @brief the maximum number of most frequent terms to consider for scoring ////////////////////////////////////////////////////////////////////////////// - size_t scored_terms_limit() const { + size_t scored_terms_limit() const noexcept { return scored_terms_limit_; } diff --git a/3rdParty/iresearch/core/search/range_filter.hpp b/3rdParty/iresearch/core/search/range_filter.hpp index a7ed96e4620b..b237c83714d1 100644 --- a/3rdParty/iresearch/core/search/range_filter.hpp +++ b/3rdParty/iresearch/core/search/range_filter.hpp @@ -78,7 +78,7 @@ class IRESEARCH_API by_range : public filter { return *this; } - const std::string& field() const { + const std::string& field() const noexcept { return fld_; } @@ -130,7 +130,7 @@ class IRESEARCH_API by_range : public filter { ////////////////////////////////////////////////////////////////////////////// /// @brief the maximum number of most frequent terms to consider for scoring ////////////////////////////////////////////////////////////////////////////// - by_range& scored_terms_limit(size_t limit) { + by_range& scored_terms_limit(size_t limit) noexcept { scored_terms_limit_ = limit; return *this; } @@ -138,7 +138,7 @@ class IRESEARCH_API by_range : public filter { ////////////////////////////////////////////////////////////////////////////// /// @brief the maximum number of most frequent terms to consider for scoring ////////////////////////////////////////////////////////////////////////////// - size_t scored_terms_limit() const { + size_t scored_terms_limit() const noexcept { return scored_terms_limit_; } diff --git a/3rdParty/iresearch/core/search/same_position_filter.cpp b/3rdParty/iresearch/core/search/same_position_filter.cpp index 97a858f875b0..ee5cc548c332 100644 --- a/3rdParty/iresearch/core/search/same_position_filter.cpp +++ b/3rdParty/iresearch/core/search/same_position_filter.cpp @@ -264,11 +264,11 @@ filter::prepared::ptr by_same_position::prepare( term_states.reserve(terms_.size()); // prepare phrase stats (collector for each term) - std::vector term_stats; + std::vector term_stats; term_stats.reserve(terms_.size()); for(auto size = 
terms_.size(); size; --size) { - term_stats.emplace_back(ord.prepare_collectors(1)); // 1 term per bstring because a range is treated as a disjunction + term_stats.emplace_back(ord.fixed_prepare_collectors(1)); // 1 term per bstring because a range is treated as a disjunction } for (const auto& segment : index) { diff --git a/3rdParty/iresearch/core/search/same_position_filter.hpp b/3rdParty/iresearch/core/search/same_position_filter.hpp index a6ba6e435eb6..746ad8fb37c8 100644 --- a/3rdParty/iresearch/core/search/same_position_filter.hpp +++ b/3rdParty/iresearch/core/search/same_position_filter.hpp @@ -62,15 +62,15 @@ class IRESEARCH_API by_same_position : public filter { by_same_position& push_back(std::string&& field, const bstring& term); by_same_position& push_back(std::string&& field, bstring&& term); - iterator begin() { return terms_.begin(); } - iterator end() { return terms_.end(); } + iterator begin() noexcept { return terms_.begin(); } + iterator end() noexcept { return terms_.end(); } - const_iterator begin() const { return terms_.begin(); } - const_iterator end() const { return terms_.end(); } + const_iterator begin() const noexcept { return terms_.begin(); } + const_iterator end() const noexcept { return terms_.end(); } - bool empty() const { return terms_.empty(); } - size_t size() const { return terms_.size(); } - void clear() { terms_.clear(); } + bool empty() const noexcept { return terms_.empty(); } + size_t size() const noexcept { return terms_.size(); } + void clear() noexcept { terms_.clear(); } protected: virtual bool equals(const filter& rhs) const noexcept override; diff --git a/3rdParty/iresearch/core/search/sort.cpp b/3rdParty/iresearch/core/search/sort.cpp index b480ab249a8c..5ea53848eadd 100644 --- a/3rdParty/iresearch/core/search/sort.cpp +++ b/3rdParty/iresearch/core/search/sort.cpp @@ -164,36 +164,27 @@ order::prepared order::prepare() const { // --SECTION-- collectors // ----------------------------------------------------------------------------- -order::prepared::collectors::collectors( - const order::prepared& buckets, - size_t terms_count) +template class T> +order::prepared::collectors::collectors(const order::prepared& buckets) : buckets_(buckets.order_) { field_collectors_.reserve(buckets_.size()); - term_collectors_.reserve(buckets_.size() * terms_count); // add field collectors from each bucket for (auto& entry: buckets_) { assert(entry.bucket); // ensured by order::prepare field_collectors_.emplace_back(entry.bucket->prepare_field_collector()); } - - // add term collectors from each bucket - // layout order [t0.b0, t0.b1, ... t0.bN, t1.b0, t1.b1 ... 
tM.BN] - for (size_t i = 0; i < terms_count; ++i) { - for (auto& entry: buckets_) { - assert(entry.bucket); // ensured by order::prepare - term_collectors_.emplace_back(entry.bucket->prepare_term_collector()); - } - } } -order::prepared::collectors::collectors(collectors&& other) noexcept +template class T> +order::prepared::collectors::collectors(collectors&& other) noexcept : buckets_(other.buckets_), field_collectors_(std::move(other.field_collectors_)), term_collectors_(std::move(other.term_collectors_)) { } -void order::prepared::collectors::collect( +template class T> +void order::prepared::collectors::collect( const sub_reader& segment, const term_reader& field) const { for (auto& entry: field_collectors_) { @@ -203,13 +194,59 @@ void order::prepared::collectors::collect( } } -void order::prepared::collectors::collect( +template class T> +void order::prepared::collectors::empty_finish( + byte_type* stats_buf, + const index_reader& index) const { + // special case where term statistics collection is not applicable + // e.g. by_column_existence filter + assert(field_collectors_.size() == buckets_.size()); // enforced by allocation in the constructor + + for (size_t i = 0, count = field_collectors_.size(); i < count; ++i) { + auto& sort = buckets_[i]; + assert(sort.bucket); // ensured by order::prepare + + sort.bucket->collect( + stats_buf + sort.stats_offset, // where stats for bucket start + index, + field_collectors_[i].get(), + nullptr + ); + } +} + +template class order::prepared::collectors; +template class order::prepared::collectors; + +order::prepared::fixed_terms_collectors::fixed_terms_collectors( + const order::prepared& buckets, + size_t terms_count) + : collectors(buckets) { + term_collectors_.reserve(buckets_.size() * terms_count); + + // add term collectors from each bucket + // layout order [t0.b0, t0.b1, ... t0.bN, t1.b0, t1.b1 ... tM.BN] + for (size_t i = 0; i < terms_count; ++i) { + for (auto& entry: buckets_) { + assert(entry.bucket); // ensured by order::prepare + term_collectors_.emplace_back(entry.bucket->prepare_term_collector()); + } + } +} + +order::prepared::fixed_terms_collectors::fixed_terms_collectors(fixed_terms_collectors&& other) noexcept + : collectors(std::move(other)) { +} + +void order::prepared::fixed_terms_collectors::collect( const sub_reader& segment, const term_reader& field, size_t term_offset, const attribute_view& term_attrs) const { - for (size_t i = 0, count = buckets_.size(); i < count; ++i) { - const auto idx = term_offset * buckets_.size() + i; + size_t count = buckets_.size(); + size_t term_offset_count = term_offset * count; + for (size_t i = 0; i < count; ++i) { + const auto idx = term_offset_count + i; assert(idx < term_collectors_.size()); // enforced by allocation in the constructor auto& entry = term_collectors_[idx]; @@ -219,25 +256,13 @@ void order::prepared::collectors::collect( } } -void order::prepared::collectors::finish( +void order::prepared::fixed_terms_collectors::finish( byte_type* stats_buf, const index_reader& index) const { // special case where term statistics collection is not applicable // e.g. 
by_column_existence filter if (term_collectors_.empty()) { - assert(field_collectors_.size() == buckets_.size()); // enforced by allocation in the constructor - - for (size_t i = 0, count = field_collectors_.size(); i < count; ++i) { - auto& sort = buckets_[i]; - assert(sort.bucket); // ensured by order::prepare - - sort.bucket->collect( - stats_buf + sort.stats_offset, // where stats for bucket start - index, - field_collectors_[i].get(), - nullptr - ); - } + empty_finish(stats_buf, index); } else { auto bucket_count = buckets_.size(); assert(term_collectors_.size() % bucket_count == 0); // enforced by allocation in the constructor @@ -258,7 +283,7 @@ void order::prepared::collectors::finish( } } -size_t order::prepared::collectors::push_back() { +size_t order::prepared::fixed_terms_collectors::push_back() { auto term_offset = term_collectors_.size() / buckets_.size(); term_collectors_.reserve(term_collectors_.size() + buckets_.size()); @@ -271,6 +296,76 @@ size_t order::prepared::collectors::push_back() { return term_offset; } +order::prepared::variadic_terms_collectors::variadic_terms_collectors( + const order::prepared& buckets, + size_t terms_count) + : collectors(buckets) { + term_collectors_.resize(buckets_.size()); + + // reserve minimal term collectors count + for (auto& tc : term_collectors_) { + tc.reserve(terms_count); + } +} + +order::prepared::variadic_terms_collectors::variadic_terms_collectors(variadic_terms_collectors&& other) noexcept + : collectors(std::move(other)) { +} + +void order::prepared::variadic_terms_collectors::collect( + const sub_reader& segment, + const term_reader& field, + size_t /*term_offset*/, + const attribute_view& term_attrs) const { + for (size_t i = 0, count = buckets_.size(); i < count; ++i) { + auto& entry = buckets_[i]; + assert(entry.bucket); // ensured by order::prepare + auto& tc = term_collectors_[i]; + tc.emplace_back(entry.bucket->prepare_term_collector()); + auto& e = tc.back(); + + if (e) { // may be null if prepare_term_collector() returned nullptr + e->collect(segment, field, term_attrs); + } + } +} + +void order::prepared::variadic_terms_collectors::finish( + byte_type* stats_buf, + const index_reader& index) const { + // special case where term statistics collection is not applicable + // e.g. 
by_column_existence filter + if (term_collectors_.empty()) { + empty_finish(stats_buf, index); + } else { + auto count = term_collectors_.size(); + assert(count == buckets_.size()); + + for (size_t i = 0; i < count; ++i) { + const auto& sort = buckets_[i]; + assert(sort.bucket); // ensured by order::prepare + + assert(i < field_collectors_.size()); + const auto& tc = term_collectors_[i]; + const auto* fc = field_collectors_[i].get(); + for (size_t j = 0, tc_count = tc.size(); j < tc_count; ++j) { + sort.bucket->collect( + stats_buf + sort.stats_offset, // where stats for bucket start + index, + fc, + tc[j].get() + ); + } + } + } +} + +size_t order::prepared::variadic_terms_collectors::push_back() { + assert(false); // unsupported + + return 0; +} + // ---------------------------------------------------------------------------- // --SECTION-- scorers // ---------------------------------------------------------------------------- @@ -373,6 +468,13 @@ bool order::prepared::less(const byte_type* lhs, const byte_type* rhs) const { return false; } +filter_boost::filter_boost() noexcept + : basic_attribute(1.f) { +} + +REGISTER_ATTRIBUTE(filter_boost); +DEFINE_ATTRIBUTE_TYPE(filter_boost); + NS_END // ----------------------------------------------------------------------------- diff --git a/3rdParty/iresearch/core/search/sort.hpp b/3rdParty/iresearch/core/search/sort.hpp index 7844fe39df7a..29426410b4d0 100644 --- a/3rdParty/iresearch/core/search/sort.hpp +++ b/3rdParty/iresearch/core/search/sort.hpp @@ -435,7 +435,7 @@ class IRESEARCH_API sort { /// specified 'offset' to 'dst', i.e. using += //////////////////////////////////////////////////////////////////////////////// virtual void merge(byte_type* dst, const byte_type** src_start, - const size_t size, size_t offset) const { + const size_t size, size_t offset) const override { auto& casted_dst = base_t::score_cast(dst + offset); casted_dst = ScoreType(); for (size_t i = 0; i < size; ++i) { @@ -606,9 +606,13 @@ class IRESEARCH_API order final { /// @brief a convinience class for filters to invoke collector functions /// on collectors in each order bucket //////////////////////////////////////////////////////////////////////////// + template using FixedContainer = std::vector; + template using VariadicContainer = std::vector>; + + template class T> class IRESEARCH_API collectors: private util::noncopyable { // noncopyable required by MSVC public: - collectors(const prepared& buckets, size_t terms_count); + collectors(const prepared& buckets); collectors(collectors&& other) noexcept; // function definition explicitly required by MSVC ////////////////////////////////////////////////////////////////////////// @@ -621,6 +625,23 @@ class IRESEARCH_API order final { ////////////////////////////////////////////////////////////////////////// void collect(const sub_reader& segment, const term_reader& field) const; + void empty_finish(byte_type* stats_buf, const index_reader& index) const; + + protected: + IRESEARCH_API_PRIVATE_VARIABLES_BEGIN + const std::vector& buckets_; + std::vector field_collectors_; // size == buckets_.size() + mutable T term_collectors_; + IRESEARCH_API_PRIVATE_VARIABLES_END + }; + + class IRESEARCH_API fixed_terms_collectors : public collectors { + public: + using collectors::collect; + + fixed_terms_collectors(const prepared& buckets, size_t terms_count); + fixed_terms_collectors(fixed_terms_collectors&& other) noexcept; // function definition explicitly required by MSVC + 
////////////////////////////////////////////////////////////////////////// /// @brief collect term related statistics, i.e. term used in the filter /// @param segment the segment being processed (e.g. for columnstore) @@ -656,12 +677,52 @@ class IRESEARCH_API order final { ////////////////////////////////////////////////////////////////////////// size_t push_back(); - private: - IRESEARCH_API_PRIVATE_VARIABLES_BEGIN - const std::vector& buckets_; - std::vector field_collectors_; // size == buckets_.size() - std::vector term_collectors_; // size == buckets_.size() * terms_count, layout order [t0.b0, t0.b1, ... t0.bN, t1.b0, t1.b1 ... tM.BN] - IRESEARCH_API_PRIVATE_VARIABLES_END + // term_collectors_; size == buckets_.size() * terms_count, layout order [t0.b0, t0.b1, ... t0.bN, t1.b0, t1.b1 ... tM.BN] + }; + + class IRESEARCH_API variadic_terms_collectors : public collectors { + public: + using collectors::collect; + + variadic_terms_collectors(const prepared& buckets, size_t terms_count); + variadic_terms_collectors(variadic_terms_collectors&& other) noexcept; // function definition explicitly required by MSVC + + ////////////////////////////////////////////////////////////////////////// + /// @brief collect term related statistics, i.e. term used in the filter + /// @param segment the segment being processed (e.g. for columnstore) + /// @param field the field matched by the filter in the 'segment' + /// @param term_offset offset of term, value < constructor 'terms_count' + /// @param term_attributes the attributes of the matched term in the field + /// @note called once for every term matched by a filter in the 'field' + /// per each segment + /// @note only called on a matched 'term' in the 'field' in the 'segment' + ////////////////////////////////////////////////////////////////////////// + void collect( + const sub_reader& segment, + const term_reader& field, + size_t term_offset, + const attribute_view& term_attrs + ) const; + + ////////////////////////////////////////////////////////////////////////// + /// @brief store collected index statistics into 'stats' of the + /// current 'filter' + /// @param stats out-parameter to store statistics for later use in + /// calls to score(...) + /// @param index the full index to collect statistics on + /// @note called once on the 'index' for every term matched by a filter + /// calling collect(...) on each of its segments + /// @note if not matched terms then called exactly once + ////////////////////////////////////////////////////////////////////////// + void finish(byte_type* stats, const index_reader& index) const; + + ////////////////////////////////////////////////////////////////////////// + /// @brief add collectors for another term + /// @return term_offset + ////////////////////////////////////////////////////////////////////////// + size_t push_back(); + + // term_collectors_; size == buckets_.size(), inner size == terms count }; //////////////////////////////////////////////////////////////////////////// @@ -759,8 +820,12 @@ class IRESEARCH_API order final { /// @param terms_count number of term_collectors to allocate /// 0 == collect only field level statistics e.g. 
by_column_existence //////////////////////////////////////////////////////////////////////////// - collectors prepare_collectors(size_t terms_count = 0) const { - return collectors(*this, terms_count); + fixed_terms_collectors fixed_prepare_collectors(size_t terms_count = 0) const { + return fixed_terms_collectors(*this, terms_count); + } + + variadic_terms_collectors variadic_prepare_collectors(size_t terms_count = 0) const { + return variadic_terms_collectors(*this, terms_count); } //////////////////////////////////////////////////////////////////////////// @@ -903,6 +968,19 @@ class IRESEARCH_API order final { IRESEARCH_API_PRIVATE_VARIABLES_END }; // order +////////////////////////////////////////////////////////////////////////////// +/// @class filter_boost +/// @brief represents an addition to score from filter specific to a particular +/// document. May vary from document to document. +////////////////////////////////////////////////////////////////////////////// +struct IRESEARCH_API filter_boost : public basic_attribute { + DECLARE_ATTRIBUTE_TYPE(); + filter_boost() noexcept; + + void clear() { value = 1.f; } +}; + + NS_END #endif diff --git a/3rdParty/iresearch/core/search/term_filter.hpp b/3rdParty/iresearch/core/search/term_filter.hpp index 8abf92933cb1..7bfe422175d4 100644 --- a/3rdParty/iresearch/core/search/term_filter.hpp +++ b/3rdParty/iresearch/core/search/term_filter.hpp @@ -44,7 +44,7 @@ class IRESEARCH_API by_term : public filter { return *this; } - const std::string& field() const { + const std::string& field() const noexcept { return fld_; } @@ -71,7 +71,7 @@ class IRESEARCH_API by_term : public filter { const attribute_view& ctx ) const override; - const bstring& term() const { + const bstring& term() const noexcept { return term_; } diff --git a/3rdParty/iresearch/core/search/term_query.cpp b/3rdParty/iresearch/core/search/term_query.cpp index 0a1a81873463..73e5474f7670 100644 --- a/3rdParty/iresearch/core/search/term_query.cpp +++ b/3rdParty/iresearch/core/search/term_query.cpp @@ -39,7 +39,7 @@ term_query::ptr term_query::make( const string_ref& field, const bytes_ref& term) { term_query::states_t states(index.size()); - auto collectors = ord.prepare_collectors(1); + auto collectors = ord.fixed_prepare_collectors(1); // iterate over the segments for (const auto& segment : index) { diff --git a/3rdParty/iresearch/core/search/tfidf.cpp b/3rdParty/iresearch/core/search/tfidf.cpp index 460c980fa9f9..ad6733533612 100644 --- a/3rdParty/iresearch/core/search/tfidf.cpp +++ b/3rdParty/iresearch/core/search/tfidf.cpp @@ -257,14 +257,17 @@ struct const_score_ctx final : public irs::score_ctx { struct score_ctx : public irs::score_ctx { score_ctx( irs::boost_t boost, const tfidf::idf& idf, - const frequency* freq) noexcept - : idf_(boost * idf.value), - freq_(freq ? freq : &EMPTY_FREQ) { + const frequency* freq, + const filter_boost* fb = nullptr) noexcept + : freq_(freq ? 
freq : &EMPTY_FREQ), + filter_boost_(fb), + idf_(boost * idf.value) { assert(freq_); } - - float_t idf_; // precomputed : boost * idf + const frequency* freq_; + const filter_boost* filter_boost_; + float_t idf_; // precomputed : boost * idf }; // score_ctx struct norm_score_ctx final : public score_ctx { @@ -272,8 +275,9 @@ struct norm_score_ctx final : public score_ctx { irs::norm&& norm, irs::boost_t boost, const tfidf::idf& idf, - const frequency* freq) noexcept - : score_ctx(boost, idf, freq), + const frequency* freq, + const filter_boost* fb = nullptr) noexcept + : score_ctx(boost, idf, freq, fb), norm_(std::move(norm)) { } @@ -342,6 +346,7 @@ class sort final: public irs::sort::prepared_basic { } auto& stats = stats_cast(stats_buf); + auto& filter_boost = doc_attrs.get(); // add norm attribute if requested if (normalize_) { @@ -355,24 +360,49 @@ class sort final: public irs::sort::prepared_basic { } if (norm.reset(segment, field.meta().norm, *doc)) { - return { - memory::make_unique(std::move(norm), boost, stats, freq.get()), - [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + + if (filter_boost) { + return { + memory::make_unique(std::move(norm), boost, stats, freq.get(), filter_boost.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + auto& state = *static_cast(ctx); + assert(state.filter_boost_); + irs::sort::score_cast(score_buf) = ::tfidf(state.freq_->value, + state.idf_ * state.filter_boost_->value) * + state.norm_.read(); + } + }; + } else { + return { + memory::make_unique(std::move(norm), boost, stats, freq.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { auto& state = *static_cast(ctx); - irs::sort::score_cast(score_buf) = ::tfidf(state.freq_->value, state.idf_)*state.norm_.read(); - } - }; + irs::sort::score_cast(score_buf) = ::tfidf(state.freq_->value, state.idf_) * state.norm_.read(); + } + }; + } } } - - return { - memory::make_unique(boost, stats, freq.get()), - [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + if (filter_boost) { + return { + memory::make_unique(boost, stats, freq.get(), filter_boost.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { auto& state = *static_cast(ctx); - irs::sort::score_cast(score_buf) = ::tfidf(state.freq_->value, state.idf_); + assert(state.filter_boost_); + irs::sort::score_cast(score_buf) = ::tfidf(state.freq_->value, + state.idf_* state.filter_boost_->value); } - }; + }; + } else { + return { + memory::make_unique(boost, stats, freq.get()), + [](const irs::score_ctx* ctx, byte_type* RESTRICT score_buf) noexcept { + auto& state = *static_cast(ctx); + irs::sort::score_cast(score_buf) = ::tfidf(state.freq_->value, state.idf_); + } + }; + } } virtual irs::sort::term_collector::ptr prepare_term_collector() const override { diff --git a/3rdParty/iresearch/core/search/wildcard_filter.cpp b/3rdParty/iresearch/core/search/wildcard_filter.cpp index 3c59469039a8..86d4420118db 100644 --- a/3rdParty/iresearch/core/search/wildcard_filter.cpp +++ b/3rdParty/iresearch/core/search/wildcard_filter.cpp @@ -31,87 +31,44 @@ #include "utils/automaton_utils.hpp" #include "utils/hash_utils.hpp" -NS_LOCAL - -using wildcard_traits_t = irs::wildcard_traits; - -NS_END - NS_ROOT -WildcardType wildcard_type(const bytes_ref& expr) noexcept { - if (expr.empty()) { - return WildcardType::TERM; - } - - bool escaped = false; - size_t num_match_any_string = 0; - for (const auto c : expr) { - switch (c) { - case 
wildcard_traits_t::MATCH_ANY_STRING: - num_match_any_string += size_t(!escaped); - escaped = false; - break; - case wildcard_traits_t::MATCH_ANY_CHAR: - if (!escaped) { - return WildcardType::WILDCARD; - } - escaped = false; - break; - case wildcard_traits_t::ESCAPE: - escaped = !escaped; - break; - default: - escaped = false; - break; - } - } - - if (0 == num_match_any_string) { - return WildcardType::TERM; - } - - if (expr.size() == num_match_any_string) { - return WildcardType::MATCH_ALL; - } - - return std::all_of(expr.end() - num_match_any_string, expr.end(), - [](byte_type c) { return c == wildcard_traits_t::MATCH_ANY_STRING; }) - ? WildcardType::PREFIX - : WildcardType::WILDCARD; -} - DEFINE_FILTER_TYPE(by_wildcard) DEFINE_FACTORY_DEFAULT(by_wildcard) -filter::prepared::ptr by_wildcard::prepare( +/*static*/ filter::prepared::ptr by_wildcard::prepare( const index_reader& index, const order::prepared& order, boost_t boost, - const attribute_view& /*ctx*/) const { - boost *= this->boost(); - const string_ref field = this->field(); - - switch (wildcard_type(term())) { + const string_ref& field, + const bstring& term, + size_t scored_terms_limit) { + switch (wildcard_type(term)) { + case WildcardType::INVALID: + return prepared::empty(); case WildcardType::TERM: - return term_query::make(index, order, boost, field, term()); + return term_query::make(index, order, boost, field, term); case WildcardType::MATCH_ALL: return by_prefix::prepare(index, order, boost, field, bytes_ref::EMPTY, // empty prefix == match all - scored_terms_limit()); + scored_terms_limit); case WildcardType::PREFIX: { - assert(!term().empty()); - const auto pos = term().find(wildcard_traits_t::MATCH_ANY_STRING); - assert(pos != irs::bstring::npos); + assert(!term.empty()); + const auto* begin = term.c_str(); + const auto* end = begin + term.size(); + + // term is already checked to be a valid UTF-8 sequence + const auto* pos = utf8_utils::find(begin, end, WildcardMatch::ANY_STRING); + assert(pos != end); return by_prefix::prepare(index, order, boost, field, - bytes_ref(term().c_str(), pos), // remove trailing '%' - scored_terms_limit()); + bytes_ref(begin, size_t(pos - begin)), // remove trailing '%' + scored_terms_limit); } case WildcardType::WILDCARD: - return prepare_automaton_filter(field, from_wildcard(term()), - scored_terms_limit(), index, order, boost); + return prepare_automaton_filter(field, from_wildcard(term), + scored_terms_limit, index, order, boost); } assert(false); diff --git a/3rdParty/iresearch/core/search/wildcard_filter.hpp b/3rdParty/iresearch/core/search/wildcard_filter.hpp index 885d5ccb173d..5d432d82b875 100644 --- a/3rdParty/iresearch/core/search/wildcard_filter.hpp +++ b/3rdParty/iresearch/core/search/wildcard_filter.hpp @@ -29,15 +29,6 @@ NS_ROOT -enum class WildcardType { - TERM = 0, // foo - MATCH_ALL, // * - PREFIX, // foo* - WILDCARD // f_o* -}; - -IRESEARCH_API WildcardType wildcard_type(const bytes_ref& pattern) noexcept; - ////////////////////////////////////////////////////////////////////////////// /// @class by_wildcard /// @brief user-side wildcard filter @@ -47,6 +38,14 @@ class IRESEARCH_API by_wildcard final : public by_prefix { DECLARE_FILTER_TYPE(); DECLARE_FACTORY(); + static prepared::ptr prepare( + const index_reader& index, + const order::prepared& order, + boost_t boost, + const string_ref& field, + const bstring& term, + size_t scored_terms_limit); + explicit by_wildcard() noexcept; using by_prefix::field; @@ -59,11 +58,26 @@ class IRESEARCH_API by_wildcard final : 
public by_prefix { using filter::prepare; virtual filter::prepared::ptr prepare( - const index_reader& rdr, - const order::prepared& ord, - boost_t boost, - const attribute_view& ctx - ) const override; + const index_reader& index, + const order::prepared& order, + boost_t boost, + const attribute_view& /*ctx*/) const override { + return prepare(index, order, this->boost()*boost, + field(), term(), scored_terms_limit()); + } + + + using by_prefix::scored_terms_limit; + + ////////////////////////////////////////////////////////////////////////////// + /// @brief the maximum number of most frequent terms to consider for scoring + ////////////////////////////////////////////////////////////////////////////// + by_wildcard& scored_terms_limit(size_t limit) noexcept { + by_prefix::scored_terms_limit(limit); + return *this; + } + + }; // by_wildcard #endif // IRESEARCH_WILDCARD_FILTER_H diff --git a/3rdParty/iresearch/core/shared.hpp b/3rdParty/iresearch/core/shared.hpp index a6bd07302669..0499c30bf211 100644 --- a/3rdParty/iresearch/core/shared.hpp +++ b/3rdParty/iresearch/core/shared.hpp @@ -267,7 +267,9 @@ //////////////////////////////////////////////////////////////////////////////// /// SSE compatibility //////////////////////////////////////////////////////////////////////////////// -#ifdef __SSE2__ + +// for MSVC on x64 architecture SSE2 is always enabled +#if defined(__SSE2__) || (defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_X64))) #define IRESEARCH_SSE2 #endif diff --git a/3rdParty/iresearch/core/utils/arena_allocator.hpp b/3rdParty/iresearch/core/utils/arena_allocator.hpp index b351f12fa830..1becd8b554fd 100644 --- a/3rdParty/iresearch/core/utils/arena_allocator.hpp +++ b/3rdParty/iresearch/core/utils/arena_allocator.hpp @@ -169,7 +169,7 @@ template inline bool operator==(const arena_allocator& lhs, const arena_allocator& rhs) noexcept { return std::is_same::value && - lhs.arena_ == reinterpret_cast(rhs.arena_); + lhs.arena_ == reinterpret_cast(rhs.arena_); } template diff --git a/3rdParty/iresearch/core/utils/attribute_range.cpp b/3rdParty/iresearch/core/utils/attribute_range.cpp new file mode 100644 index 000000000000..1a85822a4ef0 --- /dev/null +++ b/3rdParty/iresearch/core/utils/attribute_range.cpp @@ -0,0 +1,46 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Yuriy Popov +//////////////////////////////////////////////////////////////////////////////// + +#include "attribute_range.hpp" +#include "search/disjunction.hpp" + +NS_ROOT + +template<> +bool attribute_range>::next() { + assert(ars_); + value_ = ars_->get_next_iterator(); + return value_ != nullptr; +} + +#define DEFINE_ATTRIBUTE_RANGE_TYPE(AttributeRange) template<> IRESEARCH_API \ +/*static*/ const attribute::type_id& AttributeRange::type() { \ + static attribute::type_id type(#AttributeRange); \ + return type; \ +} + +#define DEFINE_ATTRIBUTE_RANGE(Adapter) DEFINE_ATTRIBUTE_RANGE_TYPE(attribute_range); + +DEFINE_ATTRIBUTE_RANGE(score_iterator_adapter); +DEFINE_ATTRIBUTE_RANGE(position_score_iterator_adapter); + +NS_END // ROOT diff --git a/3rdParty/iresearch/core/utils/attribute_range.hpp b/3rdParty/iresearch/core/utils/attribute_range.hpp new file mode 100644 index 000000000000..3ddf779dd7e7 --- /dev/null +++ b/3rdParty/iresearch/core/utils/attribute_range.hpp @@ -0,0 +1,65 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Yuriy Popov +//////////////////////////////////////////////////////////////////////////////// + +#ifndef IRESEARCH_ATTRIBUTE_RANGE_H +#define IRESEARCH_ATTRIBUTE_RANGE_H + +#include "attributes.hpp" + +NS_ROOT + +template +struct attribute_range_state { + virtual Adapter* get_next_iterator() = 0; + virtual void reset_next_iterator_state() = 0; + virtual ~attribute_range_state() = default; +}; + +template +class attribute_range final : public attribute { + public: + DECLARE_TYPE_ID(attribute::type_id); + + void set_state(attribute_range_state* ars) noexcept { + ars_ = ars; + } + + Adapter* value() noexcept { + return value_; + } + + bool next(); + + void reset() { + value_ = nullptr; + assert(ars_); + ars_->reset_next_iterator_state(); + } + + private: + attribute_range_state* ars_ = nullptr; + Adapter* value_ = nullptr; +}; // attribute_range + +NS_END // ROOT + +#endif diff --git a/3rdParty/iresearch/core/utils/attributes.cpp b/3rdParty/iresearch/core/utils/attributes.cpp index d4a9853f050d..f2b92a91820e 100644 --- a/3rdParty/iresearch/core/utils/attributes.cpp +++ b/3rdParty/iresearch/core/utils/attributes.cpp @@ -74,8 +74,6 @@ const flags& flags::empty_instance() { return instance; } -flags::flags() { } - flags::flags(flags&& rhs) noexcept : map_(std::move(rhs.map_)) { } @@ -186,4 +184,4 @@ NS_END // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE -// ----------------------------------------------------------------------------- \ No newline at end of file +// ----------------------------------------------------------------------------- diff --git a/3rdParty/iresearch/core/utils/attributes.hpp b/3rdParty/iresearch/core/utils/attributes.hpp index 6cc5228aeb91..bbea4564223d 100644 --- a/3rdParty/iresearch/core/utils/attributes.hpp +++ b/3rdParty/iresearch/core/utils/attributes.hpp @@ -170,7 +170,7 @@ class IRESEARCH_API flags { static const flags& empty_instance(); - flags(); + flags() = default; flags(const flags&) = default; flags(flags&& rhs) noexcept; flags(std::initializer_list flags); @@ -178,8 +178,8 @@ class IRESEARCH_API flags { flags& operator=(flags&& rhs) noexcept; flags& operator=(const flags&) = default; - type_map::const_iterator begin() const { return map_.begin(); } - type_map::const_iterator end() const { return map_.end(); } + type_map::const_iterator begin() const noexcept { return map_.begin(); } + type_map::const_iterator end() const noexcept { return map_.end(); } template< typename T > flags& add() { @@ -209,8 +209,8 @@ class IRESEARCH_API flags { return *this; } - bool empty() const { return map_.empty(); } - size_t size() const { return map_.size(); } + bool empty() const noexcept { return map_.empty(); } + size_t size() const noexcept { return map_.size(); } void clear() noexcept { map_.clear(); } void reserve(size_t /*capacity*/) { // NOOP for std::set diff --git a/3rdParty/iresearch/core/utils/automaton.hpp b/3rdParty/iresearch/core/utils/automaton.hpp index 6c57e6e1b6b4..7a1a7e417b8a 100644 --- a/3rdParty/iresearch/core/utils/automaton.hpp +++ b/3rdParty/iresearch/core/utils/automaton.hpp @@ -61,6 +61,9 @@ NS_BEGIN(fsa) class BooleanWeight { public: using ReverseWeight = BooleanWeight; + using PayloadType = irs::byte_type; + + static constexpr PayloadType MaxPayload = 0x3F; static const std::string& Type() { static const std::string type = "boolean"; @@ -71,62 +74,69 @@ class BooleanWeight { static constexpr BooleanWeight 
One() noexcept { return true; } static constexpr BooleanWeight NoWeight() noexcept { return {}; } - constexpr BooleanWeight() noexcept : v_(Value::NO_WEIGHT) { } - constexpr BooleanWeight(bool v) noexcept : v_(Value(char(v))) { } - static constexpr uint64 Properties() noexcept { return kLeftSemiring | kRightSemiring | kCommutative | kIdempotent | kPath; } - constexpr bool Member() const noexcept { return v_ != Value::NO_WEIGHT; } + constexpr BooleanWeight() noexcept : v_(Invalid) { } + constexpr BooleanWeight(bool v, PayloadType payload = 0) noexcept + : v_(PayloadType(v) | (payload << 2)) { + } + + constexpr bool Member() const noexcept { return 0 == (v_ & Invalid); } constexpr BooleanWeight Quantize(float delta = kDelta) const noexcept { return {}; } std::istream& Read(std::istream& strm) noexcept { - v_ = Value(strm.get()); + v_ = strm.get(); + if (strm.fail()) { + v_ = Invalid; + } return strm; } std::ostream& Write(std::ostream &strm) const noexcept { - strm << char(v_); + strm.put(v_); return strm; } - constexpr size_t Hash() const noexcept { return size_t(v_); } + constexpr size_t Hash() const noexcept { return size_t(v_ & WeightMask); } constexpr ReverseWeight Reverse() const noexcept { return *this; } - constexpr operator bool() const noexcept { return v_ == Value::TRUE; } + constexpr PayloadType Payload() const noexcept { return v_ >> 2; } + constexpr operator bool() const noexcept { return 0 != (v_ & True); } friend constexpr bool operator==(const BooleanWeight& lhs, const BooleanWeight& rhs) noexcept { - return lhs.v_ == rhs.v_; + return lhs.Hash() == rhs.Hash(); } friend constexpr bool operator!=(const BooleanWeight& lhs, const BooleanWeight& rhs) noexcept { return !(lhs == rhs); } - friend BooleanWeight Plus(const BooleanWeight& lhs, const BooleanWeight& rhs) noexcept { - return { bool(lhs.v_) || bool(rhs.v_) }; + friend constexpr BooleanWeight Plus(const BooleanWeight& lhs, const BooleanWeight& rhs) noexcept { + return BooleanWeight(bool(lhs.Hash()) || bool(rhs.Hash()), lhs.Payload() | rhs.Payload()); } - friend BooleanWeight Times(const BooleanWeight& lhs, const BooleanWeight& rhs) noexcept { - return { bool(lhs.v_) && bool(rhs.v_) }; + friend constexpr BooleanWeight Times(const BooleanWeight& lhs, const BooleanWeight& rhs) noexcept { + return BooleanWeight(bool(lhs.Hash()) && bool(rhs.Hash()), lhs.Payload() & rhs.Payload()); } - friend BooleanWeight Divide(BooleanWeight, BooleanWeight, DivideType) noexcept { - return { }; + friend constexpr BooleanWeight Divide(BooleanWeight, BooleanWeight, DivideType) noexcept { + return NoWeight(); } friend std::ostream& operator<<(std::ostream& strm, const BooleanWeight& w) { - if (Value::NO_WEIGHT != w.v_) { - strm << char(w.v_ + 48); + if (w.Member()) { + strm << "{" << char(bool(w) + 48) << "," << int(w.Payload()) << "}"; } return strm; } - friend bool ApproxEqual(const BooleanWeight& lhs, const BooleanWeight& rhs, - float delta = kDelta) { + friend constexpr bool ApproxEqual(const BooleanWeight& lhs, const BooleanWeight& rhs, + float delta = kDelta) { return lhs == rhs; } private: - enum Value : char { - NO_WEIGHT = -1, - FALSE = 0, - TRUE = 1 - }; - - Value v_; + static constexpr PayloadType WeightMask = 0x03; + static constexpr PayloadType True = 1; // "is true" mask + static constexpr PayloadType Invalid = 2; // "not a member" mask + + // [2..7] - payload + // [1] - "not a member" bit + // [0] - true/false bit + PayloadType v_; }; struct MinMaxLabel { diff --git a/3rdParty/iresearch/core/utils/automaton_utils.cpp 
b/3rdParty/iresearch/core/utils/automaton_utils.cpp index bcdccaeee638..e8bd14827644 100644 --- a/3rdParty/iresearch/core/utils/automaton_utils.cpp +++ b/3rdParty/iresearch/core/utils/automaton_utils.cpp @@ -27,8 +27,269 @@ #include "search/multiterm_query.hpp" #include "utils/fst_table_matcher.hpp" +NS_LOCAL + +using irs::automaton; + +// table contains indexes of states in +// utf8_transitions_builder::rho_states_ table +const automaton::Arc::Label UTF8_RHO_STATE_TABLE[] { + // 1 byte sequence (0-127) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // invalid sequence (128-191) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // 2 bytes sequence (192-223) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // 3 bytes sequence (224-239) + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // 4 bytes sequence (240-255) + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +NS_END + NS_ROOT +void utf8_emplace_arc( + automaton& a, + automaton::StateId from, + automaton::StateId rho_state, + const bytes_ref& label, + automaton::StateId to) { + if (fst::kNoStateId == rho_state) { + return utf8_emplace_arc(a, from, label, to); + } + + if (label.empty()) { + return; + } + + // reserve enough arcs and states (stated ids are sequential) + a.ReserveArcs(from, 256); + const auto id = a.NumStates(); + a.AddStates(3 + label.size() - 1); + + const automaton::StateId rho_states[] { rho_state, id, id + 1, id + 2 }; + + const automaton::Arc::Label lead = label.front(); + automaton::Arc::Label min = 0; + + for (; min < lead; ++min) { + a.EmplaceArc(from, min, rho_states[UTF8_RHO_STATE_TABLE[min]]); + } + + switch (label.size()) { + case 1: { + a.EmplaceArc(from, lead, to); + break; + } + case 2: { + const auto s0 = id + 3; + a.EmplaceArc(from, lead, s0); + a.EmplaceArc(s0, label[1], to); + a.EmplaceArc(s0, fst::fsa::kRho, rho_states[0]); + break; + } + case 3: { + const auto s0 = id + 3; + const auto s1 = id + 4; + a.EmplaceArc(from, lead, s0); + a.EmplaceArc(s0, label[1], s1); + a.EmplaceArc(s1, label[2], to); + a.EmplaceArc(s0, fst::fsa::kRho, rho_states[1]); + a.EmplaceArc(s1, fst::fsa::kRho, rho_states[0]); + break; + } + case 4: { + const auto s0 = id + 3; + const auto s1 = id + 4; + const auto s2 = id + 5; + a.EmplaceArc(from, lead, s0); + a.EmplaceArc(s0, label[1], s1); + a.EmplaceArc(s1, label[2], s2); + a.EmplaceArc(s2, label[3], to); + a.EmplaceArc(s0, fst::fsa::kRho, rho_states[2]); + a.EmplaceArc(s1, fst::fsa::kRho, rho_states[1]); + a.EmplaceArc(s2, fst::fsa::kRho, rho_states[0]); + break; + } + } + + for (++min; min < 256; ++min) { + a.EmplaceArc(from, min, rho_states[UTF8_RHO_STATE_TABLE[min]]); + } + + // connect intermediate states of default multi-byte UTF8 sequence + + a.EmplaceArc(rho_states[1], fst::fsa::kRho, rho_states[0]); + a.EmplaceArc(rho_states[2], fst::fsa::kRho, rho_states[1]); + a.EmplaceArc(rho_states[3], fst::fsa::kRho, rho_states[2]); +} + +void utf8_emplace_arc( + automaton& a, + automaton::StateId from, + const bytes_ref& 
label, + automaton::StateId to) { + switch (label.size()) { + case 1: { + a.EmplaceArc(from, label[0], to); + return; + } + case 2: { + const auto s0 = a.AddState(); + a.EmplaceArc(from, label[0], s0); + a.EmplaceArc(s0, label[1], to); + return; + } + case 3: { + const auto s0 = a.AddState(); + const auto s1 = a.AddState(); + a.EmplaceArc(from, label[0], s0); + a.EmplaceArc(s0, label[1], s1); + a.EmplaceArc(s1, label[2], to); + return; + } + case 4: { + const auto s0 = a.AddState(); + const auto s1 = a.AddState(); + const auto s2 = a.AddState(); + a.EmplaceArc(from, label[0], s0); + a.EmplaceArc(s0, label[1], s1); + a.EmplaceArc(s1, label[2], s2); + a.EmplaceArc(s2, label[3], to); + return; + } + } +} + +void utf8_emplace_rho_arc( + automaton& a, + automaton::StateId from, + automaton::StateId to) { + const auto id = a.NumStates(); // stated ids are sequential + a.AddStates(3); + const automaton::StateId rho_states[] { to, id, id + 1, id + 2 }; + + // add rho transitions + + for (automaton::Arc::Label label = 0; label < 256; ++label) { + a.EmplaceArc(from, label, rho_states[UTF8_RHO_STATE_TABLE[label]]); + } + + // connect intermediate states of default multi-byte UTF8 sequence + + a.EmplaceArc(rho_states[1], fst::fsa::kRho, rho_states[0]); + a.EmplaceArc(rho_states[2], fst::fsa::kRho, rho_states[1]); + a.EmplaceArc(rho_states[3], fst::fsa::kRho, rho_states[2]); +} + +void utf8_transitions_builder::insert( + automaton& a, + const byte_type* label, + const size_t size, + const automaton::StateId to) { + assert(label && size < 5); + + add_states(size); // ensure we have enough states + const size_t prefix = 1 + common_prefix_length(last_.c_str(), last_.size(), label, size); + minimize(a, prefix); // minimize suffix + + // add current word suffix + for (size_t i = prefix; i <= size; ++i) { + auto& p = states_[i - 1]; + p.arcs.emplace_back(label[i - 1], &states_[i]); + p.rho_id = rho_states_[size - i]; + } + + const bool is_final = last_.size() != size || prefix != (size + 1); + + if (is_final) { + states_[size].id = to; + } +} + +void utf8_transitions_builder::finish(automaton& a, automaton::StateId from) { +#ifdef IRESEARCH_DEBUG + auto ensure_empty = make_finally([this]() { + // ensure everything is cleaned up + assert(std::all_of( + states_.begin(), states_.end(), [](const state& s) noexcept { + return s.arcs.empty() && + s.id == fst::kNoStateId && + s.rho_id == fst::kNoStateId; + })); + }); +#endif + + auto& root = states_.front(); + minimize(a, 1); + + if (fst::kNoStateId == rho_states_[0]) { + // no default state: just add transitions from the + // root node to its successors + for (const auto& arc : root.arcs) { + a.EmplaceArc(from, arc.label, arc.id); + } + + root.clear(); + + return; + } + + // reserve enough memory to store all outbound transitions + + a.ReserveArcs(from, 256); + + // in presence of default state we have to add some extra + // transitions from root to properly handle multi-byte sequences + // and preserve correctness of arcs order + + auto add_rho_arc = [&a, from, this](automaton::Arc::Label label) { + const auto rho_state_idx = UTF8_RHO_STATE_TABLE[label]; + a.EmplaceArc(from, label, rho_states_[rho_state_idx]); + }; + + automaton::Arc::Label min = 0; + + for (const auto& arc : root.arcs) { + assert(arc.label < 256); + assert(min <= arc.label); // ensure arcs are sorted + + for (; min < arc.label; ++min) { + add_rho_arc(min); + } + + assert(min == arc.label); + a.EmplaceArc(from, min++, arc.id); + } + + root.clear(); + + // add remaining rho transitions + + for (; 
min < 256; ++min) { + add_rho_arc(min); + } + + // connect intermediate states of default multi-byte UTF8 sequence + + a.EmplaceArc(rho_states_[1], fst::fsa::kRho, rho_states_[0]); + a.EmplaceArc(rho_states_[2], fst::fsa::kRho, rho_states_[1]); + a.EmplaceArc(rho_states_[3], fst::fsa::kRho, rho_states_[2]); +} + filter::prepared::ptr prepare_automaton_filter(const string_ref& field, const automaton& acceptor, size_t scored_terms_limit, @@ -60,7 +321,12 @@ filter::prepared::ptr prepare_automaton_filter(const string_ref& field, auto& meta = it->attributes().get(); // get term metadata const decltype(irs::term_meta::docs_count) NO_DOCS = 0; - const auto& docs_count = meta ? meta->docs_count : NO_DOCS; + + // NOTE: we can't use reference to 'docs_count' here, like + // 'const auto& docs_count = meta ? meta->docs_count : NO_DOCS;' + // since not gcc4.9 nor msvc2015-2019 can handle this correctly + // probably due to broken optimization + const auto* docs_count = meta ? &meta->docs_count : &NO_DOCS; if (it->next()) { auto& state = states.insert(segment); @@ -69,8 +335,8 @@ filter::prepared::ptr prepare_automaton_filter(const string_ref& field, do { it->read(); // read term attributes - state.estimation += docs_count; - scorer.collect(docs_count, state.count++, state, segment, *it); + state.estimation += *docs_count; + scorer.collect(*docs_count, state.count++, state, segment, *it); } while (it->next()); } } diff --git a/3rdParty/iresearch/core/utils/automaton_utils.hpp b/3rdParty/iresearch/core/utils/automaton_utils.hpp index f05a1b1863bf..a8fa6c9af2c2 100644 --- a/3rdParty/iresearch/core/utils/automaton_utils.hpp +++ b/3rdParty/iresearch/core/utils/automaton_utils.hpp @@ -1,4 +1,4 @@ -//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2019 ArangoDB GmbH, Cologne, Germany @@ -24,44 +24,19 @@ #define IRESEARCH_AUTOMATON_UTILS_H #include "automaton.hpp" +#include "fst_states_map.hpp" +#include "hash_utils.hpp" +#include "utf8_utils.hpp" #include "formats/formats.hpp" #include "search/filter.hpp" NS_ROOT -inline automaton match_any_char() { - automaton a; - const auto start = a.AddState(); - const auto finish = a.AddState(); - a.SetStart(start); - a.EmplaceArc(start, fst::fsa::kRho, finish); - a.SetFinal(finish); - - return a; -} - -inline automaton match_any() { - automaton a; - const auto start = a.AddState(); - a.SetStart(start); - a.EmplaceArc(start, fst::fsa::kRho, start); - a.SetFinal(start); - - return a; -} - -inline automaton match_char(automaton::Arc::Label c) { - automaton a; - const auto start = a.AddState(); - a.SetStart(start); - a.EmplaceArc(start, c, start); - a.SetFinal(start); - - return a; -} - template -bool accept(const automaton& a, Matcher& matcher, const irs::basic_string_ref& target) { +inline automaton::Weight accept( + const automaton& a, + Matcher& matcher, + const basic_string_ref& target) { auto state = a.Start(); matcher.SetState(state); @@ -73,14 +48,14 @@ bool accept(const automaton& a, Matcher& matcher, const irs::basic_string_ref -bool accept(const automaton& a, const irs::basic_string_ref& target) { +inline automaton::Weight accept(const automaton& a, const basic_string_ref& target) { typedef fst::RhoMatcher matcher_t; - // FIXME optimize rho label lookup (just check last arc) matcher_t matcher(a, fst::MatchType::MATCH_INPUT, fst::fsa::kRho); return accept(a, matcher, target); } @@ -97,7 +72,7 @@ class 
automaton_term_iterator final : public seek_term_iterator { return *value_; } - virtual doc_iterator::ptr postings(const irs::flags& features) const override { + virtual doc_iterator::ptr postings(const flags& features) const override { return it_->postings(features); } @@ -152,6 +127,289 @@ class automaton_term_iterator final : public seek_term_iterator { const bytes_ref* value_; }; // automaton_term_iterator +////////////////////////////////////////////////////////////////////////////// +/// @class utf8_transitions_builder +/// @brief helper class for building minimal acyclic binary automaton from +/// a specified root, a default (rho) state and a set of arcs with +/// UTF-8 encoded labels +////////////////////////////////////////////////////////////////////////////// +class IRESEARCH_API utf8_transitions_builder { + public: + utf8_transitions_builder() + : states_map_(16, state_emplace(weight_)) { + // ensure we have enough space for utf8 sequence + add_states(utf8_utils::MAX_CODE_POINT_SIZE); + } + + template + void insert(automaton& a, + automaton::StateId from, + automaton::StateId rho_state, + Iterator begin, Iterator end) { + // we inherit weight from 'from' node to all intermediate states + // that were created by transitions builder + weight_ = a.Final(from); + last_ = bytes_ref::EMPTY; + states_map_.reset(); + + // 'from' state is already a part of automaton + assert(!states_.empty()); + states_.front().id = from; + + std::fill(std::begin(rho_states_), std::end(rho_states_), rho_state); + + if (fst::kNoStateId != rho_state) { + // create intermediate default states if necessary + a.SetFinal(rho_states_[1] = a.AddState(), weight_); + a.SetFinal(rho_states_[2] = a.AddState(), weight_); + a.SetFinal(rho_states_[3] = a.AddState(), weight_); + } + + for (; begin != end; ++begin) { + // we expect sorted input + assert(last_ <= std::get<0>(*begin)); + + const auto& label = std::get<0>(*begin); + insert(a, label.c_str(), label.size(), std::get<1>(*begin)); + last_ = label; + } + + finish(a, from); + } + + private: + struct state; + + struct arc : private util::noncopyable { + arc(automaton::Arc::Label label, state* target) + : target(target), + label(label) { + } + + arc(arc&& rhs) noexcept + : target(rhs.target), + label(rhs.label) { + } + + bool operator==(const automaton::Arc& rhs) const noexcept { + return label == rhs.ilabel + && id == rhs.nextstate; + } + + bool operator!=(const automaton::Arc& rhs) const noexcept { + return !(*this == rhs); + } + + union { + state* target; + automaton::StateId id; + }; + automaton::Arc::Label label; + }; // arc + + struct state : private util::noncopyable { + state() = default; + + state(state&& rhs) noexcept + : rho_id(rhs.rho_id), + id(rhs.id), + arcs(std::move(rhs.arcs)) { + rhs.rho_id = fst::kNoStateId; + rhs.id = fst::kNoStateId; + } + + void clear() noexcept { + rho_id = fst::kNoStateId; + id = fst::kNoStateId; + arcs.clear(); + } + + automaton::StateId rho_id{fst::kNoStateId}; + automaton::StateId id{fst::kNoStateId}; + std::vector arcs; + }; // state + + struct state_hash { + size_t operator()(const state& s, const automaton& fst) const noexcept { + if (fst::kNoStateId != s.id) { + return operator()(s.id, fst); + } + + size_t hash = 0; + + for (auto& arc: s.arcs) { + hash = hash_combine(hash, arc.label); + hash = hash_combine(hash, arc.id); + } + + if (fst::kNoStateId != s.rho_id) { + hash = hash_combine(hash, fst::fsa::kRho); + hash = hash_combine(hash, s.rho_id); + } + + return hash; + } + + size_t operator()(automaton::StateId id, 
const automaton& fst) const noexcept { fst::ArcIteratorData arcs; fst.InitArcIterator(id, &arcs); + + const auto* begin = arcs.arcs; + const auto* end = arcs.arcs + arcs.narcs; + + size_t hash = 0; + for (; begin != end; ++begin) { + hash = hash_combine(hash, begin->ilabel); + hash = hash_combine(hash, begin->nextstate); + } + + return hash; + } + }; // state_hash + + struct state_equal { + bool operator()(const state& lhs, automaton::StateId rhs, const automaton& fst) const noexcept { + if (lhs.id != fst::kNoStateId) { + // already a part of automaton + return lhs.id == rhs; + } + + fst::ArcIteratorData rarcs; + fst.InitArcIterator(rhs, &rarcs); + + const bool has_rho = (fst::kNoStateId != lhs.rho_id); + + if ((lhs.arcs.size() + size_t(has_rho)) != rarcs.narcs) { + return false; + } + + const auto* rarc = rarcs.arcs; + for (const auto& larc : lhs.arcs) { + if (larc != *rarc) { + return false; + } + ++rarc; + } + + if (has_rho && (rarc->ilabel != fst::fsa::kRho || rarc->nextstate != lhs.rho_id)) { + return false; + } + + return true; + } + }; // state_equal + + class state_emplace { + public: + explicit state_emplace(const automaton::Weight& weight) noexcept + : weight_(&weight) { + } + + automaton::StateId operator()(const state& s, automaton& fst) const { + auto id = s.id; + + if (id == fst::kNoStateId) { + id = fst.AddState(); + fst.SetFinal(id, *weight_); + } + + for (const auto& a : s.arcs) { + fst.EmplaceArc(id, a.label, a.id); + } + + if (s.rho_id != fst::kNoStateId) { + fst.EmplaceArc(id, fst::fsa::kRho, s.rho_id); + } + + return id; + } + + private: + const automaton::Weight* weight_; + }; // state_emplace + + using automaton_states_map = fst_states_map< + automaton, state, + state_emplace, state_hash, + state_equal, fst::kNoStateId>; + + void add_states(size_t size) { + // reserve size + 1 for root state + if (states_.size() < ++size) { + states_.resize(size); + } + } + + void minimize(automaton& a, size_t prefix) { + assert(prefix > 0); + + for (size_t i = last_.size(); i >= prefix; --i) { + state& s = states_[i]; + state& p = states_[i - 1]; + assert(!p.arcs.empty()); + + p.arcs.back().id = states_map_.insert(s, a); + + s.clear(); + } + } + + void insert(automaton& a, + const byte_type* label_data, + const size_t label_size, + automaton::StateId target); + + void finish(automaton& a, automaton::StateId from); + + automaton::Weight weight_; + automaton::StateId rho_states_[4]; + std::vector states_; + automaton_states_map states_map_; + bytes_ref last_; +}; // utf8_transitions_builder + +////////////////////////////////////////////////////////////////////////////// +/// @brief establish UTF-8 labeled connection between specified source and +/// target states +////////////////////////////////////////////////////////////////////////////// +IRESEARCH_API void utf8_emplace_arc( + automaton& a, + automaton::StateId from, + const bytes_ref& label, + automaton::StateId to); + +////////////////////////////////////////////////////////////////////////////// +/// @brief establish UTF-8 labeled connection between specified source (from) +/// and target (to) states with the fallback to default (rho_state) +/// state +////////////////////////////////////////////////////////////////////////////// +IRESEARCH_API void utf8_emplace_arc( + automaton& a, + automaton::StateId from, + automaton::StateId rho_state, + const bytes_ref& label, + automaton::StateId to); + +////////////////////////////////////////////////////////////////////////////// +/// @brief establish default connection between 
specified source (from) and +/// and target (to) +////////////////////////////////////////////////////////////////////////////// +IRESEARCH_API void utf8_emplace_rho_arc( + automaton& a, + automaton::StateId from, + automaton::StateId to); + +////////////////////////////////////////////////////////////////////////////// +/// @brief instantiate compiled filter based on a specified automaton, field +/// and other properties +/// @param field field name +/// @param acceptor input automaton +/// @param scored_terms_limit score as many terms +/// @param index index reader +/// @param order compiled order +/// @param bool query boost +/// @returns compiled filter +////////////////////////////////////////////////////////////////////////////// IRESEARCH_API filter::prepared::ptr prepare_automaton_filter( const string_ref& field, const automaton& acceptor, diff --git a/3rdParty/iresearch/core/utils/fst.hpp b/3rdParty/iresearch/core/utils/fst.hpp index b8167965904c..2ecbd6245bf9 100644 --- a/3rdParty/iresearch/core/utils/fst.hpp +++ b/3rdParty/iresearch/core/utils/fst.hpp @@ -58,6 +58,7 @@ #include #include "shared.hpp" +#include "utils/fst_states_map.hpp" #include "utils/string.hpp" #include "utils/noncopyable.hpp" @@ -96,21 +97,23 @@ class fst_builder : util::noncopyable { return; } + const auto size = in.size(); + // determine common prefix - const size_t pref = 1 + prefix(last_, in); + const size_t pref = 1 + common_prefix_length(last_, in); // add states for current input - add_states(in.size()); + add_states(size); // minimize last word suffix minimize(pref); // add current word suffix - for (size_t i = pref; i <= in.size(); ++i) { + for (size_t i = pref; i <= size; ++i) { states_[i - 1].arcs.emplace_back(in[i - 1], &states_[i]); } - const bool is_final = last_.size() != in.size() || pref != (in.size() + 1); + const bool is_final = last_.size() != size || pref != (size + 1); decltype(fst::DivideLeft(out, out)) output = out; @@ -142,7 +145,7 @@ class fst_builder : util::noncopyable { if (is_final) { // set final state { - state& s = states_[in.size()]; + state& s = states_[size]; s.final = true; } @@ -153,7 +156,7 @@ class fst_builder : util::noncopyable { s.arcs.back().out = std::move(output); } } else { - state& s = states_[in.size()]; + state& s = states_[size]; assert(s.arcs.size()); assert(s.arcs.back().label == in[pref - 1]); s.arcs.back().out = fst::Times(s.arcs.back().out, output); @@ -163,13 +166,17 @@ class fst_builder : util::noncopyable { } void finish() { - stateid_t start; - if (states_.empty()) { - start = final; - } else { + stateid_t start = fst_builder::final; + + if (!states_.empty()) { // minimize last word suffix minimize(1); - start = states_map_.insert(states_[0], fst_); + + auto& root = states_[0]; + + if (!root.arcs.empty() || !root.final) { + start = states_map_.insert(root, fst_); + } } // set the start state @@ -205,24 +212,16 @@ class fst_builder : util::noncopyable { out(std::move(rhs.out)) { } - bool operator==(const arc_t& rhs) const { + bool operator==(const arc_t& rhs) const noexcept { return label == rhs.ilabel && id == rhs.nextstate && out == rhs.weight; } - bool operator!=(const arc_t& rhs) const { + bool operator!=(const arc_t& rhs) const noexcept { return !(*this == rhs); } - friend size_t hash_value(const arc& a) { - size_t hash = 0; - ::boost::hash_combine(hash, a.label); - ::boost::hash_combine(hash, a.id); - ::boost::hash_combine(hash, a.out.Hash()); - return hash; - } - union { state* target; stateid_t id; @@ -240,20 +239,10 @@ class fst_builder : 
util::noncopyable { final(rhs.final) { } - void clear() { + void clear() noexcept { arcs.clear(); - final = false; out = weight_t::One(); - } - - friend size_t hash_value(const state& s) { - size_t seed = 0; - - for (auto& arc: s.arcs) { - ::boost::hash_combine(seed, arc); - } - - return seed; + final = false; } std::vector arcs; @@ -261,118 +250,78 @@ class fst_builder : util::noncopyable { bool final{ false }; }; // state - class state_map : private util::noncopyable { - public: - static const size_t InitialSize = 16; - - state_map(): states_(InitialSize, fst::kNoStateId) {} - stateid_t insert(const state& s, fst_t& fst) { - if (s.arcs.empty() && s.final) { - return fst_builder::final; + struct state_equal { + bool operator()(const state& lhs, stateid_t rhs, const fst_t& fst) const { + if (lhs.arcs.size() != fst.NumArcs(rhs)) { + return false; } - stateid_t id; - const size_t mask = states_.size() - 1; - size_t pos = hash_value(s) % mask; - for ( ;; ++pos, pos %= mask ) { // TODO: maybe use quadratic probing here - if (fst::kNoStateId == states_[pos]) { - states_[pos] = id = add_state(s, fst); - ++count_; - - if (count_ > 2 * states_.size() / 3) { - rehash(fst); - } - break; - } else if (equals( s, states_[pos], fst)) { - id = states_[pos]; - break; + fst::ArcIterator rhs_arc(fst, rhs); + + for (auto& lhs_arc : lhs.arcs) { + if (lhs_arc != rhs_arc.Value()) { + return false; } + + rhs_arc.Next(); } - return id; + assert(rhs_arc.Done()); + return true; } + }; - void reset() { - count_ = 0; - std::fill(states_.begin(), states_.end(), fst::kNoStateId); - } + struct state_hash { + size_t operator()(const state& s, const fst_t& /*fst*/) const noexcept { + size_t hash = 0; - private: - static bool equals(const state& lhs, stateid_t rhs, const fst_t& fst) { - if (fst.NumArcs( rhs ) != lhs.arcs.size() ) { - return false; + for (auto& a: s.arcs) { + ::boost::hash_combine(hash, a.label); + ::boost::hash_combine(hash, a.id); + ::boost::hash_combine(hash, a.out.Hash()); } - for (fst::ArcIteratorit(fst, rhs); !it.Done(); it.Next()) { - if (lhs.arcs[it.Position()] != it.Value()) { - return false; - } - } - return true; + return hash; } - static size_t hash(stateid_t id, const fst_t& fst) { + size_t operator()(stateid_t id, const fst_t& fst) const noexcept { size_t hash = 0; - for (fst::ArcIterator< fst_t > it(fst, id); !it.Done(); it.Next()) { + + for (fst::ArcIterator it(fst, id); !it.Done(); it.Next()) { const arc_t& a = it.Value(); ::boost::hash_combine(hash, a.ilabel); ::boost::hash_combine(hash, a.nextstate); ::boost::hash_combine(hash, a.weight.Hash()); } - return hash; - } - - void rehash(const fst_t& fst) { - std::vector< stateid_t > states(states_.size() * 2, fst::kNoStateId); - const size_t mask = states.size() - 1; - for (stateid_t id : states_) { - - if (fst::kNoStateId == id) { - continue; - } - - size_t pos = hash(id, fst) % mask; - for (;;++pos, pos %= mask) { // TODO: maybe use quadratic probing here - if (fst::kNoStateId == states[pos] ) { - states[pos] = id; - break; - } - } - } - states_ = std::move(states); + return hash; } + }; - stateid_t add_state(const state& s, fst_t& fst) { + struct state_emplace { + stateid_t operator()(const state& s, fst_t& fst) const { const stateid_t id = fst.AddState(); + if (s.final) { fst.SetFinal(id, s.out); } for (const arc& a : s.arcs) { - fst.AddArc(id, arc_t(a.label, a.label, a.out, a.id)); + fst.EmplaceArc(id, a.label, a.label, a.out, a.id); } return id; } + }; - // TODO: maybe use "buckets" here - std::vector states_; - size_t count_{}; - }; // 
state_map - - static size_t prefix(const key_t& lhs, const key_t& rhs) { - size_t pref = 0; - const size_t max = std::min( lhs.size(), rhs.size() ); - while ( pref < max && lhs[pref] == rhs[pref] ) { - ++pref; - } - return pref; - } + using states_map = fst_states_map< + fst_t, state, + state_emplace, state_hash, + state_equal, fst::kNoStateId>; void add_states(size_t size) { // reserve size + 1 for root state - if ( states_.size() < ++size ) { + if (states_.size() < ++size) { states_.resize(size); } } @@ -383,14 +332,17 @@ class fst_builder : util::noncopyable { for (size_t i = last_.size(); i >= pref; --i) { state& s = states_[i]; state& p = states_[i - 1]; + assert(!p.arcs.empty()); + p.arcs.back().id = s.arcs.empty() && s.final + ? fst_builder::final + : states_map_.insert(s, fst_); - p.arcs.back().id = states_map_.insert(s, fst_); s.clear(); } } - state_map states_map_; + states_map states_map_; std::vector states_; // current states weight_t start_out_; // output for "empty" input key_t last_; diff --git a/3rdParty/iresearch/core/utils/fst_states_map.hpp b/3rdParty/iresearch/core/utils/fst_states_map.hpp new file mode 100644 index 000000000000..005b958d4732 --- /dev/null +++ b/3rdParty/iresearch/core/utils/fst_states_map.hpp @@ -0,0 +1,144 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
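// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): the fst_builder
// refactor above and the utf8_transitions_builder earlier in this series both
// delegate state deduplication to the policy-based table introduced in the new
// fst_states_map.hpp below. The wiring is roughly:
//
//   using states_map = fst_states_map<
//     fst_t, state,             // FST type and "pending" (not yet added) state
//     state_emplace,            // materialises a pending state in the FST
//     state_hash, state_equal,  // hash/compare pending vs. existing states
//     fst::kNoStateId>;         // sentinel marking an empty bucket
//
//   // minimize(): every finished suffix state is either matched to an
//   // equivalent state already present in the FST or appended as a new one:
//   p.arcs.back().id = states_map_.insert(s, fst_);
// ---------------------------------------------------------------------------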
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrey Abramov +//////////////////////////////////////////////////////////////////////////////// + +#ifndef IRESEARCH_FST_STATES_MAP_H +#define IRESEARCH_FST_STATES_MAP_H + +#include + +#include "shared.hpp" +#include "ebo.hpp" +#include "noncopyable.hpp" + +NS_ROOT + +////////////////////////////////////////////////////////////////////////////// +/// @class fst_states_map +/// @brief helper class for deduplication of fst states while building +/// minimal acyclic subsequential transducer +////////////////////////////////////////////////////////////////////////////// +template +class fst_states_map : private compact<0, Hash>, + private compact<1, StateEq>, + private compact<2, PushState>, + private util::noncopyable { + public: + using fst_type = Fst; + using state_type = State; + using state_id = typename fst_type::StateId; + using push_state = PushState; + using hasher = Hash; + using state_equal = StateEq; + + explicit fst_states_map( + size_t capacity = 16, + const push_state state_emplace = {}, + const hasher& hash_function = {}, + const state_equal& state_eq = {}) + : compact<0, hasher>{ hash_function }, + compact<1, state_equal>{ state_eq }, + compact<2, push_state>{ state_emplace }, + states_(capacity, NoStateId) { + } + + state_id insert(const state_type& s, fst_type& fst) { + const auto state_equal = state_eq(); + const auto hasher = hash_function(); + + const size_t mask = states_.size() - 1; + size_t pos = hasher(s, fst) % mask; + for (;;++pos, pos %= mask) { + auto& bucket = states_[pos]; + + if (NoStateId == bucket) { + const auto push_state = state_emplace(); + const state_id id = bucket = push_state(s, fst); + assert(hasher(s, fst) == hasher(id, fst)); + ++count_; + + if (count_ > 2 * states_.size() / 3) { + rehash(fst); + } + + return id; + } + + if (state_equal(s, bucket, fst)) { + return bucket; + } + } + } + + void reset() noexcept { + count_ = 0; + std::fill(states_.begin(), states_.end(), NoStateId); + } + + hasher hash_function() const noexcept { + return compact<0, hasher>::get(); + } + + state_equal state_eq() const noexcept { + return compact<1, state_equal>::get(); + } + + push_state state_emplace() const noexcept { + return compact<2, push_state>::get(); + } + + private: + void rehash(const fst_type& fst) { + const auto hasher = hash_function(); + + std::vector states(states_.size() * 2, NoStateId); + const size_t mask = states.size() - 1; + for (const auto id : states_) { + if (NoStateId == id) { + continue; + } + + size_t pos = hasher(id, fst) % mask; + for (;;++pos, pos %= mask) { + auto& bucket = states[pos]; + + if (NoStateId == bucket) { + bucket = id; + break; + } + } + } + + states_ = std::move(states); + } + + std::vector states_; + size_t count_{}; +}; // fst_states_map + +NS_END + +#endif diff --git a/3rdParty/iresearch/core/utils/fst_table_matcher.hpp b/3rdParty/iresearch/core/utils/fst_table_matcher.hpp index ebaaf78c9728..cce094b51f57 100644 --- a/3rdParty/iresearch/core/utils/fst_table_matcher.hpp +++ b/3rdParty/iresearch/core/utils/fst_table_matcher.hpp @@ -64,16 +64,16 @@ class TableMatcher final : public MatcherBase { // expected FST properties static constexpr auto FST_PROPERTIES = - (MATCH_TYPE == MATCH_INPUT ? kNoIEpsilons : kNoOEpsilons) - | (MATCH_TYPE == MATCH_INPUT ? kILabelSorted : kOLabelSorted) - | (MATCH_TYPE == MATCH_INPUT ? kIDeterministic : kODeterministic) - | kAcceptor; + (MATCH_TYPE == MATCH_INPUT ? 
kILabelSorted : kOLabelSorted) + | (MATCH_TYPE == MATCH_INPUT ? kIDeterministic : kODeterministic) + | kAcceptor; explicit TableMatcher(const FST& fst, Label rho) : start_labels_(fst::getStartLabels(fst)), arc_(kNoLabel, kNoLabel, Weight::NoWeight(), kNoStateId), rho_(rho), fst_(&fst), error_(fst.Properties(FST_PROPERTIES, true) != FST_PROPERTIES) { + assert(!error_); const size_t numLabels = start_labels_.size(); // initialize transition table diff --git a/3rdParty/iresearch/core/utils/levenshtein_utils.cpp b/3rdParty/iresearch/core/utils/levenshtein_utils.cpp index e84239657e6d..d5308a920a6c 100644 --- a/3rdParty/iresearch/core/utils/levenshtein_utils.cpp +++ b/3rdParty/iresearch/core/utils/levenshtein_utils.cpp @@ -23,11 +23,12 @@ #include "levenshtein_utils.hpp" #include +#include #include #include "shared.hpp" #include "store/store_utils.hpp" -#include "automaton.hpp" +#include "automaton_utils.hpp" #include "arena_allocator.hpp" #include "bit_utils.hpp" #include "bitset.hpp" @@ -127,12 +128,15 @@ class parametric_state { } } - for (auto begin = positions_.data(); begin != positions_.data() + positions_.size();) { - if (subsumes(new_pos, *begin)) { - std::swap(*begin, positions_.back()); - positions_.pop_back(); // removed positions subsumed by new_pos - } else { - ++begin; + if (!positions_.empty()) { + for (auto begin = positions_.data(), end = positions_.data() + positions_.size(); begin != end; ) { + if (subsumes(new_pos, *begin)) { + std::swap(*begin, positions_.back()); + positions_.pop_back(); // removed positions subsumed by new_pos + end = positions_.data() + positions_.size(); + } else { + ++begin; + } } } @@ -190,7 +194,7 @@ class parametric_states { } uint32_t emplace(parametric_state&& state) { - const auto res = irs::map_utils::try_emplace( + const auto res = map_utils::try_emplace( states_, std::move(state), states_.size()); if (res.second) { @@ -360,43 +364,56 @@ uint32_t distance( // --SECTION-- Helpers for DFA instantiation // ----------------------------------------------------------------------------- +struct character { + bitset chi; // characteristic vector + byte_type utf8[utf8_utils::MAX_CODE_POINT_SIZE]{}; + size_t size{}; + uint32_t cp{}; // utf8 code point + + const byte_type* begin() const noexcept { return utf8; } + const byte_type* end() const noexcept { return utf8 + size; } +}; + ////////////////////////////////////////////////////////////////////////////// /// @return characteristic vectors for a specified word ////////////////////////////////////////////////////////////////////////////// -std::vector> make_alphabet( +std::vector make_alphabet( const bytes_ref& word, size_t& utf8_size) { memory::arena arena; memory::arena_vector chars(arena); - utf8_utils::to_utf8(word, std::back_inserter(chars)); + utf8_utils::utf8_to_utf32(word, std::back_inserter(chars)); utf8_size = chars.size(); std::sort(chars.begin(), chars.end()); auto cbegin = chars.begin(); auto cend = std::unique(cbegin, chars.end()); // no need to erase here - std::vector> alphabet(1 + size_t(std::distance(cbegin, cend))); // +1 for rho + std::vector alphabet(1 + size_t(std::distance(cbegin, cend))); // +1 for rho auto begin = alphabet.begin(); // ensure we have enough capacity const auto capacity = utf8_size + bits_required(); - begin->first = fst::fsa::kRho; - begin->second.reset(capacity); + begin->cp = fst::fsa::kRho; + begin->chi.reset(capacity); ++begin; for (; cbegin != cend; ++cbegin, ++begin) { const auto c = *cbegin; - // set char - begin->first = c; + // set code point + 
begin->cp = c; + + // set utf8 representation + begin->size = utf8_utils::utf32_to_utf8(c, begin->utf8); // evaluate characteristic vector - auto& bits = begin->second; - bits.reset(capacity); + auto& chi = begin->chi; + chi.reset(capacity); auto utf8_begin = word.begin(); for (size_t i = 0; i < utf8_size; ++i) { - bits.reset(i, c == utf8_utils::next(utf8_begin)); + chi.reset(i, c == utf8_utils::next(utf8_begin)); } IRS_ASSERT(utf8_begin == word.end()); } @@ -432,16 +449,6 @@ uint64_t chi(const bitset& bs, size_t offset, uint64_t mask) noexcept { return (lhs | rhs) & mask; } -////////////////////////////////////////////////////////////////////////////// -/// @return true if a specified state at a given offset is accepting, -/// false - otherwise -////////////////////////////////////////////////////////////////////////////// -FORCE_INLINE bool is_accepting( - const parametric_description& description, - size_t state, size_t offset) noexcept { - return description.distance(state, offset) <= description.max_distance(); -} - NS_END NS_ROOT @@ -585,24 +592,42 @@ automaton make_levenshtein_automaton( std::vector transitions(description.size()*num_offsets, fst::kNoStateId); - // result automaton automaton a; a.ReserveStates(transitions.size()); - const auto invalid_state = a.AddState(); // state without outbound transitions - a.SetStart(a.AddState()); // initial state + + // terminal state without outbound transitions + const auto invalid_state = a.AddState(); + assert(INVALID_STATE == invalid_state); + UNUSED(invalid_state); + + // initial state + a.SetStart(a.AddState()); + + // check if start state is final + const auto distance = description.distance(1, utf8_size); + + if (distance <= description.max_distance()) { + assert(distance < fst::fsa::BooleanWeight::MaxPayload); + a.SetFinal(a.Start(), {true, distance}); + } // state stack std::vector stack; stack.emplace_back(0, 1, a.Start()); // 0 offset, 1st parametric state, initial automaton state - while (!stack.empty()) { + std::vector> arcs; + arcs.resize(utf8_size); // allocate space for max possible number of arcs + + for (utf8_transitions_builder builder; !stack.empty(); ) { const auto state = stack.back(); stack.pop_back(); + arcs.clear(); automaton::StateId default_state = fst::kNoStateId; // destination of rho transition if exist + bool ascii = true; // ascii only input for (auto& entry : alphabet) { - const auto chi = ::chi(entry.second, state.offset, mask); + const auto chi = ::chi(entry.chi, state.offset, mask); auto& transition = description.transition(state.state_id, chi); const size_t offset = transition.first ? 
transition.second + state.offset : 0; @@ -610,38 +635,53 @@ automaton make_levenshtein_automaton( auto& to = transitions[transition.first*num_offsets + offset]; if (INVALID_STATE == transition.first) { - to = invalid_state; + to = INVALID_STATE; } else if (fst::kNoStateId == to) { to = a.AddState(); - if (is_accepting(description, transition.first, utf8_size - offset)) { - a.SetFinal(to); + const auto distance = description.distance(transition.first, utf8_size - offset); + + if (distance <= description.max_distance()) { + assert(distance < fst::fsa::BooleanWeight::MaxPayload); + a.SetFinal(to, {true, distance}); } stack.emplace_back(offset, transition.first, to); } if (chi && to != default_state) { - a.EmplaceArc(state.from, entry.first, to); - } else if (fst::kNoStateId == default_state) { + arcs.emplace_back(bytes_ref(entry.utf8, entry.size), to); + ascii &= (entry.size == 1); + } else { + assert(fst::kNoStateId == default_state || to == default_state); default_state = to; } } - if (fst::kNoStateId != default_state) { - a.EmplaceArc(state.from, fst::fsa::kRho, default_state); + if (INVALID_STATE == default_state && arcs.empty()) { + // optimization for invalid terminal state + a.EmplaceArc(state.from, fst::fsa::kRho, INVALID_STATE); + } else if (INVALID_STATE == default_state && ascii && !a.Final(state.from)) { + // optimization for ascii only input without default state and weight + for (auto& arc: arcs) { + assert(1 == arc.first.size()); + a.EmplaceArc(state.from, arc.first.front(), arc.second); + } + } else { + builder.insert(a, state.from, default_state, arcs.begin(), arcs.end()); } } #ifdef IRESEARCH_DEBUG // ensure resulting automaton is sorted and deterministic - constexpr auto EXPECTED_PROPERTIES = + static constexpr auto EXPECTED_PROPERTIES = fst::kIDeterministic | fst::kODeterministic | - fst::kILabelSorted | fst::kOLabelSorted; - assert(a.Properties(EXPECTED_PROPERTIES, true)); + fst::kILabelSorted | fst::kOLabelSorted | + fst::kAcceptor; + assert(EXPECTED_PROPERTIES == a.Properties(EXPECTED_PROPERTIES, true)); // ensure invalid state has no outbound transitions - assert(0 == a.NumArcs(invalid_state)); + assert(0 == a.NumArcs(INVALID_STATE)); #endif return a; @@ -654,7 +694,7 @@ size_t edit_distance(const parametric_description& description, memory::arena arena; memory::arena_vector lhs_chars(arena); - utf8_utils::to_utf8(lhs, lhs_size, std::back_inserter(lhs_chars)); + utf8_utils::utf8_to_utf32(lhs, lhs_size, std::back_inserter(lhs_chars)); size_t state = 1; // current parametric state size_t offset = 0; // current offset @@ -678,4 +718,45 @@ size_t edit_distance(const parametric_description& description, return description.distance(state, lhs_chars.size() - offset); } +bool edit_distance( + size_t& distance, + const parametric_description& description, + const byte_type* lhs, size_t lhs_size, + const byte_type* rhs, size_t rhs_size) { + assert(description); + + memory::arena arena; + memory::arena_vector lhs_chars(arena); + if (!utf8_utils::utf8_to_utf32(lhs, lhs_size, std::back_inserter(lhs_chars))) { + return false; + } + + size_t state = 1; // current parametric state + size_t offset = 0; // current offset + + for (auto* rhs_end = rhs + rhs_size; rhs < rhs_end; ) { + const auto c = utf8_utils::next_checked(rhs, rhs_end); + + if (utf8_utils::INVALID_CODE_POINT == c) { + return false; + } + + const auto begin = lhs_chars.begin() + ptrdiff_t(offset); + const auto end = std::min(begin + ptrdiff_t(description.chi_size()), lhs_chars.end()); + const auto chi = ::chi(begin, end, 
c); + const auto& transition = description.transition(state, chi); + + if (INVALID_STATE == transition.first) { + distance = description.max_distance() + 1; + return true; + } + + state = transition.first; + offset += transition.second; + } + + distance = description.distance(state, lhs_chars.size() - offset); + return true; +} + NS_END diff --git a/3rdParty/iresearch/core/utils/levenshtein_utils.hpp b/3rdParty/iresearch/core/utils/levenshtein_utils.hpp index 7ab480a9c17b..4f654cd149f3 100644 --- a/3rdParty/iresearch/core/utils/levenshtein_utils.hpp +++ b/3rdParty/iresearch/core/utils/levenshtein_utils.hpp @@ -73,7 +73,9 @@ inline size_t edit_distance(const T* lhs, size_t lhs_size, /// @param rhs string to compare /// @returns edit distance //////////////////////////////////////////////////////////////////////////////// -inline size_t edit_distance(const bytes_ref& lhs, const bytes_ref& rhs) { +template +inline size_t edit_distance(const basic_string_ref& lhs, + const basic_string_ref& rhs) { return edit_distance(lhs.begin(), lhs.size(), rhs.begin(), rhs.size()); } @@ -236,8 +238,9 @@ IRESEARCH_API parametric_description read(data_input& in); //////////////////////////////////////////////////////////////////////////////// /// @brief instantiates DFA based on provided parametric description and target /// @param description parametric description -/// @param target actual "string" (utf8 encoded) +/// @param target valid UTF-8 encoded string /// @returns DFA +/// @note if 'target' isn't a valid UTF-8 sequence, behaviour is undefined //////////////////////////////////////////////////////////////////////////////// IRESEARCH_API automaton make_levenshtein_automaton( const parametric_description& description, @@ -277,6 +280,47 @@ inline size_t edit_distance( return edit_distance(description, lhs.begin(), lhs.size(), rhs.begin(), rhs.size()); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief evaluates edit distance between the specified words up to +/// specified in description.max_distance. 
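// Editor's note (usage sketch, not part of the patch): assuming the usual
// 'irs' namespace alias, and that 'description', 'lhs' and 'rhs' are a valid
// parametric_description and two byte strings, the checked overload declared
// below is intended to be used along these lines:
//
//   size_t dist = 0;
//   if (irs::edit_distance(dist, description, lhs, rhs)) {
//     // dist <= description.max_distance()      : exact edit distance
//     // dist == description.max_distance() + 1  : the words are farther apart
//   } else {
//     // lhs or rhs is not a valid UTF-8 sequence
//   }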
+/// @param evaluated edit distance +/// @param description parametric description +/// @param lhs string to compare (utf8 encoded) +/// @param lhs_size size of the string to comprare +/// @param rhs string to compare (utf8 encoded) +/// @param rhs_size size of the string to comprare +/// @returns true if both lhs_string and rhs_strign are valid UTF-8 sequences, +/// false - otherwise +/// @note accepts only valid descriptions, calling function with +/// invalid description is undefined behaviour +//////////////////////////////////////////////////////////////////////////////// +IRESEARCH_API bool edit_distance( + size_t& distance, + const parametric_description& description, + const byte_type* lhs, size_t lhs_size, + const byte_type* rhs, size_t rhs_size); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief evaluates edit distance between the specified words up to +/// specified in description.max_distance +/// @param description parametric description +/// @param lhs string to compare (utf8 encoded) +/// @param rhs string to compare (utf8 encoded) +/// @returns true if both lhs_string and rhs_strign are valid UTF-8 sequences, +/// false - otherwise +/// @note accepts only valid descriptions, calling function with +/// invalid description is undefined behaviour +//////////////////////////////////////////////////////////////////////////////// +inline bool edit_distance( + size_t& distance, + const parametric_description& description, + const bytes_ref& lhs, + const bytes_ref& rhs) { + return edit_distance(distance, description, + lhs.begin(), lhs.size(), + rhs.begin(), rhs.size()); +} + NS_END #endif // IRESEARCH_LEVENSHTEIN_UTILS_H diff --git a/3rdParty/iresearch/core/utils/locale_utils.cpp b/3rdParty/iresearch/core/utils/locale_utils.cpp index faa149ade10e..d3588211a138 100644 --- a/3rdParty/iresearch/core/utils/locale_utils.cpp +++ b/3rdParty/iresearch/core/utils/locale_utils.cpp @@ -2530,7 +2530,7 @@ class num_put_facet: public std::num_put { return nullptr; } - return std::move(ctx); + return ctx; } void reset(const std::ios_base& str) { @@ -3660,7 +3660,7 @@ NS_BEGIN( locale_utils ) ) { return std::use_facet>(locale); } -#elif defined(_MSC_VER) && _MSC_VER <= 1923// MSVC2015/MSVC2017/MSVC2019 +#elif defined(_MSC_VER) && _MSC_VER <= 1924// MSVC2015/MSVC2017/MSVC2019 // MSVC2015/MSVC2017 implementations do not support char16_t/char32_t 'codecvt' // due to a missing export, as per their comment: // This is an active bug in our database (VSO#143857), which we'll investigate diff --git a/3rdParty/iresearch/core/utils/locale_utils.hpp b/3rdParty/iresearch/core/utils/locale_utils.hpp index 3a14e1aa4dae..e2052e75f2ac 100644 --- a/3rdParty/iresearch/core/utils/locale_utils.hpp +++ b/3rdParty/iresearch/core/utils/locale_utils.hpp @@ -49,7 +49,7 @@ const std::codecvt& codecvt(std::locale const& locale) { IRESEARCH_API const std::codecvt& codecvt( std::locale const& locale ); -#elif defined(_MSC_VER) && _MSC_VER <= 1923 // MSVC2015/MSVC2017/MSVC2019 +#elif defined(_MSC_VER) && _MSC_VER <= 1924 // MSVC2015/MSVC2017/MSVC2019 // MSVC2015/MSVC2017 implementations do not support char16_t/char32_t 'codecvt' // due to a missing export, as per their comment: // This is an active bug in our database (VSO#143857), which we'll investigate diff --git a/3rdParty/iresearch/core/utils/log.cpp b/3rdParty/iresearch/core/utils/log.cpp index 9328f8230293..07752aefff1f 100644 --- a/3rdParty/iresearch/core/utils/log.cpp +++ b/3rdParty/iresearch/core/utils/log.cpp @@ -370,9 
+370,9 @@ bool stack_trace_libunwind(iresearch::logger::level_t level); // predeclaration } size_t buf_len = 0; - constexpr const size_t buf_size = 1024; // arbitrary size + constexpr size_t buf_size = 1024; // arbitrary size char buf[buf_size]; - std::thread thread([&pipefd, level, out, &buf, &buf_len, buf_size]()->void { + std::thread thread([&pipefd, level, out, &buf, &buf_len]()->void { for (char ch; read(pipefd[0], &ch, 1) > 0;) { if (ch != '\n') { if (buf_len < buf_size - 1) { diff --git a/3rdParty/iresearch/core/utils/ngram_match_utils.hpp b/3rdParty/iresearch/core/utils/ngram_match_utils.hpp new file mode 100644 index 000000000000..c58fe395115a --- /dev/null +++ b/3rdParty/iresearch/core/utils/ngram_match_utils.hpp @@ -0,0 +1,138 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrei Lobov +//////////////////////////////////////////////////////////////////////////////// + +#ifndef IRESEARCH_NGRAM_MATCH_UTILS_H +#define IRESEARCH_NGRAM_MATCH_UTILS_H + +#include "shared.hpp" +#include "utf8_utils.hpp" +#include + +NS_ROOT + +//////////////////////////////////////////////////////////////////////////////// +/// Evaluates ngram similarity between the specified strings based on +/// "N-gram similarity and distance" by Grzegorz Kondrak +/// http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf +/// Could operate in two forms depending on search_semantics. +/// If search_semantics is false then positional ngram similarity is used, +/// binary ngram similarity is used otherwise. Also setting search_semantics to +/// true disables normalizing resulting similarity by length of longest string and +/// result is evaluated based strictly on target length (search-like semantics). +/// If search_semantics is false there is no difference which string pass +/// as target. 
First characters affixing currently is not implemented +/// @param target string to seek similarity for +/// @param target_size length of target string +/// @param src string to seek similarity in +/// @param src_size length of source string +/// @param ngram_size ngram size +/// @returns similarity value +//////////////////////////////////////////////////////////////////////////////// +template +float_t ngram_similarity(const T* target, size_t target_size, + const T* src, size_t src_size, + size_t ngram_size) { + if (ngram_size == 0) { + return 0.f; + } + + if /*consexpr*/ (!search_semantics) { + if (target_size > src_size) { + std::swap(target_size, src_size); + std::swap(target, src); + } + } + + if (target_size < ngram_size || src_size < ngram_size) { + if /*constexpr*/ (!search_semantics) { + if (target_size == 0 && src_size == 0) { + return 1; // consider two empty strings as matched + } + const T* r = src; + size_t matched = 0; + for (const T* it = target; it != target + target_size; ) { + matched += size_t(*it == *r); + ++r; + ++it; + } + return float_t(matched) / float_t(src_size); + } else { + if (target_size == src_size) { + return memcmp(target, src, target_size * sizeof(T)) == 0 ? 1 : 0; + } + return 0; + } + } + + const size_t t_ngram_count = target_size - ngram_size + 1; + const size_t s_ngram_count = src_size - ngram_size + 1; + const T* t_ngram_start = target; + const T* t_ngram_start_end = target + target_size - ngram_size + 1; // end() analog for target ngram start + + float_t d = 0; // will store upper-left cell value for current cache row + std::vector cache(s_ngram_count + 1, 0); + + // here could be constructed source string with start characters affixing + + size_t t_ngram_idx = 1; + for (; t_ngram_start != t_ngram_start_end; ++t_ngram_start, ++t_ngram_idx) { + const T* t_ngram_end = t_ngram_start + ngram_size; + const T* s_ngram_start = src; + size_t s_ngram_idx = 1; + const T* s_ngram_start_end = src + src_size - ngram_size + 1; // end() analog for src ngram start + + for (; s_ngram_start != s_ngram_start_end; ++s_ngram_start, ++s_ngram_idx) { + const T* rhs_ngram_end = s_ngram_start + ngram_size; + float_t similarity = !search_semantics ? 0 : 1; + for (const T* l = t_ngram_start, *r = s_ngram_start; l != t_ngram_end && r != rhs_ngram_end; ++l, ++r) { + if /*constexpr*/ (search_semantics) { + if (*l != *r) { + similarity = 0; + break; + } + } else { + if (*l == *r) { + ++similarity; + } + } + } + if /*constexpr*/ (!search_semantics) { + similarity = similarity / float_t(ngram_size); + } + + auto tmp = cache[s_ngram_idx]; + cache[s_ngram_idx] = + std::max( + std::max(cache[s_ngram_idx - 1], + cache[s_ngram_idx]), + d + similarity); + d = tmp; + } + } + return cache[s_ngram_count] / + float_t((!search_semantics) ? 
s_ngram_count : t_ngram_count); +} + +NS_END + + +#endif // IRESEARCH_NGRAM_MATCH_UTILS_H diff --git a/3rdParty/iresearch/core/utils/std.hpp b/3rdParty/iresearch/core/utils/std.hpp index 048d6ed7f11e..2029635ff0aa 100644 --- a/3rdParty/iresearch/core/utils/std.hpp +++ b/3rdParty/iresearch/core/utils/std.hpp @@ -256,7 +256,7 @@ template struct initializer { using type = typename Builder::type; - static constexpr const auto Idx = Size - 1; + static constexpr auto Idx = Size - 1; template #ifndef IRESEARCH_CXX_11 @@ -277,7 +277,7 @@ template struct initializer { using type = typename Builder::type; - static constexpr const auto Idx = 0; + static constexpr auto Idx = 0; template #ifndef IRESEARCH_CXX_11 diff --git a/3rdParty/iresearch/core/utils/string.cpp b/3rdParty/iresearch/core/utils/string.cpp index ac7fb2957670..2411e6d807e4 100644 --- a/3rdParty/iresearch/core/utils/string.cpp +++ b/3rdParty/iresearch/core/utils/string.cpp @@ -56,9 +56,9 @@ NS_END NS_ROOT -/* ------------------------------------------------------------------- - * basic_string_ref - * ------------------------------------------------------------------*/ +// ----------------------------------------------------------------------------- +// --SECTION-- basic_string_ref implementation +// ----------------------------------------------------------------------------- #if defined(_MSC_VER) && defined(IRESEARCH_DLL) @@ -93,5 +93,5 @@ size_t hash(const string_ref& value) noexcept { return get_hash(value.c_str(), value.size()); } -NS_END // detail +NS_END // hash_utils NS_END diff --git a/3rdParty/iresearch/core/utils/string.hpp b/3rdParty/iresearch/core/utils/string.hpp index b929c923de38..1628dd7a820b 100644 --- a/3rdParty/iresearch/core/utils/string.hpp +++ b/3rdParty/iresearch/core/utils/string.hpp @@ -314,6 +314,42 @@ inline bool starts_with( return 0 == first.compare(0, second.size(), second.c_str(), second.size()); } +template +inline size_t common_prefix_length( + const Char* lhs, size_t lhs_size, + const Char* rhs, size_t rhs_size) noexcept { + static_assert(1 == sizeof(Char), "1 != sizeof(Char)"); + + const size_t* lhs_block = reinterpret_cast(lhs); + const size_t* rhs_block = reinterpret_cast(rhs); + + size_t size = std::min(lhs_size, rhs_size); + + while (size >= sizeof(size_t) && *lhs_block == *rhs_block) { + ++lhs_block; + ++rhs_block; + size -= sizeof(size_t); + } + + const Char* lhs_block_start = reinterpret_cast(lhs_block); + const Char* rhs_block_start = reinterpret_cast(rhs_block); + + while (size && *lhs_block_start == *rhs_block_start) { + ++lhs_block_start; + ++rhs_block_start; + --size; + } + + return lhs_block_start - lhs; +} + +template +inline size_t common_prefix_length( + const basic_string_ref& lhs, + const basic_string_ref& rhs) noexcept { + return common_prefix_length(lhs.c_str(), lhs.size(), rhs.c_str(), rhs.size()); +} + template< typename _Elem, typename _Traits > inline bool starts_with( const basic_string_ref< _Elem, _Traits >& first, const _Elem* second ) { diff --git a/3rdParty/iresearch/core/utils/utf8_utils.hpp b/3rdParty/iresearch/core/utils/utf8_utils.hpp index 06c7e8dedc6f..e464f1cb5885 100644 --- a/3rdParty/iresearch/core/utils/utf8_utils.hpp +++ b/3rdParty/iresearch/core/utils/utf8_utils.hpp @@ -32,29 +32,82 @@ NS_ROOT NS_BEGIN(utf8_utils) -FORCE_INLINE const byte_type* next(const byte_type* it, const byte_type* end) noexcept { - IRS_ASSERT(it); +// max number of bytes to represent single UTF8 code point +constexpr size_t MAX_CODE_POINT_SIZE = 4; +constexpr uint32_t MIN_CODE_POINT = 0; 
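// Editor's note (illustrative sketch, not part of the patch): the
// common_prefix_length() helper added to string.hpp above compares whole
// machine words first and falls back to per-byte comparison only for the tail.
// Assuming it lives in the same namespace as its string.hpp neighbours:
//
//   const irs::string_ref a("foobar");
//   const irs::string_ref b("foobaz");
//   assert(5 == irs::common_prefix_length(a, b));   // common prefix "fooba"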
+constexpr uint32_t MAX_CODE_POINT = 0x10FFFF; +constexpr uint32_t INVALID_CODE_POINT = integer_traits::const_max; + +FORCE_INLINE const byte_type* next(const byte_type* begin, const byte_type* end) noexcept { + IRS_ASSERT(begin); IRS_ASSERT(end); - if (it < end) { - const uint8_t symbol_start = *it; - if (symbol_start < 0x80) { - ++it; - } else if ((symbol_start >> 5) == 0x06) { - it += 2; - } else if ((symbol_start >> 4) == 0x0E) { - it += 3; - } else if ((symbol_start >> 3) == 0x1E) { - it += 4; - } else { - IR_FRMT_ERROR("Invalid UTF-8 symbol increment"); - it = end; + if (begin < end) { + const uint32_t cp_start = *begin++; + if ((cp_start >> 5) == 0x06) { + ++begin; + } else if ((cp_start >> 4) == 0x0E) { + begin += 2; + } else if ((cp_start >> 3) == 0x1E) { + begin += 3; + } else if (cp_start >= 0x80) { + begin = end; } } - return it > end ? end : it; + + return begin > end ? end : begin; +} + +FORCE_INLINE size_t cp_length(const uint32_t cp_start) noexcept { + if (cp_start < 0x80) { + return 1; + } else if ((cp_start >> 5) == 0x06) { + return 2; + } else if ((cp_start >> 4) == 0x0E) { + return 3; + } else if ((cp_start >> 3) == 0x1E) { + return 4; + } + + return 0; } -FORCE_INLINE uint32_t next(const byte_type*& it) noexcept { +inline uint32_t next_checked(const byte_type*& begin, const byte_type* end) noexcept { + IRS_ASSERT(begin); + IRS_ASSERT(end); + + if (begin >= end) { + return INVALID_CODE_POINT; + } + + uint32_t cp = *begin; + const size_t size = cp_length(cp); + + begin += size; + + if (begin <= end) { + switch (size) { + case 1: return cp; + + case 2: return ((cp << 6) & 0x7FF) + + (uint32_t(begin[-1]) & 0x3F); + + case 3: return ((cp << 12) & 0xFFFF) + + ((uint32_t(begin[-2]) << 6) & 0xFFF) + + (uint32_t(begin[-1]) & 0x3F); + + case 4: return ((cp << 18) & 0x1FFFFF) + + ((uint32_t(begin[-3]) << 12) & 0x3FFFF) + + ((uint32_t(begin[-2]) << 6) & 0xFFF) + + (uint32_t(begin[-1]) & 0x3F); + } + } + + begin = end; + return INVALID_CODE_POINT; +} + +inline uint32_t next(const byte_type*& it) noexcept { IRS_ASSERT(it); uint32_t cp = *it; @@ -75,16 +128,82 @@ FORCE_INLINE uint32_t next(const byte_type*& it) noexcept { return cp; } -template -inline void to_utf8(const byte_type* begin, size_t size, OutputIterator out) { - for (auto end = begin + size; begin < end; ++out) { - *out = next(begin); +#ifdef IRESEARCH_CXX14 +constexpr +#endif +FORCE_INLINE size_t utf32_to_utf8(uint32_t cp, byte_type* begin) noexcept { + if (cp < 0x80) { + begin[0] = static_cast(cp); + return 1; + } + + if (cp < 0x800) { + begin[0] = static_cast((cp >> 6) | 0xC0); + begin[1] = static_cast((cp & 0x3F) | 0x80); + return 2; + } + + if (cp < 0x10000) { + begin[0] = static_cast((cp >> 12) | 0xE0); + begin[1] = static_cast(((cp >> 6) & 0x3F) | 0x80); + begin[2] = static_cast((cp & 0x3F) | 0x80); + return 3; + } + + begin[0] = static_cast((cp >> 18) | 0xF0); + begin[1] = static_cast(((cp >> 12) & 0x3F) | 0x80); + begin[2] = static_cast(((cp >> 6) & 0x3F) | 0x80); + begin[3] = static_cast((cp & 0x3F) | 0x80); + return 4; +} + +template +inline const byte_type* find(const byte_type* begin, const byte_type* end, uint32_t ch) noexcept { + for (const byte_type* char_begin = begin; begin < end; char_begin = begin) { + const auto cp = Checked ? 
next_checked(begin, end) : next(begin); + + if (cp == ch) { + return char_begin; + } } + + return end; +} + +template +inline size_t find(const byte_type* begin, const size_t size, uint32_t ch) noexcept { + size_t pos = 0; + for (auto end = begin + size; begin < end; ++pos) { + const auto cp = Checked ? next_checked(begin, end) : next(begin); + + if (cp == ch) { + return pos; + } + } + + return bstring::npos; +} + +template +inline bool utf8_to_utf32(const byte_type* begin, size_t size, OutputIterator out) { + for (auto end = begin + size; begin < end; ) { + const auto cp = Checked ? next_checked(begin, end) : next(begin); + + if /*constexpr*/ (Checked) { + if (cp == INVALID_CODE_POINT) { + return false; + } + } + + *out = cp; + } + + return true; } -template -inline void to_utf8(const bytes_ref& in, OutputIterator out) { - to_utf8(in.begin(), in.size(), out); +template +FORCE_INLINE bool utf8_to_utf32(const bytes_ref& in, OutputIterator out) { + return utf8_to_utf32(in.begin(), in.size(), out); } NS_END diff --git a/3rdParty/iresearch/core/utils/wildcard_utils.cpp b/3rdParty/iresearch/core/utils/wildcard_utils.cpp new file mode 100644 index 000000000000..c6466ae6fc7a --- /dev/null +++ b/3rdParty/iresearch/core/utils/wildcard_utils.cpp @@ -0,0 +1,228 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
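// ---------------------------------------------------------------------------
// Editor's note (usage sketch, not part of the patch): the checked flavour of
// utf8_to_utf32() above reports malformed input instead of silently decoding
// garbage. Assuming the usual 'irs' namespace alias and that 'Checked' is the
// leading template parameter (as the call sites elsewhere in this patch
// suggest), it is used along the lines of:
//
//   std::vector<uint32_t> cps;
//   if (!irs::utf8_utils::utf8_to_utf32<true>(word, std::back_inserter(cps))) {
//     // 'word' is not a valid UTF-8 sequence
//   }
//
// which mirrors how make_alphabet() and edit_distance() consume it earlier in
// this series (with the unchecked default).
// ---------------------------------------------------------------------------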
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrey Abramov +//////////////////////////////////////////////////////////////////////////////// + +#include "wildcard_utils.hpp" + +#include "automaton_utils.hpp" + +NS_ROOT + +WildcardType wildcard_type(const bytes_ref& expr) noexcept { + if (expr.empty()) { + return WildcardType::TERM; + } + + bool escaped = false; + size_t num_match_any_string = 0; + size_t num_adjacent_match_any_string = 0; + + const auto* char_begin = expr.begin(); + const auto* end = expr.end(); + + for (size_t i = 0; char_begin < end; ++i) { + const auto char_length = utf8_utils::cp_length(*char_begin); + const auto char_end = char_begin + char_length; + + if (!char_length || char_end > end) { + return WildcardType::INVALID; + } + + switch (*char_begin) { + case WildcardMatch::ANY_STRING: + num_adjacent_match_any_string += size_t(!escaped); + num_match_any_string += size_t(!escaped); + escaped = false; + break; + case WildcardMatch::ANY_CHAR: + if (!escaped) { + return WildcardType::WILDCARD; + } + num_adjacent_match_any_string = 0; + escaped = false; + break; + case WildcardMatch::ESCAPE: + num_adjacent_match_any_string = 0; + escaped = !escaped; + break; + default: + num_adjacent_match_any_string = 0; + escaped = false; + break; + } + + char_begin = char_end; + } + + if (0 == num_match_any_string) { + return WildcardType::TERM; + } + + if (expr.size() == num_match_any_string) { + return WildcardType::MATCH_ALL; + } + + if (num_match_any_string == num_adjacent_match_any_string) { + return WildcardType::PREFIX; + } + + return WildcardType::WILDCARD; +} + +automaton from_wildcard(const bytes_ref& expr) { + struct { + automaton::StateId from; + automaton::StateId to; + automaton::StateId match_all_from{ fst::kNoStateId }; + automaton::StateId match_all_to{ fst::kNoStateId }; + bytes_ref match_all_label{}; + bool escaped{ false }; + bool match_all{ false }; + } state; + + utf8_transitions_builder builder; + std::pair arcs[2]; + + automaton a; + state.from = a.AddState(); + state.to = state.from; + a.SetStart(state.from); + + auto appendChar = [&a, &builder, &arcs, &state](const bytes_ref& c) { + state.to = a.AddState(); + if (!state.match_all) { + if (state.match_all_label.null()) { + utf8_emplace_arc(a, state.from, c, state.to); + } else { + const auto r = compare(c, state.match_all_label); + + if (!r) { + utf8_emplace_arc(a, state.from, state.match_all_from, c, state.to); + state.match_all_to = state.to; + } else { + arcs[0] = { c, state.to }; + arcs[1] = { state.match_all_label, state.match_all_to }; + + if (r > 0) { + std::swap(arcs[0], arcs[1]); + } + + builder.insert(a, state.from, state.match_all_from, + std::begin(arcs), std::end(arcs)); + } + } + } else { + utf8_emplace_arc(a, state.from, state.from, c, state.to); + + state.match_all_from = state.from; + state.match_all_to = state.to; + state.match_all_label = c; + state.match_all = false; + } + + state.from = state.to; + state.escaped = false; + }; + + const auto* label_begin = expr.begin(); + const auto* end = expr.end(); + + while (label_begin < end) { + const auto label_length = utf8_utils::cp_length(*label_begin); + const auto label_end = label_begin + label_length; + + if (!label_length || label_end > end) { + // invalid UTF-8 sequence + a.DeleteStates(); + return a; + } + + switch (*label_begin) { + case WildcardMatch::ANY_STRING: { + if (state.escaped) { + appendChar({label_begin, label_length}); + } else { + state.match_all = true; + } + break; + } + case 
WildcardMatch::ANY_CHAR: { + if (state.escaped) { + appendChar({label_begin, label_length}); + } else { + state.to = a.AddState(); + utf8_emplace_rho_arc(a, state.from, state.to); + state.from = state.to; + } + break; + } + case WildcardMatch::ESCAPE: { + if (state.escaped) { + appendChar({label_begin, label_length}); + } else { + state.escaped = !state.escaped; + } + break; + } + default: { + appendChar({label_begin, label_length}); + break; + } + } + + label_begin = label_end; + } + + // need this variable to preserve valid address + // for cases with match all and terminal escape + // character (%\\) + const byte_type c = WildcardMatch::ESCAPE; + + if (state.escaped) { + // non-terminated escape sequence + appendChar({&c, 1}); + } if (state.match_all) { + // terminal MATCH_ALL + utf8_emplace_rho_arc(a, state.to, state.to); + state.match_all_from = fst::kNoStateId; + } + + if (state.match_all_from != fst::kNoStateId) { + // non-terminal MATCH_ALL + utf8_emplace_arc(a, state.to, state.match_all_from, + state.match_all_label, state.match_all_to); + } + + a.SetFinal(state.to); + +#ifdef IRESEARCH_DEBUG + // ensure resulting automaton is sorted and deterministic + static constexpr auto EXPECTED_PROPERTIES = + fst::kIDeterministic | fst::kODeterministic | + fst::kILabelSorted | fst::kOLabelSorted | + fst::kAcceptor; + assert(EXPECTED_PROPERTIES == a.Properties(EXPECTED_PROPERTIES, true)); +#endif + + return a; +} + +NS_END diff --git a/3rdParty/iresearch/core/utils/wildcard_utils.hpp b/3rdParty/iresearch/core/utils/wildcard_utils.hpp index 89f3aef0683c..3081c0659988 100644 --- a/3rdParty/iresearch/core/utils/wildcard_utils.hpp +++ b/3rdParty/iresearch/core/utils/wildcard_utils.hpp @@ -27,104 +27,38 @@ NS_ROOT -template -struct wildcard_traits { - using char_type = Char; - - // match any string or empty string - static constexpr Char MATCH_ANY_STRING= Char('%'); - - // match any char - static constexpr Char MATCH_ANY_CHAR = Char('_'); - - // denotes beginning of escape sequence - static constexpr Char ESCAPE = Char('\\'); +enum class WildcardType { + INVALID = 0, // invalid input sequence + TERM, // foo + MATCH_ALL, // * + PREFIX, // foo* + WILDCARD // f_o* }; -template< - typename Char, - typename Traits = wildcard_traits, - // brackets over condition are for circumventing MSVC parser bug - typename = typename std::enable_if<(sizeof(Char) < sizeof(fst::fsa::kMaxLabel))>::type, - typename = typename std::enable_if::type> -automaton from_wildcard(const irs::basic_string_ref& expr) { - automaton a; - a.ReserveStates(expr.size() + 1); - - automaton::StateId from = a.AddState(); - automaton::StateId to = from; - a.SetStart(from); - - automaton::StateId match_all_state = fst::kNoStateId; - bool escaped = false; - auto appendChar = [&match_all_state, &escaped, &a, &to, &from](Char c) { - to = a.AddState(); - a.EmplaceArc(from, c, to); - from = to; - escaped = false; - if (match_all_state != fst::kNoStateId) { - auto state = a.AddState(); - a.ReserveArcs(state, 2); - a.EmplaceArc(match_all_state, fst::fsa::kRho, state); - a.EmplaceArc(state, fst::fsa::kRho, state); - a.EmplaceArc(state, c, to); - a.EmplaceArc(to, c, to); - a.EmplaceArc(to, fst::fsa::kRho, state); - match_all_state = fst::kNoStateId; - } - }; - - for (const auto c : expr) { - switch (c) { - case Traits::MATCH_ANY_STRING: { - if (escaped) { - appendChar(c); - } else { - match_all_state = from; - } - break; - } - case Traits::MATCH_ANY_CHAR: { - if (escaped) { - appendChar(c); - } else { - to = a.AddState(); - a.EmplaceArc(from, 
fst::fsa::kRho, to); - from = to; - } - } break; - case Traits::ESCAPE: { - if (escaped) { - appendChar(c); - } else { - escaped = !escaped; - } - break; - } - default: { - appendChar(c); - break; - } - } - } +IRESEARCH_API WildcardType wildcard_type(const bytes_ref& pattern) noexcept; - // non-terminated escape sequence - if (escaped) { - appendChar(Traits::ESCAPE); - } - - if (match_all_state != fst::kNoLabel) { - a.EmplaceArc(to, fst::fsa::kRho, to); - } - - a.SetFinal(to); - - fst::ArcSort(&a, fst::ILabelCompare()); +enum WildcardMatch : byte_type { + ANY_STRING = '%', + ANY_CHAR = '_', + ESCAPE = '\\' +}; - automaton res; - fst::Determinize(a, &res); +//////////////////////////////////////////////////////////////////////////////// +/// @brief instantiates minimal DFA from a specified UTF-8 encoded wildcard +/// sequence +/// @param expr valid UTF-8 encoded string +/// @returns DFA accpeting a specified wildcard expression +/// @note control symbols are +/// '%' - match any number of arbitrary characters +/// '_' - match a single arbitrary character +/// '\' - escape control symbol, e.g. '\%' issues literal '%' +/// @note if an input expression is incorrect UTF-8 sequence, function returns +/// empty automaton +//////////////////////////////////////////////////////////////////////////////// +IRESEARCH_API automaton from_wildcard(const bytes_ref& expr); - return res; +inline automaton from_wildcard(const string_ref& expr) { + return from_wildcard(ref_cast(expr)); } NS_END diff --git a/3rdParty/iresearch/external/openfst/draw-impl.h b/3rdParty/iresearch/external/openfst/draw-impl.h index f0b941bde556..e6df7e7fd494 100644 --- a/3rdParty/iresearch/external/openfst/draw-impl.h +++ b/3rdParty/iresearch/external/openfst/draw-impl.h @@ -14,7 +14,6 @@ #include #include #include -#include namespace fst { @@ -261,8 +260,7 @@ inline void drawFst( int fontsize = 14, int precision = 5, const std::string& float_format = "g", - bool show_weight_one = false -) { + bool show_weight_one = false) { FstDrawer drawer( fst, isyms, osyms, ssyms, accep, title, width, height, partrait, vertical, randsep, nodesep, fontsize, precision, @@ -272,6 +270,39 @@ inline void drawFst( drawer.Draw(&strm, dest); } +template +inline bool drawFst( + const Fst& fst, + const std::string& dest, + const SymbolTable* isyms = nullptr, + const SymbolTable* osyms = nullptr, + const SymbolTable* ssyms = nullptr, + bool accep = false, + const std::string& title = "", + float width = 11, + float height = 8.5, + bool partrait = true, + bool vertical = false, + float randsep = 0.4, + float nodesep = 0.25, + int fontsize = 14, + int precision = 5, + const std::string& float_format = "g", + bool show_weight_one = false) { + std::fstream stream; + stream.open(dest, std::fstream::binary | std::fstream::out); + if (!stream) { + return false; + } + + fst::drawFst( + fst, stream, dest, isyms, osyms, ssyms, accep, title, + width, height, partrait, vertical, randsep, nodesep, + fontsize, precision, float_format, show_weight_one); + + return true; +} + } // namespace fst #endif // FST_SCRIPT_DRAW_IMPL_H_ diff --git a/3rdParty/iresearch/external/openfst/fst/string-weight.h b/3rdParty/iresearch/external/openfst/fst/string-weight.h index 86e140a2a29d..3531c5650ad4 100644 --- a/3rdParty/iresearch/external/openfst/fst/string-weight.h +++ b/3rdParty/iresearch/external/openfst/fst/string-weight.h @@ -169,9 +169,9 @@ class StringWeightIterator { private: const Label &first_; - const decltype(Weight::rest_) &rest_; + const std::list