diff --git a/CHANGELOG b/CHANGELOG index ea14fff6e3eb..5732dd97b2be 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ devel ----- +* Include K_SHORTEST_PATHS and SHORTEST_PATH execution nodes in AQL query + memory usage accounting. The memory used by these execution node types was + previously not tracked against the configured query memory limit. + * Reduce default value for max-nodes-per-callstack to 200 for OSX, because on OSX worker threads have a stack size of only 512kb. diff --git a/arangod/Graph/AttributeWeightShortestPathFinder.cpp b/arangod/Graph/AttributeWeightShortestPathFinder.cpp index dc58f3e593dc..acd32d37af1b 100644 --- a/arangod/Graph/AttributeWeightShortestPathFinder.cpp +++ b/arangod/Graph/AttributeWeightShortestPathFinder.cpp @@ -24,6 +24,7 @@ #include "AttributeWeightShortestPathFinder.h" #include "Basics/Exceptions.h" +#include "Basics/ResourceUsage.h" #include "Basics/tryEmplaceHelper.h" #include "Graph/EdgeCursor.h" #include "Graph/EdgeDocumentToken.h" @@ -125,7 +126,7 @@ bool AttributeWeightShortestPathFinder::Searcher::oneStep() { bool b = _myInfo._pq.popMinimal(v, s); if (_pathFinder->_bingo || !b) { - // We can leave this functino only under 2 conditions: + // We can leave this function only under 2 conditions: // 1) already bingo==true => bingo = true no effect // 2) This queue is empty => if there would be a // path we would have found it here @@ -136,12 +137,15 @@ bool AttributeWeightShortestPathFinder::Searcher::oneStep() { TRI_ASSERT(s != nullptr); - std::vector> neighbors; - _pathFinder->expandVertex(_backward, v, neighbors); - for (std::unique_ptr& neighbor : neighbors) { + _neighbors.clear(); + // populates _neighbors + _pathFinder->expandVertex(_backward, v, _neighbors); + + for (auto& neighbor : _neighbors) { insertNeighbor(std::move(neighbor), s->weight() + neighbor->weight()); } - // All neighbours are moved out. + _neighbors.clear(); + // All neighbors are moved out. 
lookupPeer(v, s->weight()); Step* s2 = _myInfo._pq.find(v); @@ -154,6 +158,7 @@ bool AttributeWeightShortestPathFinder::Searcher::oneStep() { AttributeWeightShortestPathFinder::AttributeWeightShortestPathFinder(ShortestPathOptions& options) : ShortestPathFinder(options), + _resourceMonitor(options.resourceMonitor()), _highscoreSet(false), _highscore(0), _bingo(false), @@ -165,7 +170,15 @@ AttributeWeightShortestPathFinder::AttributeWeightShortestPathFinder(ShortestPat _backwardCursor = _options.buildCursor(true); } -AttributeWeightShortestPathFinder::~AttributeWeightShortestPathFinder() = default; +AttributeWeightShortestPathFinder::~AttributeWeightShortestPathFinder() { + // required for memory usage tracking + clearCandidates(); +} + +void AttributeWeightShortestPathFinder::clearCandidates() noexcept { + _resourceMonitor.decreaseMemoryUsage(_candidates.size() * candidateMemoryUsage()); + _candidates.clear(); +} void AttributeWeightShortestPathFinder::clear() { options().cache()->clear(); @@ -174,6 +187,7 @@ void AttributeWeightShortestPathFinder::clear() { _bingo = false; _intermediateSet = false; _intermediate = arangodb::velocypack::StringRef{}; + clearCandidates(); } bool AttributeWeightShortestPathFinder::shortestPath(arangodb::velocypack::Slice const& st, @@ -206,7 +220,7 @@ bool AttributeWeightShortestPathFinder::shortestPath(arangodb::velocypack::Slice Searcher forwardSearcher(this, forward, backward, start, false); std::unique_ptr backwardSearcher; if (_options.bidirectional) { - backwardSearcher.reset(new Searcher(this, backward, forward, target, true)); + backwardSearcher = std::make_unique(this, backward, forward, target, true); } TRI_IF_FAILURE("TraversalOOMInitialize") { @@ -235,6 +249,9 @@ bool AttributeWeightShortestPathFinder::shortestPath(arangodb::velocypack::Slice } Step* s = forward._pq.find(_intermediate); + + // track memory usage for result buildup. 
+ ResourceUsageScope guard(_resourceMonitor); result._vertices.emplace_back(_intermediate); @@ -251,6 +268,8 @@ bool AttributeWeightShortestPathFinder::shortestPath(arangodb::velocypack::Slice break; } + guard.increase(arangodb::graph::ShortestPathResult::resultItemMemoryUsage()); + result._edges.push_front(std::move(s->_edge)); result._vertices.push_front(arangodb::velocypack::StringRef(s->_predecessor)); s = forward._pq.find(s->_predecessor); @@ -270,6 +289,8 @@ bool AttributeWeightShortestPathFinder::shortestPath(arangodb::velocypack::Slice if (s->_predecessor.empty()) { break; } + + guard.increase(arangodb::graph::ShortestPathResult::resultItemMemoryUsage()); result._edges.emplace_back(std::move(s->_edge)); result._vertices.emplace_back(arangodb::velocypack::StringRef(s->_predecessor)); @@ -281,23 +302,28 @@ bool AttributeWeightShortestPathFinder::shortestPath(arangodb::velocypack::Slice } _options.fetchVerticesCoordinator(result._vertices); + // we intentionally don't commit the memory usage to the _resourceMonitor here. return true; } void AttributeWeightShortestPathFinder::inserter( - std::unordered_map& candidates, std::vector>& result, arangodb::velocypack::StringRef const& s, arangodb::velocypack::StringRef const& t, double currentWeight, EdgeDocumentToken&& edge) { + + ResourceUsageScope guard(_resourceMonitor, candidateMemoryUsage()); + auto [cand, emplaced] = - candidates.try_emplace(t, arangodb::lazyConstruct([&] { - result.emplace_back( - std::make_unique(t, s, currentWeight, - std::move(edge))); - return result.size() - 1; - })); - - if (!emplaced) { + _candidates.try_emplace(t, arangodb::lazyConstruct([&] { + result.emplace_back( + std::make_unique(t, s, currentWeight, + std::move(edge))); + return result.size() - 1; + })); + if (emplaced) { + // new candidate created. 
now candidates are responsible for memory usage tracking + guard.steal(); + } else { // Compare weight auto& old = result[cand->second]; auto oldWeight = old->weight(); @@ -312,10 +338,12 @@ void AttributeWeightShortestPathFinder::inserter( void AttributeWeightShortestPathFinder::expandVertex( bool backward, arangodb::velocypack::StringRef const& vertex, std::vector>& result) { + TRI_ASSERT(result.empty()); + EdgeCursor* cursor = backward ? _backwardCursor.get() : _forwardCursor.get(); cursor->rearm(vertex, 0); - std::unordered_map candidates; + clearCandidates(); cursor->readAll([&](EdgeDocumentToken&& eid, VPackSlice edge, size_t cursorIdx) -> void { if (edge.isString()) { VPackSlice doc = _options.cache()->lookupToken(eid); @@ -323,9 +351,9 @@ void AttributeWeightShortestPathFinder::expandVertex( arangodb::velocypack::StringRef other = _options.cache()->persistString(arangodb::velocypack::StringRef(edge)); if (other.compare(vertex) != 0) { - inserter(candidates, result, vertex, other, currentWeight, std::move(eid)); + inserter(result, vertex, other, currentWeight, std::move(eid)); } else { - inserter(candidates, result, other, vertex, currentWeight, std::move(eid)); + inserter(result, other, vertex, currentWeight, std::move(eid)); } } else { arangodb::velocypack::StringRef fromTmp( @@ -335,305 +363,18 @@ void AttributeWeightShortestPathFinder::expandVertex( arangodb::velocypack::StringRef to = _options.cache()->persistString(toTmp); double currentWeight = _options.weightEdge(edge); if (from == vertex) { - inserter(candidates, result, from, to, currentWeight, std::move(eid)); + inserter(result, from, to, currentWeight, std::move(eid)); } else { - inserter(candidates, result, to, from, currentWeight, std::move(eid)); + inserter(result, to, from, currentWeight, std::move(eid)); } } }); + + clearCandidates(); } -/* -AttributeWeightShortestPathFinder::SearcherTwoThreads::SearcherTwoThreads( - AttributeWeightShortestPathFinder* pathFinder, ThreadInfo& myInfo, - 
ThreadInfo& peerInfo, arangodb::velocypack::Slice const& start, - ExpanderFunction expander, std::string const& id) - : _pathFinder(pathFinder), - _myInfo(myInfo), - _peerInfo(peerInfo), - _start(start), - _expander(expander), - _id(id) {} - -void AttributeWeightShortestPathFinder::SearcherTwoThreads::insertNeighbor( - Step* step, double newWeight) { - MUTEX_LOCKER(locker, _myInfo._mutex); - - Step* s = _myInfo._pq.find(step->_vertex); - - // Not found, so insert it: - if (s == nullptr) { - step->setWeight(newWeight); - _myInfo._pq.insert(step->_vertex, step); - // step is consumed! - return; - } - if (s->_done) { - delete step; - return; - } - if (s->weight() > newWeight) { - s->_predecessor = step->_predecessor; - s->_edge = step->_edge; - _myInfo._pq.lowerWeight(s->_vertex, newWeight); - } - delete step; -} - -void AttributeWeightShortestPathFinder::SearcherTwoThreads::lookupPeer( - arangodb::velocypack::Slice& vertex, double weight) { - MUTEX_LOCKER(locker, _peerInfo._mutex); - - Step* s = _peerInfo._pq.find(vertex); - if (s == nullptr) { - // Not found, nothing more to do - return; - } - double total = s->weight() + weight; - - // Update the highscore: - MUTEX_LOCKER(resultLocker, _pathFinder->_resultMutex); - - if (!_pathFinder->_highscoreSet || total < _pathFinder->_highscore) { - _pathFinder->_highscoreSet = true; - _pathFinder->_highscore = total; - _pathFinder->_intermediate = vertex; - _pathFinder->_intermediateSet = true; - } - - // Now the highscore is set! - - // Did we find a solution together with the other thread? - if (s->_done) { - if (total <= _pathFinder->_highscore) { - _pathFinder->_intermediate = vertex; - _pathFinder->_intermediateSet = true; - } - // Hacki says: If the highscore was set, and even if - // it is better than total, then this observation here - // proves that it will never be better, so: BINGO. 
- _pathFinder->_bingo = true; - // We found a way, but somebody else found a better way, so - // this is not the shortest path - return; - } - - // Did we find a solution on our own? This is for the - // single thread case and for the case that the other - // thread is too slow to even finish its own start vertex! - if (s->weight() == 0.0) { - // We have found the target, we have finished all - // vertices with a smaller weight than this one (and did - // not succeed), so this must be a best solution: - _pathFinder->_intermediate = vertex; - _pathFinder->_intermediateSet = true; - _pathFinder->_bingo = true; - } -} - -void AttributeWeightShortestPathFinder::SearcherTwoThreads::run() { - try { - arangodb::velocypack::Slice v; - Step* s; - bool b; - { - MUTEX_LOCKER(locker, _myInfo._mutex); - b = _myInfo._pq.popMinimal(v, s, true); - } - - std::vector neighbors; - - // Iterate while no bingo found and - // there still is a vertex on the stack. - while (!_pathFinder->_bingo && b) { - neighbors.clear(); - _expander(v, neighbors); - for (auto* neighbor : neighbors) { - insertNeighbor(neighbor, s->weight() + neighbor->weight()); - } - lookupPeer(v, s->weight()); - - MUTEX_LOCKER(locker, _myInfo._mutex); - Step* s2 = _myInfo._pq.find(v); - s2->_done = true; - b = _myInfo._pq.popMinimal(v, s, true); - } - // We can leave this loop only under 2 conditions: - // 1) already bingo==true => bingo = true no effect - // 2) This queue is empty => if there would be a - // path we would have found it here - // => No path possible. Set bingo, intermediate is empty. - _pathFinder->_bingo = true; - } catch (arangodb::basics::Exception const& ex) { - _pathFinder->_resultCode = ex.code(); - } catch (std::bad_alloc const&) { - _pathFinder->_resultCode = TRI_ERROR_OUT_OF_MEMORY; - } catch (...) 
{ - _pathFinder->_resultCode = TRI_ERROR_INTERNAL; - } -} - -void AttributeWeightShortestPathFinder::SearcherTwoThreads::start() { - _thread = std::thread(&SearcherTwoThreads::run, this); -} - -void AttributeWeightShortestPathFinder::SearcherTwoThreads::join() { - _thread.join(); -} -*/ - -/* Here is a proof for the correctness of this algorithm: - * - * Assume we are looking for a shortest path from vertex A to vertex B. - * - * We do Dijkstra from both sides, thread 1 from A in forward direction and - * thread 2 from B in backward direction. That is, we administrate a (hash) - * table of distances from A to vertices in forward direction and one of - * distances from B to vertices in backward direction. - * - * We get the following guarantees: - * - * When thread 1 is working on a vertex X, then it knows the distance w - * from A to X. - * - * When thread 2 is working on a vertex Y, then it knows the distance v - * from Y to B. - * - * When thread 1 is working on a vertex X at distance w from A, then it has - * completed the work on all vertices X' at distance < w from A. - * - * When thread 2 is working on a vertex Y at distance v to B, then it has - * completed the work on all vertices X' at (backward) distance < v to B. - * - * This all follows from the standard Dijkstra algorithm. - * - * Additionally, we do the following after we complete the normal work on a - * vertex: - * - * Thread 1 checks for each vertex X at distance w from A whether thread 2 - * already knows it. If so, it makes sure that the highscore and intermediate - * are set to the total length. Thread 2 does the analogous thing. - * - * If Thread 1 finds that vertex X (at distance v to B, say) has already - * been completed by thread 2, then we call bingo. Thread 2 does the - * analogous thing. - * - * We need to prove that the result is a shortest path. 
- * - * Assume that there is a shortest path of length = w from A - * and thus at distance v' < v to B: - * - * | >=w | v' X' -> Y' -----> B - * - * Therefore, X' has already been completed by thread 1 and Y' has - * already been completed by thread 2. - * - * Therefore, thread 1 has (in this temporal order) done: - * - * 1a: discover Y' and store it in table 1 under mutex 1 - * 1b: lookup X' in thread 2's table under mutex 2 - * 1c: mark X' as complete in table 1 under mutex 1 - * - * And thread 2 has (in this temporal order) done: - * - * 2a: discover X' and store it in table 2 under mutex 2 - * 2b: lookup Y' in thread 1's table under mutex 1 - * 2c: mark Y' as complete in table 2 under mutex 2 - * - * If 1b has happened before 2a, then 1a has happened before 2a and - * thus 2b, so thread 2 has found the highscore w'+z'+v' < v+w. - * Otherwise, 1b has happened after 2a and thus thread 1 has found the - * highscore. - * - * Thus the highscore of this shortest path has already been set and the - * algorithm is correct. - */ - -/* Unused code. 
Maybe reactivated - -bool AttributeWeightShortestPathFinder::shortestPathTwoThreads( - arangodb::velocypack::Slice& start, arangodb::velocypack::Slice& target, - ShortestPathResult& result) { - // For the result: - result.clear(); - _highscoreSet = false; - _highscore = 0; - _bingo = false; - - // Forward with initialization: - arangodb::velocypack::Slice emptyVertex; - arangodb::velocypack::Slice emptyEdge; - ThreadInfo forward; - forward._pq.insert(start, new Step(start, emptyVertex, 0, emptyEdge)); - - // backward with initialization: - ThreadInfo backward; - backward._pq.insert(target, new Step(target, emptyVertex, 0, emptyEdge)); - - // Now the searcher threads: - SearcherTwoThreads forwardSearcher(this, forward, backward, start, - _forwardExpander, "Forward"); - std::unique_ptr backwardSearcher; - if (_bidirectional) { - backwardSearcher.reset(new SearcherTwoThreads( - this, backward, forward, target, _backwardExpander, "Backward")); - } - - TRI_IF_FAILURE("TraversalOOMInitialize") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - - forwardSearcher.start(); - if (_bidirectional) { - backwardSearcher->start(); - } - forwardSearcher.join(); - if (_bidirectional) { - backwardSearcher->join(); - } - - // check error code returned by the threads - int res = _resultCode.load(); - - if (res != TRI_ERROR_NO_ERROR) { - // one of the threads caught an exception - THROW_ARANGO_EXCEPTION(res); - } - - if (!_bingo || _intermediateSet == false) { - return false; - } - - Step* s = forward._pq.find(_intermediate); - result._vertices.emplace_back(_intermediate); - - // FORWARD Go path back from intermediate -> start. - // Insert all vertices and edges at front of vector - // Do NOT! 
insert the intermediate vertex - while (!s->_predecessor.isNone()) { - result._edges.push_front(arangodb::velocypack::StringRef(s->_edge)); - result._vertices.push_front(arangodb::velocypack::StringRef(s->_predecessor)); - s = forward._pq.find(s->_predecessor); - } - - // BACKWARD Go path back from intermediate -> target. - // Insert all vertices and edges at back of vector - // Also insert the intermediate vertex - s = backward._pq.find(_intermediate); - while (!s->_predecessor.isNone()) { - result._edges.emplace_back(arangodb::velocypack::StringRef(s->_edge)); - result._vertices.emplace_back(arangodb::velocypack::StringRef(s->_predecessor)); - s = backward._pq.find(s->_predecessor); - } - - TRI_IF_FAILURE("TraversalOOMPath") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - - return true; +size_t AttributeWeightShortestPathFinder::candidateMemoryUsage() const noexcept { + return 16 /*arbitrary overhead*/ + + sizeof(decltype(_candidates)::key_type) + + sizeof(decltype(_candidates)::value_type); } -*/ diff --git a/arangod/Graph/AttributeWeightShortestPathFinder.h b/arangod/Graph/AttributeWeightShortestPathFinder.h index 2626aef7267a..5d0c36ede908 100644 --- a/arangod/Graph/AttributeWeightShortestPathFinder.h +++ b/arangod/Graph/AttributeWeightShortestPathFinder.h @@ -23,9 +23,6 @@ #pragma once -#include "Basics/Mutex.h" -#include "Basics/MutexLocker.h" - #include "Graph/EdgeDocumentToken.h" #include "Graph/ShortestPathFinder.h" #include "Graph/ShortestPathPriorityQueue.h" @@ -33,9 +30,9 @@ #include #include -#include namespace arangodb { +struct ResourceMonitor; namespace graph { class EdgeCursor; @@ -87,65 +84,8 @@ class AttributeWeightShortestPathFinder : public ShortestPathFinder { struct ThreadInfo { PQueue _pq; - arangodb::Mutex _mutex; }; - ////////////////////////////////////////////////////////////////////////////// - /// @brief a Dijkstra searcher for the multi-threaded search - 
////////////////////////////////////////////////////////////////////////////// - - /* - class SearcherTwoThreads { - AttributeWeightShortestPathFinder* _pathFinder; - ThreadInfo& _myInfo; - ThreadInfo& _peerInfo; - arangodb::velocypack::Slice _start; - ExpanderFunction _expander; - std::string _id; - - public: - SearcherTwoThreads(AttributeWeightShortestPathFinder* pathFinder, - ThreadInfo& myInfo, ThreadInfo& peerInfo, - arangodb::velocypack::Slice const& start, - ExpanderFunction expander, std::string const& id); - - private: - //////////////////////////////////////////////////////////////////////////////// - /// @brief Insert a neighbor to the todo list. - //////////////////////////////////////////////////////////////////////////////// - void insertNeighbor(Step* step, double newWeight); - - //////////////////////////////////////////////////////////////////////////////// - /// @brief Lookup our current vertex in the data of our peer. - //////////////////////////////////////////////////////////////////////////////// - - void lookupPeer(arangodb::velocypack::Slice& vertex, double weight); - - //////////////////////////////////////////////////////////////////////////////// - /// @brief Search graph starting at Start following edges of the given - /// direction only - //////////////////////////////////////////////////////////////////////////////// - - void run(); - - //////////////////////////////////////////////////////////////////////////////// - /// @brief start and join functions - //////////////////////////////////////////////////////////////////////////////// - - public: - void start(); - - void join(); - - //////////////////////////////////////////////////////////////////////////////// - /// @brief The thread object. 
- //////////////////////////////////////////////////////////////////////////////// - - private: - std::thread _thread; - }; - */ - ////////////////////////////////////////////////////////////////////////////// /// @brief a Dijkstra searcher for the single-threaded search ////////////////////////////////////////////////////////////////////////////// @@ -182,6 +122,8 @@ class AttributeWeightShortestPathFinder : public ShortestPathFinder { ThreadInfo& _peerInfo; arangodb::velocypack::StringRef _start; bool _backward; + /// @brief temp value, which is used only in Searcher::oneStep() and recycled. + std::vector> _neighbors; }; // ----------------------------------------------------------------------------- @@ -213,9 +155,10 @@ class AttributeWeightShortestPathFinder : public ShortestPathFinder { bool shortestPath(arangodb::velocypack::Slice const& start, arangodb::velocypack::Slice const& target, arangodb::graph::ShortestPathResult& result) override; + + private: - void inserter(std::unordered_map& candidates, - std::vector>& result, + void inserter(std::vector>& result, arangodb::velocypack::StringRef const& s, arangodb::velocypack::StringRef const& t, double currentWeight, graph::EdgeDocumentToken&& edge); @@ -223,20 +166,10 @@ class AttributeWeightShortestPathFinder : public ShortestPathFinder { void expandVertex(bool backward, arangodb::velocypack::StringRef const& source, std::vector>& result); - ////////////////////////////////////////////////////////////////////////////// - /// @brief return the shortest path between the start and target vertex, - /// multi-threaded version using SearcherTwoThreads. 
- ////////////////////////////////////////////////////////////////////////////// - - // Caller has to free the result - // If this returns true there is a path, if this returns false there is no - // path + void clearCandidates() noexcept; + size_t candidateMemoryUsage() const noexcept; - /* Unused for now maybe reactived - bool shortestPathTwoThreads(arangodb::velocypack::Slice& start, - arangodb::velocypack::Slice& target, - arangodb::graph::ShortestPathResult& result); - */ + arangodb::ResourceMonitor& _resourceMonitor; ////////////////////////////////////////////////////////////////////////////// /// @brief lowest total weight for a complete path found @@ -256,12 +189,6 @@ class AttributeWeightShortestPathFinder : public ShortestPathFinder { std::atomic _bingo; - ////////////////////////////////////////////////////////////////////////////// - /// @brief _resultMutex, this is used to protect access to the result data - ////////////////////////////////////////////////////////////////////////////// - - arangodb::Mutex _resultMutex; - ////////////////////////////////////////////////////////////////////////////// /// @brief _intermediate, one vertex on the shortest path found, flag /// indicates @@ -270,7 +197,11 @@ class AttributeWeightShortestPathFinder : public ShortestPathFinder { bool _intermediateSet; arangodb::velocypack::StringRef _intermediate; - + + /// @brief temporary value, which is going to be populated in inserter, + /// and recycled between calls + std::unordered_map _candidates; + std::unique_ptr _forwardCursor; std::unique_ptr _backwardCursor; }; diff --git a/arangod/Graph/Cache/RefactoredClusterTraverserCache.cpp b/arangod/Graph/Cache/RefactoredClusterTraverserCache.cpp index 4e9f693fd2b2..852008b03148 100644 --- a/arangod/Graph/Cache/RefactoredClusterTraverserCache.cpp +++ b/arangod/Graph/Cache/RefactoredClusterTraverserCache.cpp @@ -96,23 +96,25 @@ auto RefactoredClusterTraverserCache::persistString(arangodb::velocypack::Hashed return *it; } auto 
res = _stringHeap.registerString(idString); - { - ResourceUsageScope guard(_resourceMonitor, ::costPerPersistedString); + ResourceUsageScope guard(_resourceMonitor, ::costPerPersistedString); - _persistedStrings.emplace(res); + _persistedStrings.emplace(res); - // now make the TraverserCache responsible for memory tracking - guard.steal(); - } + // now make the TraverserCache responsible for memory tracking + guard.steal(); return res; } auto RefactoredClusterTraverserCache::persistEdgeData(velocypack::Slice edgeSlice) -> std::pair { arangodb::velocypack::HashedStringRef edgeIdRef(edgeSlice.get(StaticStrings::IdString)); + + ResourceUsageScope guard(_resourceMonitor, ::costPerVertexOrEdgeStringRefSlice); + auto const [it, inserted] = _edgeData.try_emplace(edgeIdRef, edgeSlice); if (inserted) { - _resourceMonitor.increaseMemoryUsage(costPerVertexOrEdgeStringRefSlice); + // now make the TraverserCache responsible for memory tracking + guard.steal(); } return std::make_pair(it->second, inserted); -} \ No newline at end of file +} diff --git a/arangod/Graph/Cache/RefactoredTraverserCache.cpp b/arangod/Graph/Cache/RefactoredTraverserCache.cpp index d8eb1f3ee436..d5dc52a3f799 100644 --- a/arangod/Graph/Cache/RefactoredTraverserCache.cpp +++ b/arangod/Graph/Cache/RefactoredTraverserCache.cpp @@ -231,7 +231,10 @@ arangodb::velocypack::HashedStringRef RefactoredTraverserCache::persistString( return *it; } auto res = _stringHeap.registerString(idString); + + ResourceUsageScope guard(_resourceMonitor, sizeof(res)); _persistedStrings.emplace(res); - _resourceMonitor.increaseMemoryUsage(sizeof(res)); + + guard.steal(); return res; } diff --git a/arangod/Graph/ConstantWeightShortestPathFinder.cpp b/arangod/Graph/ConstantWeightShortestPathFinder.cpp index b7d32dbcfa81..1b4c244d7111 100644 --- a/arangod/Graph/ConstantWeightShortestPathFinder.cpp +++ b/arangod/Graph/ConstantWeightShortestPathFinder.cpp @@ -23,17 +23,14 @@ #include "ConstantWeightShortestPathFinder.h" -#include 
"Basics/tryEmplaceHelper.h" -#include "Cluster/ServerState.h" +#include "Basics/ResourceUsage.h" #include "Graph/EdgeCursor.h" #include "Graph/EdgeDocumentToken.h" #include "Graph/ShortestPathOptions.h" #include "Graph/ShortestPathResult.h" #include "Graph/TraverserCache.h" #include "Transaction/Helpers.h" -#include "VocBase/LogicalCollection.h" -#include #include #include #include @@ -41,12 +38,18 @@ using namespace arangodb; using namespace arangodb::graph; -ConstantWeightShortestPathFinder::PathSnippet::PathSnippet(arangodb::velocypack::StringRef& pred, - EdgeDocumentToken&& path) - : _pred(pred), _path(std::move(path)) {} +ConstantWeightShortestPathFinder::PathSnippet::PathSnippet() noexcept + : _pred(), + _path() {} + +ConstantWeightShortestPathFinder::PathSnippet::PathSnippet(arangodb::velocypack::StringRef pred, + EdgeDocumentToken&& path) noexcept + : _pred(pred), + _path(std::move(path)) {} ConstantWeightShortestPathFinder::ConstantWeightShortestPathFinder(ShortestPathOptions& options) - : ShortestPathFinder(options) { + : ShortestPathFinder(options), + _resourceMonitor(options.resourceMonitor()) { // cppcheck-suppress * _forwardCursor = _options.buildCursor(false); // cppcheck-suppress * @@ -76,12 +79,22 @@ bool ConstantWeightShortestPathFinder::shortestPath( _options.fetchVerticesCoordinator(result._vertices); return true; } - _leftClosure.clear(); - _rightClosure.clear(); + clearVisited(); + _leftFound.try_emplace(start, PathSnippet()); + try { + _rightFound.try_emplace(end, PathSnippet()); + } catch (...) 
{ + // leave it in clean state + _leftFound.erase(start); + throw; + } + + // memory usage for the initial start vertices + _resourceMonitor.increaseMemoryUsage(2 * pathSnippetMemoryUsage()); - _leftFound.try_emplace(start, nullptr); - _rightFound.try_emplace(end, nullptr); + _leftClosure.clear(); + _rightClosure.clear(); _leftClosure.emplace_back(start); _rightClosure.emplace_back(end); @@ -109,64 +122,61 @@ bool ConstantWeightShortestPathFinder::shortestPath( } bool ConstantWeightShortestPathFinder::expandClosure( - Closure& sourceClosure, Snippets& sourceSnippets, Snippets& targetSnippets, + Closure& sourceClosure, Snippets& sourceSnippets, Snippets const& targetSnippets, bool isBackward, arangodb::velocypack::StringRef& result) { _nextClosure.clear(); - for (auto& v : sourceClosure) { - _edges.clear(); - _neighbors.clear(); + for (auto const& v : sourceClosure) { + // populates _neighbors expandVertex(isBackward, v); - size_t const neighborsSize = _neighbors.size(); - TRI_ASSERT(_edges.size() == neighborsSize); - - for (size_t i = 0; i < neighborsSize; ++i) { - auto const& n = _neighbors[i]; - - bool emplaced = false; - std::tie(std::ignore, emplaced) = - sourceSnippets.try_emplace(_neighbors[i], arangodb::lazyConstruct([&] { - return new PathSnippet(v, std::move(_edges[i])); - })); - - if (emplaced) { - // NOTE: _edges[i] stays intact after move - // and is reset to a nullptr. So if we crash - // here no mem-leaks. or undefined behavior - // Just make sure _edges is not used after - auto targetFoundIt = targetSnippets.find(n); + + for (auto& n : _neighbors) { + ResourceUsageScope guard(_resourceMonitor, pathSnippetMemoryUsage()); + + // create the PathSnippet if it does not yet exist + if (sourceSnippets.try_emplace(n.vertex, v, std::move(n.edge)).second) { + // new PathSnippet created. 
now sourceSnippets is responsible for memory usage tracking + guard.steal(); + + auto targetFoundIt = targetSnippets.find(n.vertex); if (targetFoundIt != targetSnippets.end()) { - result = n; + result = n.vertex; return true; } - _nextClosure.emplace_back(n); + _nextClosure.emplace_back(n.vertex); } } } - _edges.clear(); _neighbors.clear(); sourceClosure.swap(_nextClosure); _nextClosure.clear(); return false; } -void ConstantWeightShortestPathFinder::fillResult(arangodb::velocypack::StringRef& n, +void ConstantWeightShortestPathFinder::fillResult(arangodb::velocypack::StringRef n, arangodb::graph::ShortestPathResult& result) { + ResourceUsageScope guard(_resourceMonitor); + result._vertices.emplace_back(n); auto it = _leftFound.find(n); TRI_ASSERT(it != _leftFound.end()); arangodb::velocypack::StringRef next; - while (it != _leftFound.end() && it->second != nullptr) { - next = it->second->_pred; + while (it != _leftFound.end() && !it->second.empty()) { + guard.increase(arangodb::graph::ShortestPathResult::resultItemMemoryUsage()); + + next = it->second._pred; result._vertices.push_front(next); - result._edges.push_front(std::move(it->second->_path)); + result._edges.push_front(std::move(it->second._path)); it = _leftFound.find(next); } + it = _rightFound.find(n); TRI_ASSERT(it != _rightFound.end()); - while (it != _rightFound.end() && it->second != nullptr) { - next = it->second->_pred; + while (it != _rightFound.end() && !it->second.empty()) { + guard.increase(arangodb::graph::ShortestPathResult::resultItemMemoryUsage()); + + next = it->second._pred; result._vertices.emplace_back(next); - result._edges.emplace_back(std::move(it->second->_path)); + result._edges.emplace_back(std::move(it->second._path)); it = _rightFound.find(next); } @@ -175,20 +185,34 @@ void ConstantWeightShortestPathFinder::fillResult(arangodb::velocypack::StringRe } _options.fetchVerticesCoordinator(result._vertices); clearVisited(); + + // we intentionally don't commit the memory usage to the 
_resourceMonitor here. + // we do this later at the call site if the result will be used for longer. } void ConstantWeightShortestPathFinder::expandVertex(bool backward, arangodb::velocypack::StringRef vertex) { EdgeCursor* cursor = backward ? _backwardCursor.get() : _forwardCursor.get(); cursor->rearm(vertex, 0); + + // we are tracking the memory usage for neighbors temporarily here (only inside this function) + ResourceUsageScope guard(_resourceMonitor); - cursor->readAll([&](EdgeDocumentToken&& eid, VPackSlice edge, size_t cursorIdx) -> void { + _neighbors.clear(); + + cursor->readAll([&](EdgeDocumentToken&& eid, VPackSlice edge, size_t /*cursorIdx*/) -> void { if (edge.isString()) { if (edge.compareString(vertex.data(), vertex.length()) != 0) { + guard.increase(Neighbor::itemMemoryUsage()); + arangodb::velocypack::StringRef id = _options.cache()->persistString(arangodb::velocypack::StringRef(edge)); - _edges.emplace_back(std::move(eid)); - _neighbors.emplace_back(id); + + if (_neighbors.capacity() == 0) { + // avoid a few reallocations for the first members + _neighbors.reserve(8); + } + _neighbors.emplace_back(id, std::move(eid)); } } else { arangodb::velocypack::StringRef other( @@ -198,22 +222,33 @@ void ConstantWeightShortestPathFinder::expandVertex(bool backward, transaction::helpers::extractToFromDocument(edge)); } if (other != vertex) { + guard.increase(Neighbor::itemMemoryUsage()); + arangodb::velocypack::StringRef id = _options.cache()->persistString(other); - _edges.emplace_back(std::move(eid)); - _neighbors.emplace_back(id); + + if (_neighbors.capacity() == 0) { + // avoid a few reallocations for the first members + _neighbors.reserve(8); + } + _neighbors.emplace_back(id, std::move(eid)); } } }); + + // we don't commit the memory usage to the _resourceMonitor here because + // _neighbors is recycled over and over } void ConstantWeightShortestPathFinder::clearVisited() { - for (auto& it : _leftFound) { - delete it.second; - } - _leftFound.clear(); + 
size_t totalMemoryUsage = (_leftFound.size() + _rightFound.size()) * pathSnippetMemoryUsage(); + _resourceMonitor.decreaseMemoryUsage(totalMemoryUsage); - for (auto& it : _rightFound) { - delete it.second; - } + _leftFound.clear(); _rightFound.clear(); } + +size_t ConstantWeightShortestPathFinder::pathSnippetMemoryUsage() const noexcept { + return 16 /*arbitrary overhead*/ + + sizeof(arangodb::velocypack::StringRef) + + sizeof(PathSnippet); +} diff --git a/arangod/Graph/ConstantWeightShortestPathFinder.h b/arangod/Graph/ConstantWeightShortestPathFinder.h index e023182be0f8..c86b09a3011c 100644 --- a/arangod/Graph/ConstantWeightShortestPathFinder.h +++ b/arangod/Graph/ConstantWeightShortestPathFinder.h @@ -32,6 +32,7 @@ #include namespace arangodb { +struct ResourceMonitor; namespace velocypack { class Slice; @@ -44,14 +45,21 @@ struct ShortestPathOptions; class ConstantWeightShortestPathFinder : public ShortestPathFinder { private: struct PathSnippet { - arangodb::velocypack::StringRef const _pred; + PathSnippet() noexcept; + PathSnippet(arangodb::velocypack::StringRef pred, graph::EdgeDocumentToken&& path) noexcept; + PathSnippet(PathSnippet&& other) noexcept = default; + PathSnippet& operator=(PathSnippet&& other) noexcept = default; + + bool empty() const noexcept { + return _pred.empty(); + } + + arangodb::velocypack::StringRef _pred; graph::EdgeDocumentToken _path; - - PathSnippet(arangodb::velocypack::StringRef& pred, graph::EdgeDocumentToken&& path); }; - typedef std::deque Closure; - typedef std::unordered_map Snippets; + typedef std::vector Closure; + typedef std::unordered_map Snippets; public: explicit ConstantWeightShortestPathFinder(ShortestPathOptions& options); @@ -65,30 +73,47 @@ class ConstantWeightShortestPathFinder : public ShortestPathFinder { void clear() override; private: + // side-effect: populates _neighbors void expandVertex(bool backward, arangodb::velocypack::StringRef vertex); void clearVisited(); - bool expandClosure(Closure& 
sourceClosure, Snippets& sourceSnippets, Snippets& targetSnippets, + bool expandClosure(Closure& sourceClosure, Snippets& sourceSnippets, Snippets const& targetSnippets, bool direction, arangodb::velocypack::StringRef& result); - void fillResult(arangodb::velocypack::StringRef& n, + void fillResult(arangodb::velocypack::StringRef n, arangodb::graph::ShortestPathResult& result); + size_t pathSnippetMemoryUsage() const noexcept; + private: + arangodb::ResourceMonitor& _resourceMonitor; + Snippets _leftFound; Closure _leftClosure; Snippets _rightFound; Closure _rightClosure; + + std::unique_ptr _forwardCursor; + std::unique_ptr _backwardCursor; + // temp values, only used inside expandClosure() Closure _nextClosure; - std::vector _neighbors; - std::vector _edges; + struct Neighbor { + arangodb::velocypack::StringRef vertex; + graph::EdgeDocumentToken edge; - std::unique_ptr _forwardCursor; - std::unique_ptr _backwardCursor; + Neighbor(arangodb::velocypack::StringRef v, graph::EdgeDocumentToken e) noexcept + : vertex(v), edge(e) {}; + + static constexpr size_t itemMemoryUsage() { + return sizeof(decltype(vertex)) + sizeof(decltype(edge)); + } + }; + + std::vector _neighbors; }; } // namespace graph diff --git a/arangod/Graph/KShortestPathsFinder.cpp b/arangod/Graph/KShortestPathsFinder.cpp index be4b82444c12..cb4607e15bd3 100644 --- a/arangod/Graph/KShortestPathsFinder.cpp +++ b/arangod/Graph/KShortestPathsFinder.cpp @@ -24,6 +24,7 @@ #include "KShortestPathsFinder.h" #include "Aql/AqlValue.h" +#include "Basics/ResourceUsage.h" #include "Cluster/ServerState.h" #include "Graph/EdgeCursor.h" #include "Graph/EdgeDocumentToken.h" @@ -43,6 +44,7 @@ using namespace arangodb::graph; KShortestPathsFinder::KShortestPathsFinder(ShortestPathOptions& options) : ShortestPathFinder(options), + _resourceMonitor(options.resourceMonitor()), _left(FORWARD), _right(BACKWARD) { // cppcheck-suppress * @@ -51,12 +53,26 @@ KShortestPathsFinder::KShortestPathsFinder(ShortestPathOptions& 
options) _backwardCursor = options.buildCursor(true); } -KShortestPathsFinder::~KShortestPathsFinder() = default; +KShortestPathsFinder::~KShortestPathsFinder() { + try { + // necessary to revert memory usage trackers + clear(); + } catch (...) {} +} void KShortestPathsFinder::clear() { _shortestPaths.clear(); - _candidatePaths.clear(); + + while (!_candidatePaths.empty()) { + _resourceMonitor.decreaseMemoryUsage(_candidatePaths.back().memoryUsage()); + _candidatePaths.pop_back(); + } + TRI_ASSERT(_candidatePaths.empty()); + + // clear cache and forget about its memory usage + _resourceMonitor.decreaseMemoryUsage(_vertexCache.size() * vertexCacheEntryMemoryUsage()); _vertexCache.clear(); + _traversalDone = true; } @@ -68,10 +84,7 @@ bool KShortestPathsFinder::startKShortestPathsTraversal( _start = arangodb::velocypack::StringRef(start); _end = arangodb::velocypack::StringRef(end); - _vertexCache.clear(); - _shortestPaths.clear(); - _candidatePaths.clear(); - + clear(); _traversalDone = false; TRI_IF_FAILURE("Travefalse") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } @@ -87,8 +100,6 @@ bool KShortestPathsFinder::computeShortestPath(VertexRef const& start, VertexRef _right.reset(end); VertexRef join; - result.clear(); - auto currentBest = std::optional{}; // We will not improve anymore if we have found a best path and the smallest @@ -104,6 +115,7 @@ bool KShortestPathsFinder::computeShortestPath(VertexRef const& start, VertexRef } } + result.clear(); if (currentBest.has_value()) { reconstructPath(_left, _right, join, result); return true; @@ -115,8 +127,21 @@ bool KShortestPathsFinder::computeShortestPath(VertexRef const& start, VertexRef void KShortestPathsFinder::computeNeighbourhoodOfVertexCache(VertexRef vertex, Direction direction, std::vector*& res) { - auto lookup = _vertexCache.try_emplace(vertex, FoundVertex(vertex)).first; - auto& cache = lookup->second; // want to update the cached vertex in place + // track memory usage for one more item + // if we can't 
insert the item into the cache, it means the item is + // already in the cache. then we are not responsible for tracking its memory + // usage. + ResourceUsageScope guard(_resourceMonitor, vertexCacheEntryMemoryUsage()); + + auto result = _vertexCache.try_emplace(vertex, FoundVertex(vertex)); + + if (result.second) { + // successful insert - now _vertexCache has taken over responsibility + // for memory usage tracking + guard.steal(); + } + + auto& cache = result.first->second; // want to update the cached vertex in place switch (direction) { case BACKWARD: @@ -241,6 +266,9 @@ void KShortestPathsFinder::advanceFrontier(Ball& source, Ball const& target, void KShortestPathsFinder::reconstructPath(Ball const& left, Ball const& right, VertexRef const& join, Path& result) { + // track memory used for reconstructing the path + ResourceUsageScope guard(_resourceMonitor); + result.clear(); TRI_ASSERT(!join.empty()); result._vertices.emplace_back(join); @@ -250,6 +278,9 @@ void KShortestPathsFinder::reconstructPath(Ball const& left, Ball const& right, double startToJoin = it->weight(); result._weight = startToJoin; while (it != nullptr && it->getKey() != left.center()) { + // may throw + guard.increase(Path::pathItemMemoryUsage()); + result._vertices.push_front(it->_pred); result._edges.push_front(it->_edge); result._weights.push_front(it->_weight); @@ -263,6 +294,9 @@ void KShortestPathsFinder::reconstructPath(Ball const& left, Ball const& right, double joinToEnd = it->weight(); result._weight += joinToEnd; while (it != nullptr && it->getKey() != right.center()) { + // may throw + guard.increase(Path::pathItemMemoryUsage()); + result._vertices.emplace_back(it->_pred); result._edges.emplace_back(it->_edge); it = right._frontier.find(it->_pred); @@ -274,14 +308,16 @@ void KShortestPathsFinder::reconstructPath(Ball const& left, Ball const& right, TRI_IF_FAILURE("TraversalOOMPath") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } + + // we don't commit the memory usage to the 
_resourceMonitor here + // intentionally. we will do this later at the call site. } bool KShortestPathsFinder::computeNextShortestPath(Path& result) { VertexSet forbiddenVertices; EdgeSet forbiddenEdges; TRI_ASSERT(!_shortestPaths.empty()); - auto& lastShortestPath = _shortestPaths.back(); - bool available = false; + auto const& lastShortestPath = _shortestPaths.back(); for (size_t i = lastShortestPath._branchpoint; i + 1 < lastShortestPath.length(); ++i) { auto& spur = lastShortestPath._vertices[i]; @@ -316,6 +352,11 @@ bool KShortestPathsFinder::computeNextShortestPath(Path& result) { // abuse result variable for some intermediate calculations here... // the "real" result is only calculated at the very end of this method result.clear(); + + // computeShortestPath will internally track memory usage for result, + // however it won't commit the memory usage, because result is only + // temporary. if we use the result for something else later, we will store + // it in _candidatePaths and track its memory usage there if (computeShortestPath(spur, _end, forbiddenVertices, forbiddenEdges, result)) { _candidate.clear(); _candidate.append(lastShortestPath, 0, i); @@ -327,17 +368,29 @@ bool KShortestPathsFinder::computeNextShortestPath(Path& result) { return v._weight >= _candidate._weight; }); if (it == _candidatePaths.end() || !(*it == _candidate)) { + // track memory usage for the candidate path + ResourceUsageScope guard(_resourceMonitor, _candidate.memoryUsage()); _candidatePaths.emplace(it, std::move(_candidate)); + + // successful insert - now _candidatePaths has taken over responsibility + // for memory usage tracking + guard.steal(); } } } + // only used for function return value, which is currently ignored + bool available = false; + result.clear(); if (!_candidatePaths.empty()) { auto const& p = _candidatePaths.front(); result.append(p, 0, p.length() - 1); result._branchpoint = p._branchpoint; + + // count down the memory usage again + 
_resourceMonitor.decreaseMemoryUsage(p.memoryUsage()); _candidatePaths.pop_front(); available = true; } @@ -394,8 +447,6 @@ bool KShortestPathsFinder::getNextPathShortestPathResult(ShortestPathResult& res #endif bool KShortestPathsFinder::getNextPathAql(arangodb::velocypack::Builder& result) { - _tempPath.clear(); - if (getNextPath(_tempPath)) { result.clear(); result.openObject(); @@ -426,6 +477,11 @@ bool KShortestPathsFinder::getNextPathAql(arangodb::velocypack::Builder& result) } bool KShortestPathsFinder::skipPath() { - _tempPath.clear(); return getNextPath(_tempPath); } + +size_t KShortestPathsFinder::vertexCacheEntryMemoryUsage() const noexcept { + return 16 /*arbitrary*/ + + sizeof(typename decltype(_vertexCache)::key_type) + + sizeof(typename decltype(_vertexCache)::value_type); +} diff --git a/arangod/Graph/KShortestPathsFinder.h b/arangod/Graph/KShortestPathsFinder.h index 0f917897afa8..a57ffc952ce8 100644 --- a/arangod/Graph/KShortestPathsFinder.h +++ b/arangod/Graph/KShortestPathsFinder.h @@ -36,6 +36,7 @@ #include namespace arangodb { +struct ResourceMonitor; namespace velocypack { class Slice; @@ -83,7 +84,22 @@ class KShortestPathsFinder : public ShortestPathFinder { _weight = 0; _branchpoint = 0; } + + static constexpr size_t pathItemMemoryUsage() { + return sizeof(typename decltype(_vertices)::value_type) + + sizeof(typename decltype(_edges)::value_type) + + sizeof(typename decltype(_weights)::value_type); + } + + size_t memoryUsage() const noexcept { + return sizeof(Path) + + _vertices.size() * sizeof(typename decltype(_vertices)::value_type) + + _edges.size() * sizeof(typename decltype(_edges)::value_type) + + _weights.size() * sizeof(typename decltype(_weights)::value_type); + } + size_t length() const { return _vertices.size(); } + void append(Path const& p, size_t a, size_t b) { if (this->length() == 0) { _vertices.emplace_back(p._vertices.at(a)); @@ -274,7 +290,12 @@ class KShortestPathsFinder : public ShortestPathFinder { EdgeSet const& 
forbiddenEdges, VertexRef& join, std::optional& currentBest); + // return the size of a map entry plus some assumed overhead + size_t vertexCacheEntryMemoryUsage() const noexcept; + private: + arangodb::ResourceMonitor& _resourceMonitor; + bool _traversalDone{true}; VertexRef _start; diff --git a/arangod/Graph/PathManagement/PathStore.cpp b/arangod/Graph/PathManagement/PathStore.cpp index b49c8d69b13d..cf658defd7c3 100644 --- a/arangod/Graph/PathManagement/PathStore.cpp +++ b/arangod/Graph/PathManagement/PathStore.cpp @@ -71,9 +71,11 @@ size_t PathStore::append(Step step) { << " Adding step: " << step.toString(); auto idx = _schreier.size(); - _resourceMonitor.increaseMemoryUsage(sizeof(Step)); + + ResourceUsageScope guard(_resourceMonitor, sizeof(Step)); _schreier.emplace_back(std::move(step)); + guard.steal(); return idx; } diff --git a/arangod/Graph/Providers/SingleServerProvider.cpp b/arangod/Graph/Providers/SingleServerProvider.cpp index 245f6abf1d8c..398be4ff87f9 100644 --- a/arangod/Graph/Providers/SingleServerProvider.cpp +++ b/arangod/Graph/Providers/SingleServerProvider.cpp @@ -154,7 +154,7 @@ auto SingleServerProvider::expand(Step const& step, size_t previous, void SingleServerProvider::addVertexToBuilder(Step::Vertex const& vertex, arangodb::velocypack::Builder& builder) { _cache.insertVertexIntoResult(_stats, vertex.getID(), builder); -}; +} void SingleServerProvider::insertEdgeIntoResult(EdgeDocumentToken edge, arangodb::velocypack::Builder& builder) { diff --git a/arangod/Graph/ShortestPathResult.h b/arangod/Graph/ShortestPathResult.h index 7577b42ccf5d..aec37f5a0de2 100644 --- a/arangod/Graph/ShortestPathResult.h +++ b/arangod/Graph/ShortestPathResult.h @@ -82,6 +82,11 @@ class ShortestPathResult { void addVertex(arangodb::velocypack::StringRef v); void addEdge(arangodb::graph::EdgeDocumentToken e); + + static constexpr size_t resultItemMemoryUsage() { + return sizeof(typename decltype(_vertices)::value_type) + + sizeof(typename 
decltype(_edges)::value_type); + } private: /// @brief Count how many documents have been read diff --git a/tests/js/server/aql/aql-memory-limit.js b/tests/js/server/aql/aql-memory-limit.js index 89327505cfb9..a31c63172532 100644 --- a/tests/js/server/aql/aql-memory-limit.js +++ b/tests/js/server/aql/aql-memory-limit.js @@ -28,24 +28,16 @@ /// @author Copyright 2012, triAGENS GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// -var internal = require("internal"); -var jsunity = require("jsunity"); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief test suite -//////////////////////////////////////////////////////////////////////////////// - -function ahuacatlMemoryLimitTestSuite () { - var errors = internal.errors; +const internal = require("internal"); +const errors = internal.errors; +const jsunity = require("jsunity"); +const db = require("@arangodb").db; +function ahuacatlMemoryLimitStaticQueriesTestSuite () { return { -//////////////////////////////////////////////////////////////////////////////// -/// @brief test unlimited memory -//////////////////////////////////////////////////////////////////////////////// - testUnlimited : function () { - var actual = AQL_EXECUTE("FOR i IN 1..100000 RETURN CONCAT('foobarbaz', i)").json; + let actual = AQL_EXECUTE("FOR i IN 1..100000 RETURN CONCAT('foobarbaz', i)").json; assertEqual(100000, actual.length); actual = AQL_EXECUTE("FOR i IN 1..100000 RETURN CONCAT('foobarbaz', i)", null, { memoryLimit: 0 }).json; @@ -53,7 +45,7 @@ function ahuacatlMemoryLimitTestSuite () { }, testLimitedButValid : function () { - var actual = AQL_EXECUTE("FOR i IN 1..100000 RETURN CONCAT('foobarbaz', i)", null, { memoryLimit: 100 * 1000 * 1000 }).json; + let actual = AQL_EXECUTE("FOR i IN 1..100000 RETURN CONCAT('foobarbaz', i)", null, { memoryLimit: 100 * 1000 * 1000 }).json; assertEqual(100000, actual.length); // should still be ok @@ -74,7 
+66,7 @@ function ahuacatlMemoryLimitTestSuite () { }, testLimitedAndInvalid : function () { - var queries = [ + const queries = [ [ "FOR i IN 1..100000 SORT CONCAT('foobarbaz', i) RETURN CONCAT('foobarbaz', i)", 200000 ], [ "FOR i IN 1..100000 SORT CONCAT('foobarbaz', i) RETURN CONCAT('foobarbaz', i)", 100000 ], [ "FOR i IN 1..100000 RETURN CONCAT('foobarbaz', i)", 20000 ], @@ -95,16 +87,244 @@ function ahuacatlMemoryLimitTestSuite () { assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); } }); - } + }, }; } -//////////////////////////////////////////////////////////////////////////////// -/// @brief executes the test suite -//////////////////////////////////////////////////////////////////////////////// +function ahuacatlMemoryLimitReadOnlyQueriesTestSuite () { + const cn = "UnitTestsCollection"; -jsunity.run(ahuacatlMemoryLimitTestSuite); + let c; -return jsunity.done(); + return { + setUpAll : function () { + // only one shard because that is more predictable for memory usage + c = db._create(cn, { numberOfShards: 1 }); + + let docs = []; + for (let i = 0; i < 100 * 1000; ++i) { + docs.push({ value1: i, value2: i % 10, _key: "test" + i }); + if (docs.length === 5000) { + c.insert(docs); + docs = []; + } + } + }, + + tearDownAll : function () { + db._drop(cn); + }, + + testFullScan : function () { + const query = "FOR doc IN " + cn + " RETURN doc"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 10 * 1000 * 1000 }).json; + assertEqual(100000, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 5 * 1000 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testIndexScan : function () { + const query = "FOR doc IN " + cn + " SORT doc._key RETURN doc"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 10 * 1000 * 1000 }).json; + assertEqual(100000, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 5 * 1000 * 1000 }); + fail(); + } catch (err) { + 
assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testSort : function () { + // turn off constrained heap sort + const optimizer = { rules: ["-sort-limit"] }; + const query = "FOR doc IN " + cn + " SORT doc.value1 LIMIT 10 RETURN doc"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 15 * 1000 * 1000, optimizer }).json; + assertEqual(10, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 10 * 1000 * 1000, optimizer }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testCollectOnUniqueAttribute : function () { + // values of doc.value1 are all unique + const query = "FOR doc IN " + cn + " COLLECT v = doc.value1 OPTIONS { method: 'hash' } RETURN v"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 10 * 1000 * 1000 }).json; + assertEqual(100000, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 5 * 1000 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testCollectOnRepeatedAttribute : function () { + // values of doc.value2 are repeating a lot (only 10 different values) + const query = "FOR doc IN " + cn + " COLLECT v = doc.value2 OPTIONS { method: 'hash' } RETURN v"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 1000 * 1000 }).json; + assertEqual(10, actual.length); + + actual = AQL_EXECUTE(query, null, { memoryLimit: 500 * 1000 }).json; + assertEqual(10, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 10 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + }; +} +function ahuacatlMemoryLimitGraphQueriesTestSuite () { + const vn = "UnitTestsVertex"; + const en = "UnitTestsEdge"; + + return { + setUpAll : function () { + db._drop(en); + db._drop(vn); + + const n = 400; + + // only one shard because that is more predictable for memory usage + let c = db._create(vn, { numberOfShards: 1 
}); + + let docs = []; + for (let i = 0; i <= n; ++i) { + docs.push({ _key: "test" + i }); + } + c.insert(docs); + + c = db._createEdgeCollection(en, { numberOfShards: 1 }); + + const weight = 1; + + docs = []; + for (let i = 0; i < n; ++i) { + for (let j = i + 1; j < n; ++j) { + docs.push({ _from: vn + "/test" + i, _to: vn + "/test" + j, weight }); + if (docs.length === 5000) { + c.insert(docs); + docs = []; + } + } + } + if (docs.length) { + c.insert(docs); + } + }, + + tearDownAll : function () { + db._drop(en); + db._drop(vn); + }, + + testKShortestPaths : function () { + const query = "WITH " + vn + " FOR p IN OUTBOUND K_SHORTEST_PATHS '" + vn + "/test0' TO '" + vn + "/test11' " + en + " RETURN p"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 5 * 1000 * 1000 }).json; + // 2^10 = 1024 paths exist (any subset of test1..test10 as intermediate vertices) + assertEqual(1024, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 1000 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testKPaths : function () { + const query = "WITH " + vn + " FOR p IN OUTBOUND K_PATHS '" + vn + "/test0' TO '" + vn + "/test317' " + en + " RETURN p"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 250 * 1000 }).json; + // exactly one path is expected here (presumably only the direct edge, given the default depth) + assertEqual(1, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 30 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testShortestPathDefaultWeight : function () { + const query = "WITH " + vn + " FOR p IN ANY SHORTEST_PATH '" + vn + "/test0' TO '" + vn + "/test310' " + en + " RETURN p"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 500 * 1000 }).json; + assertEqual(2, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 30 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testShortestPathWeightAttribute : function () { + 
const query = "WITH " + vn + " FOR p IN ANY SHORTEST_PATH '" + vn + "/test0' TO '" + vn + "/test310' " + en + " RETURN p"; + + let actual = AQL_EXECUTE(query, null, { memoryLimit: 1000 * 1000, weightAttribute: "weight" }).json; + assertEqual(2, actual.length); + + try { + AQL_EXECUTE(query, null, { memoryLimit: 30 * 1000, weightAttribute: "weight" }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + testTraversal : function () { + const query = "WITH " + vn + " FOR v, e, p IN 1..@maxDepth OUTBOUND '" + vn + "/test0' " + en + " RETURN v"; + + let actual = AQL_EXECUTE(query, { maxDepth: 2 }, { memoryLimit: 20 * 1000 * 1000 }).json; + assertEqual(79800, actual.length); + + try { + // run query with same depth, but lower mem limit + AQL_EXECUTE("WITH " + vn + " FOR v, e, p IN 1..@maxDepth OUTBOUND '" + vn + "/test0' " + en + " RETURN v", { maxDepth: 2 }, { memoryLimit: 2 * 1000 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + + try { + // increase traversal depth + AQL_EXECUTE("WITH " + vn + " FOR v, e, p IN 1..@maxDepth OUTBOUND '" + vn + "/test0' " + en + " RETURN v", { maxDepth: 5 }, { memoryLimit: 10 * 1000 * 1000 }); + fail(); + } catch (err) { + assertEqual(errors.ERROR_RESOURCE_LIMIT.code, err.errorNum); + } + }, + + }; +} + +jsunity.run(ahuacatlMemoryLimitStaticQueriesTestSuite); +jsunity.run(ahuacatlMemoryLimitReadOnlyQueriesTestSuite); +jsunity.run(ahuacatlMemoryLimitGraphQueriesTestSuite); + +return jsunity.done();