diff --git a/CHANGELOG b/CHANGELOG index 8dc8830cffcf..b938ce22c63a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,13 @@ devel ----- +* Add detection of two threads working on an ExecutionBlock concurrently. + +* Add detection of two threads waiting for a PrefetchTask concurrently. + +* Add visibility into the state if a thread waits for more than a second + on a PrefetchTask (and does not see InProgress). + * Fix a memory leak in LogContext, which could leak entries which are still, and only, used by a thread-local LogContext when that thread is destroyed. diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index 1da9be17b8d9..66499056683e 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -182,6 +182,13 @@ class ExecutionBlock { /// this would harm our implementation. std::atomic _isBlockInUse{false}; #endif + + /// @brief The following is always 0 or 1, if our assumptions are correct. + /// The `execute` method as well as the destructor increment it at their + /// start and decrement it at their end. If we detect a double use, we + /// log the stack traces. + std::atomic _numberOfUsers{0}; + std::atomic _logStacktrace{false}; }; } // namespace aql diff --git a/arangod/Aql/ExecutionBlockImpl.h b/arangod/Aql/ExecutionBlockImpl.h index f2cf07b5e18a..f16843696e7e 100644 --- a/arangod/Aql/ExecutionBlockImpl.h +++ b/arangod/Aql/ExecutionBlockImpl.h @@ -414,6 +414,8 @@ class ExecutionBlockImpl final : public ExecutionBlock { ExecutionBlockImpl& _block; AqlCallStack _stack; + mutable std::atomic _numberWaiters{0}; + mutable std::atomic _logStacktrace{false}; }; /** diff --git a/arangod/Aql/ExecutionBlockImpl.tpp b/arangod/Aql/ExecutionBlockImpl.tpp index 1c474d473792..8dcab969f025 100644 --- a/arangod/Aql/ExecutionBlockImpl.tpp +++ b/arangod/Aql/ExecutionBlockImpl.tpp @@ -355,11 +355,32 @@ ExecutionBlockImpl::~ExecutionBlockImpl() { template void ExecutionBlockImpl::stopAsyncTasks() { - if (_prefetchTask && !_prefetchTask->isConsumed() && - !_prefetchTask->tryClaim()) { - // some thread is still working on our prefetch task - // -> we need to wait for that task to finish first! - _prefetchTask->waitFor(); + if (_prefetchTask) { + // Double use diagnostics: + uint64_t userCount = _numberOfUsers.fetch_add(1, std::memory_order_relaxed); + if (userCount > 0) { + _logStacktrace.store(true, std::memory_order_relaxed); + LOG_TOPIC("52637", ERR, Logger::AQL) + << "ALERT: Double use of ExecutionBlock detected, " + << " Blockinfo: " << printBlockInfo() + << " Query ID: " << getQuery().id() << ", stacktrace:"; + CrashHandler::logBacktrace(); + } + auto guard = scopeGuard([&]() noexcept { + _numberOfUsers.fetch_sub(1, std::memory_order_relaxed); + if (_logStacktrace.load(std::memory_order_relaxed)) { + LOG_TOPIC("52638", WARN, Logger::AQL) + << "ALERT: Found _logStacktrace:" + << " Blockinfo: " << printBlockInfo() + << " Query ID: " << getQuery().id() << ", stacktrace:"; + CrashHandler::logBacktrace(); + } + }); + if (!_prefetchTask->isConsumed() && !_prefetchTask->tryClaim()) { + // some thread is still working on our prefetch task + // -> we need to wait for that task to finish first! + _prefetchTask->waitFor(); + } } } @@ -496,6 +517,27 @@ void ExecutionBlockImpl::collectExecStats(ExecutionStats& stats) { template std::tuple ExecutionBlockImpl::execute(AqlCallStack const& stack) { + // Double use diagnostics: + uint64_t userCount = _numberOfUsers.fetch_add(1, std::memory_order_relaxed); + if (userCount > 0) { + _logStacktrace.store(true, std::memory_order_relaxed); + LOG_TOPIC("52635", WARN, Logger::AQL) + << "ALERT: Double use of ExecutionBlock detected, " + << " Blockinfo: " << printBlockInfo() + << " Query ID: " << getQuery().id() << ", stacktrace:"; + CrashHandler::logBacktrace(); + } + auto waechter = scopeGuard([&]() noexcept { + _numberOfUsers.fetch_sub(1, std::memory_order_relaxed); + if (_logStacktrace.load(std::memory_order_relaxed)) { + LOG_TOPIC("52636", WARN, Logger::AQL) + << "ALERT: Found _logStacktrace:" + << " Blockinfo: " << printBlockInfo() + << " Query ID: " << getQuery().id() << ", stacktrace:"; + CrashHandler::logBacktrace(); + } + }); + #ifdef ARANGODB_ENABLE_MAINTAINER_MODE bool old = false; TRI_ASSERT(_isBlockInUse.compare_exchange_strong(old, true)); @@ -2582,16 +2624,41 @@ bool ExecutionBlockImpl::PrefetchTask::rearmForNextCall( template void ExecutionBlockImpl::PrefetchTask::waitFor() const noexcept { + uint64_t count = _numberWaiters.fetch_add(1, std::memory_order_relaxed); + if (count > 0) { + LOG_TOPIC("62515", WARN, Logger::AQL) + << "ALERT: Detected " << count + 1 << " waiters for a PrefetchTask, " + << " Blockinfo: " << _block.printBlockInfo() + << " Query ID: " << _block.getQuery().id() << ", stacktrace:"; + CrashHandler::logBacktrace(); + _logStacktrace.store(true, std::memory_order_relaxed); + } std::unique_lock guard(_lock); // (1) - this acquire-load synchronizes with the release-store (3) - if (_state.load(std::memory_order_acquire).status == Status::Finished) { - return; + while (_state.load(std::memory_order_acquire).status != Status::Finished) { + std::cv_status s = _bell.wait_for(guard, std::chrono::milliseconds(1000)); + if (s == std::cv_status::timeout) { + auto state = _state.load(std::memory_order_relaxed); + // We only log this if the status is not "InProgress", since + // this is the only one we expect when a timeout occurs! + if (state.status != Status::InProgress) { + LOG_TOPIC("62514", INFO, Logger::AQL) + << "Have waited for a second on an async prefetch task, " + "state is " + << (int)state.status << " abandoned: " << state.abandoned + << " Blockinfo: " << _block.printBlockInfo() + << " Query ID: " << _block.getQuery().id(); + } + } + } + count = _numberWaiters.fetch_sub(1, std::memory_order_relaxed); + if (_logStacktrace.load(std::memory_order_relaxed) == true) { + LOG_TOPIC("62516", WARN, Logger::AQL) << "ALERT: Found logStacktrace:"; + CrashHandler::logBacktrace(); + if (count == 0) { + _logStacktrace.store(false, std::memory_order_relaxed); + } } - - _bell.wait(guard, [this]() { - // (2) - this acquire-load synchronizes with the release-store (3) - return _state.load(std::memory_order_acquire).status == Status::Finished; - }); } template