Bug fix/print on violated block ordering by mchacki · Pull Request #21743 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Bug fix/print on violated block ordering #21743

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge remote-tracking branch 'origin/3.12.4' into bug-fix/print-on-violated-block-ordering
  • Loading branch information
neunhoef committed May 2, 2025
commit 325d1f27cf01f5b1f56748823fd6c173b2aaab26
11 changes: 11 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
3.12.4.4 (XXXX-XX-XX)
---------------------

* Add detection of two threads working on an ExecutionBlock concurrently.

* Add detection of two threads waiting for a PrefetchTask concurrently.

* Add visibility into the state if a thread waits for more than a second
on a PrefetchTask.


3.12.4.3 (2025-04-25)
---------------------

Expand Down
5 changes: 3 additions & 2 deletions arangod/Aql/ExecutionBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class ExecutionBlock {
[[nodiscard]] auto isDependencyInList(
std::unordered_set<ExecutionBlock*> const& seenBlocks) const noexcept
-> ExecutionBlock*;

[[nodiscard]] auto hasStoppedAsyncTasks() const noexcept -> bool;

protected:
Expand Down Expand Up @@ -185,7 +185,8 @@ class ExecutionBlock {
/// @brief if this is set, we are done, this is reset to false by execute()
bool _done;

/// @brief if this is set, we have stopped async tasks, this is set to true by stopAsyncTasks()
/// @brief if this is set, we have stopped async tasks, this is set to true by
/// stopAsyncTasks()
bool _stoppedAsyncTasks{false};

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
Expand Down
50 changes: 35 additions & 15 deletions arangod/Aql/ExecutionBlockImpl.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,30 @@ ExecutionBlockImpl<Executor>::~ExecutionBlockImpl() {

template<class Executor>
void ExecutionBlockImpl<Executor>::stopAsyncTasks() {
TRI_ASSERT(!_stoppedAsyncTasks) << "Someone already stopped async tasks for " << printBlockInfo();
TRI_ASSERT(!_stoppedAsyncTasks)
<< "Someone already stopped async tasks for " << printBlockInfo();
_stoppedAsyncTasks = true;
if (_prefetchTask && !_prefetchTask->isConsumed() &&
!_prefetchTask->tryClaim()) {
// some thread is still working on our prefetch task
// -> we need to wait for that task to finish first!
_prefetchTask->waitFor();
if (_prefetchTask) {
// Double use diagnostics:
uint64_t userCount = _numberOfUsers.fetch_add(1);
if (userCount > 0) {
_logStacktrace.store(true, std::memory_order_relaxed);
LOG_TOPIC("52637", WARN, Logger::AQL)
<< "ALERT: Double use of ExecutionBlock detected, stacktrace:";
CrashHandler::logBacktrace();
}
auto guard = scopeGuard([&]() noexcept {
_numberOfUsers.fetch_sub(1);
if (_logStacktrace.load(std::memory_order_relaxed)) {
LOG_TOPIC("52638", WARN, Logger::AQL) << "ALERT: Found _logStacktrace:";
CrashHandler::logBacktrace();
}
});
if (!_prefetchTask->isConsumed() && !_prefetchTask->tryClaim()) {
// some thread is still working on our prefetch task
// -> we need to wait for that task to finish first!
_prefetchTask->waitFor();
}
}
}

Expand Down Expand Up @@ -1076,17 +1093,20 @@ auto ExecutionBlockImpl<Executor>::executeFetcher(ExecutionContext& ctx,
LOG_TOPIC("14d20", WARN, Logger::AQL)
<< "[query#" << block->getQuery().id() << "] ALERT"
<< block->printBlockInfo()
<< " was asked to stop async task. We still start one. This is an allowed rare race.";
<< " was asked to stop async task. We still start one. "
"This is an allowed rare race.";
}

auto stopGuard = ScopeGuard([block, hasStoppedAsyncTasks]() noexcept {
if (hasStoppedAsyncTasks) {
LOG_TOPIC("14d21", WARN, Logger::AQL)
<< "[query#" << block->getQuery().id() << "] CLEAR ALERT"
<< block->printBlockInfo()
<< " We completed the task of the aforementioned race. All is fine.";
}
});
auto stopGuard =
ScopeGuard([block, hasStoppedAsyncTasks]() noexcept {
if (hasStoppedAsyncTasks) {
LOG_TOPIC("14d21", WARN, Logger::AQL)
<< "[query#" << block->getQuery().id()
<< "] CLEAR ALERT" << block->printBlockInfo()
<< " We completed the task of the aforementioned "
"race. All is fine.";
}
});

TRI_IF_FAILURE("AsyncPrefetch::blocksDestroyedOutOfOrder") {
using namespace std::chrono_literals;
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.
0