From 346d12cc082106d07babf51307fc10ab785d4575 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Wed, 27 Sep 2017 16:36:57 +0200 Subject: [PATCH 1/3] Fix a deadlock between Agent thread and compactor thread. --- arangod/Agency/Compactor.cpp | 15 ++++++++++----- arangod/Agency/Compactor.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arangod/Agency/Compactor.cpp b/arangod/Agency/Compactor.cpp index e2b732f1b001..d3c42de3ee7e 100644 --- a/arangod/Agency/Compactor.cpp +++ b/arangod/Agency/Compactor.cpp @@ -32,7 +32,7 @@ using namespace arangodb::consensus; // @brief Construct with agent Compactor::Compactor(Agent* agent) : - Thread("Compactor"), _agent(agent), _waitInterval(1000000) { + Thread("Compactor"), _agent(agent), _wakeupCompactor(false), _waitInterval(1000000) { } @@ -49,10 +49,14 @@ void Compactor::run() { LOG_TOPIC(DEBUG, Logger::AGENCY) << "Starting compactor personality"; - CONDITION_LOCKER(guard, _cv); - while (true) { - _cv.wait(); + { + CONDITION_LOCKER(guard, _cv); + if (!_wakeupCompactor) { + _cv.wait(); + } + } + _wakeupCompactor = false; if (this->isStopping()) { break; @@ -74,8 +78,9 @@ void Compactor::run() { void Compactor::wakeUp () { { CONDITION_LOCKER(guard, _cv); - guard.broadcast(); + _wakeupCompactor = true; } + _cv.broadcast(); } diff --git a/arangod/Agency/Compactor.h b/arangod/Agency/Compactor.h index 626f7afd1ea8..3232ca9ee8d4 100644 --- a/arangod/Agency/Compactor.h +++ b/arangod/Agency/Compactor.h @@ -60,6 +60,7 @@ class Compactor : public arangodb::Thread { Agent* _agent; //< @brief Agent basics::ConditionVariable _cv; + bool _wakeupCompactor; long _waitInterval; //< @brief Wait interval }; From 9038fe73e4c1b3416ee7adfa0b1ffc0ca89eae4e Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Wed, 27 Sep 2017 16:40:49 +0200 Subject: [PATCH 2/3] Improve comments in header. --- arangod/Agency/Compactor.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arangod/Agency/Compactor.h b/arangod/Agency/Compactor.h index 3232ca9ee8d4..618cb85c5ba1 100644 --- a/arangod/Agency/Compactor.h +++ b/arangod/Agency/Compactor.h @@ -59,6 +59,10 @@ class Compactor : public arangodb::Thread { private: Agent* _agent; //< @brief Agent + // This condition variable is used for the compactor thread to go to + // sleep. One has to set _wakeupCompactor to true under the Mutex of _cv + // and then broadcast on the _cv to wake up the compactor thread. + // Note that the Mutex is not held during the actual compaction! basics::ConditionVariable _cv; bool _wakeupCompactor; long _waitInterval; //< @brief Wait interval From d1da25311b08dad0724f874c1700a9cf84e2537a Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Wed, 27 Sep 2017 22:13:32 +0200 Subject: [PATCH 3/3] Organise clean shutdown of agency threads. --- arangod/Agency/Agent.cpp | 13 +++++++------ arangod/Agency/Compactor.cpp | 7 ++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arangod/Agency/Agent.cpp b/arangod/Agency/Agent.cpp index 8cbc40215247..6a2ebc850f2c 100644 --- a/arangod/Agency/Agent.cpp +++ b/arangod/Agency/Agent.cpp @@ -88,20 +88,21 @@ bool Agent::mergeConfiguration(VPackSlice const& persisted) { /// Dtor shuts down thread Agent::~Agent() { - // Give up if constituent breaks shutdown + // Give up if some subthread breaks shutdown int counter = 0; - while (_constituent.isRunning()) { + while (_constituent.isRunning() || _compactor.isRunning() || + (_config.supervision() && _supervision.isRunning()) || + (_inception != nullptr && _inception->isRunning())) { usleep(100000); - // emit warning after 5 seconds - if (++counter == 10 * 5) { - LOG_TOPIC(FATAL, Logger::AGENCY) << "constituent thread did not finish"; + // emit warning after 15 seconds + if (++counter == 10 * 15) { + LOG_TOPIC(FATAL, Logger::AGENCY) << "some agency thread did not finish"; FATAL_ERROR_EXIT(); } } shutdown(); - } /// State machine diff --git a/arangod/Agency/Compactor.cpp b/arangod/Agency/Compactor.cpp index d3c42de3ee7e..c4623cf24b1c 100644 --- a/arangod/Agency/Compactor.cpp +++ b/arangod/Agency/Compactor.cpp @@ -91,9 +91,6 @@ void Compactor::beginShutdown() { Thread::beginShutdown(); - { - CONDITION_LOCKER(guard, _cv); - guard.broadcast(); - } - + wakeUp(); + }