From 3dd62e156369f387cf883941de639a795c7bee01 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Tue, 19 Nov 2019 11:20:15 +0100 Subject: [PATCH] Added precondition to ensure that server is still as seen before. --- arangod/Agency/Supervision.cpp | 23 +++++++++++++++++------ arangod/Agency/Supervision.h | 6 ++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arangod/Agency/Supervision.cpp b/arangod/Agency/Supervision.cpp index 97f7c56ca16a..71c5a8747dd4 100644 --- a/arangod/Agency/Supervision.cpp +++ b/arangod/Agency/Supervision.cpp @@ -1280,14 +1280,16 @@ void Supervision::workJobs() { } } - -bool Supervision::verifyCoordinatorRebootID(std::string const& coordinatorID, uint64_t wantedRebootID) { +bool Supervision::verifyCoordinatorRebootID(std::string const& coordinatorID, + uint64_t wantedRebootID, bool& coordinatorFound) { // check if the coordinator exists in health std::string const& health = serverHealth(coordinatorID); LOG_TOPIC("44432", DEBUG, Logger::SUPERVISION) << "verifyCoordinatorRebootID: coordinatorID=" << coordinatorID << " health=" << health; + // if the server is not found, health is an empty string, so the coordinator counts as found only when health is non-empty + coordinatorFound = !health.empty(); if (health != "GOOD" && health != "BAD") { return false; } @@ -1300,7 +1302,9 @@ bool Supervision::verifyCoordinatorRebootID(std::string const& coordinatorID, ui return rebootID.second && rebootID.first == wantedRebootID; } -void Supervision::deleteBrokenDatabase(std::string const& database, std::string const& coordinatorID, uint64_t rebootID) { +void Supervision::deleteBrokenDatabase(std::string const& database, + std::string const& coordinatorID, + uint64_t rebootID, bool coordinatorFound) { auto envelope = std::make_shared(); { VPackArrayBuilder trxs(envelope.get()); @@ -1329,10 +1333,15 @@ void Supervision::deleteBrokenDatabase(std::string const& database, std::string } { // precondition that this database is still in Plan and is building - VPackObjectBuilder precondition(envelope.get()); + 
VPackObjectBuilder preconditions(envelope.get()); envelope->add(_agencyPrefix + planDBPrefix + database + "/" + StaticStrings::DatabaseIsBuilding, VPackValue(true)); envelope->add(_agencyPrefix + planDBPrefix + database + "/" + StaticStrings::DatabaseCoordinatorRebootId, VPackValue(rebootID)); envelope->add(_agencyPrefix + planDBPrefix + database + "/" + StaticStrings::DatabaseCoordinator, VPackValue(coordinatorID)); + + { + VPackObjectBuilder precondition(envelope.get(), _agencyPrefix + healthPrefix + "/" + coordinatorID); + envelope->add("oldEmpty", VPackValue(!coordinatorFound)); + } } } } @@ -1371,9 +1380,11 @@ void Supervision::checkBrokenCreatedDatabases() { std::pair coordinatorID = db->hasAsString(StaticStrings::DatabaseCoordinator); bool keepDatabase = true; + bool coordinatorFound = false; if (rebootID.second && coordinatorID.second) { - keepDatabase = verifyCoordinatorRebootID(coordinatorID.first, rebootID.first); + keepDatabase = verifyCoordinatorRebootID(coordinatorID.first, + rebootID.first, coordinatorFound); // incomplete data, should not happen } else { // v---- Please note this awesome log-id @@ -1386,7 +1397,7 @@ void Supervision::checkBrokenCreatedDatabases() { LOG_TOPIC("fe522", INFO, Logger::SUPERVISION) << "checkBrokenCreatedDatabases: removing skeleton database with name " << dbpair.first; // delete this database and all of its collections - deleteBrokenDatabase(dbpair.first, coordinatorID.first, rebootID.first); + deleteBrokenDatabase(dbpair.first, coordinatorID.first, rebootID.first, coordinatorFound); } } } diff --git a/arangod/Agency/Supervision.h b/arangod/Agency/Supervision.h index 00f3174b4193..05697954006b 100644 --- a/arangod/Agency/Supervision.h +++ b/arangod/Agency/Supervision.h @@ -188,8 +188,10 @@ class Supervision : public arangodb::CriticalThread { bool handleJobs(); void handleShutdown(); - bool verifyCoordinatorRebootID(std::string const& coordinatorID, uint64_t wantedRebootID); - void deleteBrokenDatabase(std::string const& 
database, std::string const& coordinatorID, uint64_t rebootID); + bool verifyCoordinatorRebootID(std::string const& coordinatorID, + uint64_t wantedRebootID, bool& coordinatorFound); + void deleteBrokenDatabase(std::string const& database, std::string const& coordinatorID, + uint64_t rebootID, bool coordinatorFound); /// @brief Migrate chains of distributeShardsLike to depth 1 void fixPrototypeChain(VPackBuilder&);