Added precondition to ensure that server is still as seen before. (#10468) · arangodb/arangodb@51af263 · GitHub
[go: up one dir, main page]

Skip to content

Commit 51af263

Browse files
Lars Maier, neunhoef
Lars Maier
authored and committed
Added precondition to ensure that server is still as seen before. (#10468)
1 parent 3090e49 commit 51af263

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

arangod/Agency/Supervision.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,14 +1280,16 @@ void Supervision::workJobs() {
12801280
}
12811281
}
12821282

1283-
1284-
bool Supervision::verifyCoordinatorRebootID(std::string const& coordinatorID, uint64_t wantedRebootID) {
1283+
bool Supervision::verifyCoordinatorRebootID(std::string const& coordinatorID,
1284+
uint64_t wantedRebootID, bool& coordinatorFound) {
12851285
// check if the coordinator exists in health
12861286
std::string const& health = serverHealth(coordinatorID);
12871287
LOG_TOPIC("44432", DEBUG, Logger::SUPERVISION)
12881288
<< "verifyCoordinatorRebootID: coordinatorID="
12891289
<< coordinatorID << " health=" << health;
1290+
12901291
// if the server is not found, health is an empty string
1292+
coordinatorFound = health.empty();
12911293
if (health != "GOOD" && health != "BAD") {
12921294
return false;
12931295
}
@@ -1300,7 +1302,9 @@ bool Supervision::verifyCoordinatorRebootID(std::string const& coordinatorID, ui
13001302
return rebootID.second && rebootID.first == wantedRebootID;
13011303
}
13021304

1303-
void Supervision::deleteBrokenDatabase(std::string const& database, std::string const& coordinatorID, uint64_t rebootID) {
1305+
void Supervision::deleteBrokenDatabase(std::string const& database,
1306+
std::string const& coordinatorID,
1307+
uint64_t rebootID, bool coordinatorFound) {
13041308
auto envelope = std::make_shared<Builder>();
13051309
{
13061310
VPackArrayBuilder trxs(envelope.get());
@@ -1329,10 +1333,15 @@ void Supervision::deleteBrokenDatabase(std::string const& database, std::string
13291333
}
13301334
{
13311335
// precondition that this database is still in Plan and is building
1332-
VPackObjectBuilder precondition(envelope.get());
1336+
VPackObjectBuilder preconditions(envelope.get());
13331337
envelope->add(_agencyPrefix + planDBPrefix + database + "/" + StaticStrings::DatabaseIsBuilding, VPackValue(true));
13341338
envelope->add(_agencyPrefix + planDBPrefix + database + "/" + StaticStrings::DatabaseCoordinatorRebootId, VPackValue(rebootID));
13351339
envelope->add(_agencyPrefix + planDBPrefix + database + "/" + StaticStrings::DatabaseCoordinator, VPackValue(coordinatorID));
1340+
1341+
{
1342+
VPackObjectBuilder precondition(envelope.get(), _agencyPrefix + healthPrefix + "/" + coordinatorID);
1343+
envelope->add("oldEmpty", VPackValue(!coordinatorFound));
1344+
}
13361345
}
13371346
}
13381347
}
@@ -1371,9 +1380,11 @@ void Supervision::checkBrokenCreatedDatabases() {
13711380
std::pair<std::string, bool> coordinatorID = db->hasAsString(StaticStrings::DatabaseCoordinator);
13721381

13731382
bool keepDatabase = true;
1383+
bool coordinatorFound = false;
13741384

13751385
if (rebootID.second && coordinatorID.second) {
1376-
keepDatabase = verifyCoordinatorRebootID(coordinatorID.first, rebootID.first);
1386+
keepDatabase = verifyCoordinatorRebootID(coordinatorID.first,
1387+
rebootID.first, coordinatorFound);
13771388
// incomplete data, should not happen
13781389
} else {
13791390
// v---- Please note this awesome log-id
@@ -1386,7 +1397,7 @@ void Supervision::checkBrokenCreatedDatabases() {
13861397
LOG_TOPIC("fe522", INFO, Logger::SUPERVISION)
13871398
<< "checkBrokenCreatedDatabases: removing skeleton database with name " << dbpair.first;
13881399
// delete this database and all of its collections
1389-
deleteBrokenDatabase(dbpair.first, coordinatorID.first, rebootID.first);
1400+
deleteBrokenDatabase(dbpair.first, coordinatorID.first, rebootID.first, coordinatorFound);
13901401
}
13911402
}
13921403
}

arangod/Agency/Supervision.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,10 @@ class Supervision : public arangodb::CriticalThread {
188188

189189
bool handleJobs();
190190
void handleShutdown();
191-
bool verifyCoordinatorRebootID(std::string const& coordinatorID, uint64_t wantedRebootID);
192-
void deleteBrokenDatabase(std::string const& database, std::string const& coordinatorID, uint64_t rebootID);
191+
bool verifyCoordinatorRebootID(std::string const& coordinatorID,
192+
uint64_t wantedRebootID, bool& coordinatorFound);
193+
void deleteBrokenDatabase(std::string const& database, std::string const& coordinatorID,
194+
uint64_t rebootID, bool coordinatorFound);
193195

194196
/// @brief Migrate chains of distributeShardsLike to depth 1
195197
void fixPrototypeChain(VPackBuilder&);

0 commit comments

Comments
 (0)
0