Delay MoveShard job until old and new leader ready - BTS-1110 (#17569) · arangodb/arangodb@24ba39d · GitHub

Commit 24ba39d

neunhoef and KVS85 authored
Delay MoveShard job until old and new leader ready - BTS-1110 (#17569)
* Backport to 3.8.
* Fix 3.8 compilation.
* Fix unittests.

Co-authored-by: Vadim <vadim@arangodb.com>
1 parent 7cb3ca0 commit 24ba39d

File tree

4 files changed: +178 -14 lines changed

CHANGELOG

Lines changed: 5 additions & 0 deletions
@@ -1,6 +1,11 @@
 v3.8.9 (XXXX-XX-XX)
 -------------------
 
+* Delay a MoveShard operation for leader change, until the old leader has
+  actually assumed its leadership and until the new leader is actually in sync.
+  This fixes a bug which could block a shard under certain circumstances. This
+  fixes BTS-1110.
+
 * Updated arangosync to v2.13.0.
 
 * Fixed issue #17291: Server crash on error in the PRUNE expression.
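
The fix described in this CHANGELOG entry is implemented below as a pre-start check in MoveShard::start. As a rough orientation before the full diff, a minimal sketch of the gating idea, using hypothetical simplified types (CurrentShard, readyToStartLeaderChange) rather than the actual agency snapshot API, could look like this:

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical simplified view of one shard's entry in Current: the first
// server in `servers` is the acting leader, the rest are in-sync followers.
struct CurrentShard {
  std::vector<std::string> servers;
};

// Sketch of the condition the commit adds: only start the leader-change
// MoveShard job once the old leader (`from`) actually leads and the new
// leader (`to`) is already an in-sync follower for every shard in the group.
bool readyToStartLeaderChange(std::vector<CurrentShard> const& shards,
                              std::string const& from, std::string const& to) {
  for (auto const& s : shards) {
    if (s.servers.empty() || s.servers.front() != from) {
      return false;  // old leader has not assumed its leadership yet
    }
    if (std::find(s.servers.begin(), s.servers.end(), to) == s.servers.end()) {
      return false;  // new leader is not yet in sync
    }
  }
  return true;  // safe to ask the old leader to resign
}

The actual implementation, checkLeaderFollowerCurrent in the diff below, reads the server lists from the agency snapshot and logs why the job is being delayed; the job then simply stays in Todo until a later supervision round.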

arangod/Agency/MoveShard.cpp

Lines changed: 74 additions & 14 deletions
@@ -180,6 +180,50 @@ bool MoveShard::create(std::shared_ptr<VPackBuilder> envelope) {
     return false;
   }
 
+bool MoveShard::checkLeaderFollowerCurrent(
+    std::vector<Job::shard_t> const& shardsLikeMe) {
+  bool ok = true;
+  for (auto const& s : shardsLikeMe) {
+    auto sharedPath = _database + "/" + s.collection + "/";
+    auto currentServersPath = curColPrefix + sharedPath + s.shard + "/servers";
+    auto const& serverList = _snapshot.hasAsArray(currentServersPath);
+    if (serverList.second && serverList.first.length() > 0) {
+      if (_from != serverList.first[0].stringView()) {
+        LOG_TOPIC("55261", DEBUG, Logger::SUPERVISION)
+            << "MoveShard: From server " << _from
+            << " has not yet assumed leadership for collection " << s.collection
+            << " shard " << s.shard
+            << ", delaying start of MoveShard job for shard " << _shard;
+        ok = false;
+        break;
+      }
+      bool toFound = false;
+      for (auto server : VPackArrayIterator(serverList.first)) {
+        if (_to == server.stringView()) {
+          toFound = true;
+          break;
+        }
+      }
+      if (!toFound) {
+        LOG_TOPIC("55262", DEBUG, Logger::SUPERVISION)
+            << "MoveShard: To server " << _to
+            << " is not in sync for collection " << s.collection << " shard "
+            << s.shard << ", delaying start of MoveShard job for shard "
+            << _shard;
+        ok = false;
+        break;
+      }
+    } else {
+      LOG_TOPIC("55263", INFO, Logger::SUPERVISION)
+          << "MoveShard: Did not find a non-empty server list in Current "
+             "for collection "
+          << s.collection << " and shard " << s.shard;
+      ok = false;  // not even a server list found
+    }
+  }
+  return ok;
+}
+
 bool MoveShard::start(bool&) {
 
   if (considerCancellation()) {
@@ -347,6 +391,19 @@ bool MoveShard::start(bool&) {
   }
 
 
+  if (_isLeader && _toServerIsFollower) {
+    // Further checks, before we can begin, we must make sure that the
+    // _fromServer has accepted its leadership already for all shards in the
+    // shard group and that the _toServer is actually in sync. Otherwise,
+    // if this job here asks the leader to resign, we would be stuck.
+    // If the _toServer is not in sync, the job would take overly long.
+    bool ok = checkLeaderFollowerCurrent(shardsLikeMe);
+    if (!ok) {
+      return false;  // Do not start job, but leave it in Todo.
+                     // Log messages already written.
+    }
+  }
+
   // Copy todo to pending
   Builder todo, pending;
 
@@ -524,8 +581,9 @@ JOB_STATUS MoveShard::pendingLeader() {
   // we abort:
   if (plan.isArray() && Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
     LOG_TOPIC("de056", DEBUG, Logger::SUPERVISION)
-        << "MoveShard (leader): found FAILED server in Plan, aborting job, db: " << _database
-        << " coll: " << _collection << " shard: " << _shard;
+        << "MoveShard (leader): found FAILED server in Plan, aborting job, "
+           "db: "
+        << _database << " coll: " << _collection << " shard: " << _shard;
     abort("failed server in Plan");
     return FAILED;
   }
@@ -632,12 +690,12 @@ JOB_STATUS MoveShard::pendingLeader() {
 
   // We need to switch leaders:
   {
-    // First make sure that the server we want to go to is still in Current
-    // for all shards. This is important, since some transaction which the leader
-    // has still executed before its resignation might have dropped a follower
-    // for some shard, and this could have been our new leader. In this case we
-    // must abort and go back to the original leader, which is still perfectly
-    // safe.
+    // First make sure that the server we want to go to is still in
+    // Current for all shards. This is important, since some transaction
+    // which the leader has still executed before its resignation might
+    // have dropped a follower for some shard, and this could have been
+    // our new leader. In this case we must abort and go back to the
+    // original leader, which is still perfectly safe.
     for (auto const& sh : shardsLikeMe) {
       auto const shardPath = curColPrefix + _database + "/" + sh.collection + "/" + sh.shard;
       auto const tmp = _snapshot.hasAsArray(shardPath + "/servers");
@@ -782,7 +840,8 @@ JOB_STATUS MoveShard::pendingLeader() {
           }
         } else {
           LOG_TOPIC("37714", WARN, Logger::SUPERVISION)
-              << "failed to iterate over planned servers for "
+              << "failed to iterate over planned servers "
+                 "for shard "
               << _shard << " or one of its clones";
           failed = true;
           return;
@@ -848,8 +907,8 @@ JOB_STATUS MoveShard::pendingFollower() {
   Slice plan = _snapshot.hasAsSlice(planPath).first;
   if (plan.isArray() && Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
     LOG_TOPIC("f8c22", DEBUG, Logger::SUPERVISION)
-        << "MoveShard (follower): found FAILED server in Plan, aborting job, "
-           "db: "
+        << "MoveShard (follower): found FAILED server in Plan, aborting "
+           "job, db: "
         << _database << " coll: " << _collection << " shard: " << _shard;
     abort("failed server in Plan");
     return FAILED;
@@ -1039,7 +1098,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
             TRI_ASSERT(false);
             return;
           }
-          // Add to server last. Will be removed by removeFollower if to much
+          // Add to server last. Will be removed by removeFollower
+          // if too many
           trx.add(VPackValue(_to));
         }
       });
@@ -1088,8 +1148,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
       VPackObjectBuilder preconditionObj(&trx);
       addMoveShardToServerCanUnLock(trx);
       addMoveShardFromServerCanUnLock(trx);
-      // If the collection is gone in the meantime, we do nothing here, but the
-      // round will move the job to Finished anyway:
+      // If the collection is gone in the meantime, we do nothing here, but
+      // the round will move the job to Finished anyway:
       addPreconditionCollectionStillThere(trx, _database, _collection);
     }
   }

arangod/Agency/MoveShard.h

Lines changed: 2 additions & 0 deletions
@@ -82,6 +82,8 @@ struct MoveShard : public Job {
   void addMoveShardFromServerCanUnLock(Builder& ops) const;
 
   bool moveShardFinish(bool unlock, bool success, std::string const& msg);
+  bool checkLeaderFollowerCurrent(
+      std::vector<Job::shard_t> const& shardsLikeMe);
 };
 }  // namespace consensus
 }  // namespace arangodb

tests/Agency/MoveShardTest.cpp

Lines changed: 97 additions & 0 deletions
@@ -583,6 +583,103 @@ TEST_F(MoveShardTest, the_job_should_wait_until_the_target_server_is_good) {
   moveShard.start(aborts);
 }
 
+TEST_F(MoveShardTest, the_job_should_wait_until_the_from_server_is_in_current) {
+  std::function<std::unique_ptr<VPackBuilder>(velocypack::Slice,
+                                              std::string const&)>
+      createTestStructure = [&](velocypack::Slice s, std::string const& path) {
+        auto builder = std::make_unique<velocypack::Builder>();
+        if (s.isObject()) {
+          builder->add(VPackValue(VPackValueType::Object));
+          for (auto it : VPackObjectIterator(s)) {
+            auto childBuilder =
+                createTestStructure(it.value, path + "/" + it.key.copyString());
+            if (childBuilder) {
+              builder->add(it.key.copyString(), childBuilder->slice());
+            }
+          }
+
+          if (path == "/arango/Target/ToDo") {
+            builder->add(
+                jobId,
+                createJob(COLLECTION, SHARD_LEADER, SHARD_FOLLOWER1).slice());
+          }
+          builder->close();
+        } else {
+          // Simulate a new leader which has not yet assumed its leadership:
+          if (path == "/arango/Current/Collections/" + DATABASE + "/" +
+                          COLLECTION + "/" + SHARD + "/servers") {
+            {
+              VPackArrayBuilder guard(builder.get());
+              builder->add(VPackValue("follower1"));
+              builder->add(VPackValue("leader"));
+            }
+          } else {
+            builder->add(s);
+          }
+        }
+        return builder;
+      };
+
+  Mock<AgentInterface> mockAgent;
+  When(Method(mockAgent, waitFor)).AlwaysReturn();
+  AgentInterface& agent = mockAgent.get();
+
+  auto builder = createTestStructure(baseStructure.toBuilder().slice(), "");
+  ASSERT_TRUE(builder);
+  Node agency = createAgencyFromBuilder(*builder);
+
+  auto moveShard = MoveShard(agency, &agent, TODO, jobId);
+  moveShard.start(aborts);
+}
+
+TEST_F(MoveShardTest, the_job_should_wait_until_the_to_server_is_in_sync) {
+  std::function<std::unique_ptr<VPackBuilder>(velocypack::Slice,
+                                              std::string const&)>
+      createTestStructure = [&](velocypack::Slice s, std::string const& path) {
+        auto builder = std::make_unique<velocypack::Builder>();
+        if (s.isObject()) {
+          builder->add(VPackValue(VPackValueType::Object));
+          for (auto it : VPackObjectIterator(s)) {
+            auto childBuilder =
+                createTestStructure(it.value, path + "/" + it.key.copyString());
+            if (childBuilder) {
+              builder->add(it.key.copyString(), childBuilder->slice());
+            }
+          }
+
+          if (path == "/arango/Target/ToDo") {
+            builder->add(
+                jobId,
+                createJob(COLLECTION, SHARD_LEADER, SHARD_FOLLOWER1).slice());
+          }
+          builder->close();
+        } else {
+          // Simulate a to-server which is not yet in sync (missing from the
+          // server list in Current):
+          if (path == "/arango/Current/Collections/" + DATABASE + "/" +
+                          COLLECTION + "/" + SHARD + "/servers") {
+            {
+              VPackArrayBuilder guard(builder.get());
+              builder->add(VPackValue("leader"));
+            }
+          } else {
+            builder->add(s);
+          }
+        }
+        return builder;
+      };
+
+  Mock<AgentInterface> mockAgent;
+  When(Method(mockAgent, waitFor)).AlwaysReturn();
+  AgentInterface& agent = mockAgent.get();
+
+  auto builder = createTestStructure(baseStructure.toBuilder().slice(), "");
+  ASSERT_TRUE(builder);
+  Node agency = createAgencyFromBuilder(*builder);
+
+  auto moveShard = MoveShard(agency, &agent, TODO, jobId);
+  moveShard.start(aborts);
+}
+
 TEST_F(MoveShardTest, the_job_should_fail_if_the_shard_distributes_its_shards_like_some_other) {
   std::function<std::unique_ptr<VPackBuilder>(VPackSlice const&, std::string const&)> createTestStructure =
       [&](VPackSlice const& s, std::string const& path) {

0 commit comments
