@@ -180,6 +180,50 @@ bool MoveShard::create(std::shared_ptr<VPackBuilder> envelope) {
   return false;
 }
 
+bool MoveShard::checkLeaderFollowerCurrent(
+    std::vector<Job::shard_t> const& shardsLikeMe) {
+  bool ok = true;
+  for (auto const& s : shardsLikeMe) {
+    auto sharedPath = _database + "/" + s.collection + "/";
+    auto currentServersPath = curColPrefix + sharedPath + s.shard + "/servers";
+    auto const& serverList = _snapshot.hasAsArray(currentServersPath);
+    if (serverList.second && serverList.first.length() > 0) {
+      if (_from != serverList.first[0].stringView()) {
+        LOG_TOPIC("55261", DEBUG, Logger::SUPERVISION)
+            << "MoveShard: From server " << _from
+            << " has not yet assumed leadership for collection " << s.collection
+            << " shard " << s.shard
+            << ", delaying start of MoveShard job for shard " << _shard;
+        ok = false;
+        break;
+      }
+      bool toFound = false;
+      for (auto server : VPackArrayIterator(serverList.first)) {
+        if (_to == server.stringView()) {
+          toFound = true;
+          break;
+        }
+      }
+      if (!toFound) {
+        LOG_TOPIC("55262", DEBUG, Logger::SUPERVISION)
+            << "MoveShard: To server " << _to
+            << " is not in sync for collection " << s.collection << " shard "
+            << s.shard << ", delaying start of MoveShard job for shard "
+            << _shard;
+        ok = false;
+        break;
+      }
+    } else {
+      LOG_TOPIC("55263", INFO, Logger::SUPERVISION)
+          << "MoveShard: Did not find a non-empty server list in Current "
+             "for collection "
+          << s.collection << " and shard " << s.shard;
+      ok = false;  // not even a server list found
+    }
+  }
+  return ok;
+}
+
 bool MoveShard::start(bool&) {
 
   if (considerCancellation()) {
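To make the new check easier to follow, here is a minimal standalone sketch of the invariant that checkLeaderFollowerCurrent enforces. The types and names below (ShardServers, leaderAssumedAndFollowerInSync) are illustrative only and not part of the patch; the real function reads the agency snapshot under curColPrefix as shown above. The idea: for every shard of the group, the first entry of the Current servers list must already be the from-server, and the to-server must already be listed as an in-sync follower; otherwise the job is delayed.

// Sketch only: ShardServers stands in for one shard's Current entry,
// where servers[0] is the current leader and the rest are in-sync followers.
#include <algorithm>
#include <string>
#include <vector>

struct ShardServers {
  std::string shard;
  std::vector<std::string> servers;
};

// True when 'from' already leads every shard of the group and 'to' is an
// in-sync follower everywhere; this mirrors the delay condition above.
bool leaderAssumedAndFollowerInSync(std::vector<ShardServers> const& group,
                                    std::string const& from,
                                    std::string const& to) {
  for (auto const& s : group) {
    if (s.servers.empty() || s.servers.front() != from) {
      return false;  // leadership not yet assumed for this shard
    }
    if (std::find(s.servers.begin(), s.servers.end(), to) == s.servers.end()) {
      return false;  // target server not yet in sync for this shard
    }
  }
  return true;
}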
@@ -347,6 +391,19 @@ bool MoveShard::start(bool&) {
   }
 
 
+  if (_isLeader && _toServerIsFollower) {
+    // Further checks: before we can begin, we must make sure that the
+    // _fromServer has already accepted its leadership for all shards in the
+    // shard group and that the _toServer is actually in sync. Otherwise,
+    // if this job asks the leader to resign, we would be stuck.
+    // If the _toServer is not in sync, the job would take overly long.
+    bool ok = checkLeaderFollowerCurrent(shardsLikeMe);
+    if (!ok) {
+      return false;  // Do not start the job, but leave it in Todo.
+                     // Log messages have already been written.
+    }
+  }
+
   // Copy todo to pending
   Builder todo, pending;
 
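The early return above relies on the supervision's retry behaviour. A tiny sketch of that pattern, with hypothetical names that are not part of the patch: a job whose start() returns false stays in Todo untouched, so the next supervision pass simply re-evaluates the preconditions.

// Sketch only: simplified control flow, not ArangoDB's Job interface.
bool tryStart(bool preconditionsHold) {
  if (!preconditionsHold) {
    return false;  // stay in Todo; nothing is written to Pending or Plan
  }
  // ... otherwise move the job from Todo to Pending and write Plan changes ...
  return true;
}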
@@ -524,8 +581,9 @@ JOB_STATUS MoveShard::pendingLeader() {
   // we abort:
   if (plan.isArray() && Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
     LOG_TOPIC("de056", DEBUG, Logger::SUPERVISION)
-        << "MoveShard (leader): found FAILED server in Plan, aborting job, db: " << _database
-        << " coll: " << _collection << " shard: " << _shard;
+        << "MoveShard (leader): found FAILED server in Plan, aborting job, "
+           "db: "
+        << _database << " coll: " << _collection << " shard: " << _shard;
     abort("failed server in Plan");
     return FAILED;
   }
@@ -632,12 +690,12 @@ JOB_STATUS MoveShard::pendingLeader() {
 
   // We need to switch leaders:
   {
-    // First make sure that the server we want to go to is still in Current
-    // for all shards. This is important, since some transaction which the leader
-    // has still executed before its resignation might have dropped a follower
-    // for some shard, and this could have been our new leader. In this case we
-    // must abort and go back to the original leader, which is still perfectly
-    // safe.
+    // First make sure that the server we want to go to is still in
+    // Current for all shards. This is important, since some transaction
+    // which the leader still executed before its resignation might
+    // have dropped a follower for some shard, and this could have been
+    // our new leader. In this case we must abort and go back to the
+    // original leader, which is still perfectly safe.
     for (auto const& sh : shardsLikeMe) {
       auto const shardPath = curColPrefix + _database + "/" + sh.collection + "/" + sh.shard;
       auto const tmp = _snapshot.hasAsArray(shardPath + "/servers");
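For context on the comment rewrapped above, here is a sketch of the safety check it describes, reusing the hypothetical ShardServers type from the earlier sketch (again not the real agency API): before the Plan is rewritten to make the to-server the leader, that server must still appear in Current for every clone shard; if a transaction executed before the old leader's resignation dropped it as a follower, the job aborts and keeps the old leader.

// Sketch only: returns true when switching the leadership to 'to' is still
// safe, i.e. 'to' is still reported in Current for every shard of the group.
bool safeToSwitchLeader(std::vector<ShardServers> const& group,
                        std::string const& to) {
  for (auto const& s : group) {
    if (std::find(s.servers.begin(), s.servers.end(), to) == s.servers.end()) {
      return false;  // 'to' was dropped for this shard; abort the move
    }
  }
  return true;
}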
@@ -782,7 +840,8 @@ JOB_STATUS MoveShard::pendingLeader() {
         }
       } else {
         LOG_TOPIC("37714", WARN, Logger::SUPERVISION)
-            << "failed to iterate over planned servers for "
+            << "failed to iterate over planned servers "
+               "for shard "
             << _shard << " or one of its clones";
         failed = true;
         return;
@@ -848,8 +907,8 @@ JOB_STATUS MoveShard::pendingFollower() {
   Slice plan = _snapshot.hasAsSlice(planPath).first;
   if (plan.isArray() && Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
     LOG_TOPIC("f8c22", DEBUG, Logger::SUPERVISION)
-        << "MoveShard (follower): found FAILED server in Plan, aborting job, "
-           "db: "
+        << "MoveShard (follower): found FAILED server in Plan, aborting "
+           "job, db: "
         << _database << " coll: " << _collection << " shard: " << _shard;
     abort("failed server in Plan");
     return FAILED;
@@ -1039,7 +1098,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
           TRI_ASSERT(false);
           return;
         }
-        // Add to server last. Will be removed by removeFollower if to much
+        // Add the to server last. It will be removed by removeFollower
+        // if there are too many followers.
         trx.add(VPackValue(_to));
       }
     });
@@ -1088,8 +1148,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
       VPackObjectBuilder preconditionObj(&trx);
       addMoveShardToServerCanUnLock(trx);
       addMoveShardFromServerCanUnLock(trx);
-      // If the collection is gone in the meantime, we do nothing here, but the
-      // round will move the job to Finished anyway:
+      // If the collection is gone in the meantime, we do nothing here, but
+      // the next supervision round will move the job to Finished anyway:
       addPreconditionCollectionStillThere(trx, _database, _collection);
     }
   }