@@ -204,6 +204,50 @@ bool MoveShard::create(std::shared_ptr<VPackBuilder> envelope) {
   return false;
 }
 
+bool MoveShard::checkLeaderFollowerCurrent(
+    std::vector<Job::shard_t> const& shardsLikeMe) {
+  bool ok = true;
+  for (auto const& s : shardsLikeMe) {
+    auto sharedPath = _database + "/" + s.collection + "/";
+    auto currentServersPath = curColPrefix + sharedPath + s.shard + "/servers";
+    auto serverList = _snapshot.hasAsArray(currentServersPath);
+    if (serverList && (*serverList).length() > 0) {
+      if (_from != (*serverList)[0].stringView()) {
+        LOG_TOPIC("55261", DEBUG, Logger::SUPERVISION)
+            << "MoveShard: From server " << _from
+            << " has not yet assumed leadership for collection " << s.collection
+            << " shard " << s.shard
+            << ", delaying start of MoveShard job for shard " << _shard;
+        ok = false;
+        break;
+      }
+      bool toFound = false;
+      for (auto server : VPackArrayIterator(*serverList)) {
+        if (_to == server.stringView()) {
+          toFound = true;
+          break;
+        }
+      }
+      if (!toFound) {
+        LOG_TOPIC("55262", DEBUG, Logger::SUPERVISION)
+            << "MoveShard: To server " << _to
+            << " is not in sync for collection " << s.collection << " shard "
+            << s.shard << ", delaying start of MoveShard job for shard "
+            << _shard;
+        ok = false;
+        break;
+      }
+    } else {
+      LOG_TOPIC("55263", INFO, Logger::SUPERVISION)
+          << "MoveShard: Did not find a non-empty server list in Current "
+             "for collection "
+          << s.collection << " and shard " << s.shard;
+      ok = false;  // not even a server list found
+    }
+  }
+  return ok;
+}
+
 bool MoveShard::start(bool&) {
   if (considerCancellation()) {
     return false;
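
The new `checkLeaderFollowerCurrent` helper only inspects the Current servers list of every shard in the shard group: the `_from` server must already be listed first (it has assumed leadership) and the `_to` server must appear somewhere in the list (it is an in-sync follower). The following is a minimal sketch of that predicate against plain velocypack, outside the supervision code; the free function `leaderAndFollowerReady` and the `main()` driver are hypothetical and only illustrate the shape of the check, while the real code reads the list through `_snapshot.hasAsArray(currentServersPath)`.

```cpp
// Sketch only (not part of the patch): the membership test that
// checkLeaderFollowerCurrent applies to each shard's Current servers list.
#include <string_view>

#include <velocypack/Builder.h>
#include <velocypack/Iterator.h>
#include <velocypack/Slice.h>
#include <velocypack/Value.h>

namespace vp = arangodb::velocypack;

// Hypothetical helper: true iff `from` is the current leader (first entry)
// and `to` already appears in the list, i.e. is an in-sync follower.
bool leaderAndFollowerReady(vp::Slice servers, std::string_view from,
                            std::string_view to) {
  if (!servers.isArray() || servers.length() == 0) {
    return false;  // no non-empty server list in Current yet
  }
  if (servers[0].stringView() != from) {
    return false;  // _from has not yet assumed leadership
  }
  for (vp::Slice server : vp::ArrayIterator(servers)) {
    if (server.stringView() == to) {
      return true;  // _to is in sync
    }
  }
  return false;  // _to not yet in sync
}

int main() {
  vp::Builder b;
  b.openArray();
  b.add(vp::Value("DBServer0001"));  // leader
  b.add(vp::Value("DBServer0002"));  // in-sync follower
  b.close();
  // This combination would allow the MoveShard job to start for the shard:
  return leaderAndFollowerReady(b.slice(), "DBServer0001", "DBServer0002") ? 0 : 1;
}
```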
@@ -391,6 +435,19 @@ bool MoveShard::start(bool&) {
     }
   }
 
+  if (_isLeader && _toServerIsFollower) {
+    // Further checks: before we can begin, we must make sure that the
+    // _fromServer has accepted its leadership already for all shards in the
+    // shard group and that the _toServer is actually in sync. Otherwise,
+    // if this job here asks the leader to resign, we would be stuck.
+    // If the _toServer is not in sync, the job would take overly long.
+    bool ok = checkLeaderFollowerCurrent(shardsLikeMe);
+    if (!ok) {
+      return false;  // Do not start job, but leave it in Todo.
+                     // Log messages already written.
+    }
+  }
+
   // Copy todo to pending
   Builder todo, pending;
 
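
For reference, the agency key the new check reads per shard is built from `curColPrefix` plus database, collection and shard, ending in `/servers`; the leader is the first entry of the array stored there. Below is a small sketch of the path construction; the concrete prefix value `"Current/Collections/"` and the example ids are assumptions used only for illustration.

```cpp
// Sketch only: the agency key that checkLeaderFollowerCurrent reads for
// each shard of the group. The prefix value here is an assumption.
#include <iostream>
#include <string>

int main() {
  std::string const curColPrefix = "Current/Collections/";  // assumed value
  std::string const database = "_system";  // example ids, not from the patch
  std::string const collection = "1010101";
  std::string const shard = "s1010102";

  auto sharedPath = database + "/" + collection + "/";
  auto currentServersPath = curColPrefix + sharedPath + shard + "/servers";
  // The first entry of the array stored under this key is the leader,
  // the remaining entries are the in-sync followers.
  std::cout << currentServersPath << "\n";
  // Prints: Current/Collections/_system/1010101/s1010102/servers
}
```

If the check fails, `start()` returns `false` without writing anything, so the job simply stays in ToDo and is re-evaluated in a later supervision round.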
@@ -732,7 +789,8 @@ JOB_STATUS MoveShard::pendingLeader() {
   if (plan.isArray() &&
       Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
     LOG_TOPIC("de056", DEBUG, Logger::SUPERVISION)
-        << "MoveShard (leader): found FAILED server in Plan, aborting job, db: "
+        << "MoveShard (leader): found FAILED server in Plan, aborting job, "
+           "db: "
         << _database << " coll: " << _collection << " shard: " << _shard;
     abort("failed server in Plan");
     return FAILED;
@@ -839,12 +897,12 @@ JOB_STATUS MoveShard::pendingLeader() {
 
   // We need to switch leaders:
   {
-    // First make sure that the server we want to go to is still in Current
-    // for all shards. This is important, since some transaction which the
-    // leader has still executed before its resignation might have dropped a
-    // follower for some shard, and this could have been our new leader. In
-    // this case we must abort and go back to the original leader, which is
-    // still perfectly safe.
+    // First make sure that the server we want to go to is still in
+    // Current for all shards. This is important, since some transaction
+    // which the leader still executed before its resignation might
+    // have dropped a follower for some shard, and this could have been
+    // our new leader. In this case we must abort and go back to the
+    // original leader, which is still perfectly safe.
     for (auto const& sh : shardsLikeMe) {
       auto const shardPath =
           curColPrefix + _database + "/" + sh.collection + "/" + sh.shard;
@@ -953,8 +1011,8 @@ JOB_STATUS MoveShard::pendingLeader() {
           }
         } else {
           LOG_TOPIC("3294e", WARN, Logger::SUPERVISION)
-              << "failed to iterate through current shard servers for "
-                 "shard "
+              << "failed to iterate through current shard servers "
+                 "for shard "
               << _shard << " or one of its clones";
           TRI_ASSERT(false);
           return;  // we don't increment done and remain PENDING
@@ -1063,8 +1121,8 @@ JOB_STATUS MoveShard::pendingFollower() {
   if (plan.isArray() &&
       Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
     LOG_TOPIC("f8c22", DEBUG, Logger::SUPERVISION)
-        << "MoveShard (follower): found FAILED server in Plan, aborting job, "
-           "db: "
+        << "MoveShard (follower): found FAILED server in Plan, aborting "
+           "job, db: "
        << _database << " coll: " << _collection << " shard: " << _shard;
     abort("failed server in Plan");
     return FAILED;
@@ -1242,8 +1300,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
         trx.add(VPackValue(_from));
         if (plan.isArray()) {
           for (VPackSlice srv : VPackArrayIterator(plan)) {
-            // from could be in plan as <from> or <_from>. Exclude to
-            // server always.
+            // from could be in plan as <from> or <_from>. Exclude
+            // the to server always.
             if (srv.isEqualString(_from) ||
                 srv.isEqualString("_" + _from) ||
                 srv.isEqualString(_to)) {
@@ -1259,8 +1317,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
               TRI_ASSERT(false);
               return;
             }
-            // Add to server last. Will be removed by removeFollower if to
-            // much
+            // Add the to server last. Will be removed by removeFollower if
+            // there are too many.
             trx.add(VPackValue(_to));
           }
         });
@@ -1312,8 +1370,8 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
       VPackObjectBuilder preconditionObj(&trx);
       addMoveShardToServerCanUnLock(trx);
       addMoveShardFromServerCanUnLock(trx);
-      // If the collection is gone in the meantime, we do nothing here, but the
-      // round will move the job to Finished anyway:
+      // If the collection is gone in the meantime, we do nothing here, but
+      // the next round will move the job to Finished anyway:
       addPreconditionCollectionStillThere(trx, _database, _collection);
     }
   }
@@ -1331,19 +1389,19 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
     LOG_TOPIC("513e6", INFO, Logger::SUPERVISION)
         << "Precondition failed on MoveShard::abort() for shard " << _shard
         << " of collection " << _collection
-        << ", if the collection has been deleted in the meantime, the job "
-           "will be finished soon, if this message repeats, tell us.";
+        << ", if the collection has been deleted in the meantime, the "
+           "job will be finished soon, if this message repeats, tell us.";
     result = Result(
         TRI_ERROR_SUPERVISION_GENERAL_FAILURE,
         std::string("Precondition failed while aborting moveShard job ") +
            _jobId);
     return result;
-    // We intentionally do not move the job object to Failed or Finished here!
-    // The failed precondition can either be one of the read locks, which
-    // suggests a fundamental problem, and in which case we will log this
-    // message in every round of the supervision. Or the collection has been
-    // dropped since we took the snapshot, in this case we will move the job
-    // to Finished in the next round.
+    // We intentionally do not move the job object to Failed or Finished
+    // here! The failed precondition can either be one of the read locks,
+    // which suggests a fundamental problem and means we will log this
+    // message in every round of the supervision, or the collection has
+    // been dropped since we took the snapshot, in which case we will
+    // move the job to Finished in the next round.
   }
   result = Result(
       TRI_ERROR_SUPERVISION_GENERAL_FAILURE,