38
38
#include " VocBase/LogicalCollection.h"
39
39
#include " VocBase/Methods/Databases.h"
40
40
41
+ #include " Cluster/ResignShardLeadership.h"
42
+
41
43
#include < velocypack/Collection.h>
42
44
#include < velocypack/Compare.h>
43
45
#include < velocypack/Iterator.h>
@@ -184,6 +186,17 @@ static VPackBuilder compareIndexes(std::string const& dbname, std::string const&
184
186
return builder;
185
187
}
186
188
189
+ static std::string CreateLeaderString (std::string const & leaderId, bool shouldBeLeading) {
190
+ if (shouldBeLeading) {
191
+ return std::string ();
192
+ }
193
+ TRI_ASSERT (!leaderId.empty ());
194
+ if (leaderId.front () == UNDERSCORE[0 ]) {
195
+ return leaderId.substr (1 );
196
+ }
197
+ return leaderId;
198
+ }
199
+
187
200
void handlePlanShard (uint64_t planIndex, VPackSlice const & cprops, VPackSlice const & ldb,
188
201
std::string const & dbname, std::string const & colname,
189
202
std::string const & shname, std::string const & serverId,
@@ -192,15 +205,13 @@ void handlePlanShard(uint64_t planIndex, VPackSlice const& cprops, VPackSlice co
192
205
MaintenanceFeature::errors_t & errors, MaintenanceFeature& feature,
193
206
std::vector<std::shared_ptr<ActionDescription>>& actions) {
194
207
bool shouldBeLeading = serverId == leaderId;
195
- bool shouldResign = UNDERSCORE + serverId == leaderId;
196
208
197
209
commonShrds.emplace (shname);
198
210
199
211
auto const lcol = ldb.get (shname);
200
212
if (lcol.isObject ()) { // Have local collection with that name
201
213
202
214
std::string_view const localLeader = lcol.get (THE_LEADER).stringView ();
203
- bool const leaderTouched = localLeader != LEADER_NOT_YET_KNOWN;
204
215
bool leading = localLeader.empty ();
205
216
auto const properties = compareRelevantProps (cprops, lcol);
206
217
@@ -258,37 +269,20 @@ void handlePlanShard(uint64_t planIndex, VPackSlice const& cprops, VPackSlice co
258
269
<< " for central " << dbname << " /" << colname << " - skipping" ;
259
270
}
260
271
}
261
-
262
- // Handle leadership change, this is mostly about taking over leadership,
263
- // but it also handles the case that in a failover scenario we used to
264
- // be the leader and now somebody else is the leader. However, it does
265
- // not handle the case of a controlled leadership resignation, see below
266
- // in handleLocalShard for this.
267
- if (shouldResign && !leading) {
268
- // This case is a special one which is triggered if a server
269
- // restarts, has `NOT_YET_TOUCHED` in its local shard as theLeader
270
- // and finds a resignation sign. In that case, it should first officially
271
- // take over leadership. In the following round it will then resign.
272
- // This enables cleanOutServer jobs to continue to work in case of
273
- // a leader restart.
274
- shouldBeLeading = true ;
275
- shouldResign = false ;
276
- }
277
- if ((leading != shouldBeLeading && !shouldResign) || !leaderTouched) {
272
+ if (!leading && shouldBeLeading) {
278
273
LOG_TOPIC (" 52412" , DEBUG, Logger::MAINTENANCE)
279
274
<< " Triggering TakeoverShardLeadership job for shard " << dbname
280
275
<< " /" << colname << " /" << shname
281
276
<< " , local leader: " << lcol.get (THE_LEADER).copyString ()
282
277
<< " , leader id: " << leaderId << " , my id: " << serverId
283
- << " , should be leader: " << (shouldBeLeading ? std::string () : leaderId)
284
- << " , leaderTouched = " << (leaderTouched ? " yes" : " no" );
278
+ << " , should be leader: " ;
285
279
actions.emplace_back (std::make_shared<ActionDescription>(
286
280
std::map<std::string, std::string>{
287
281
{NAME, TAKEOVER_SHARD_LEADERSHIP},
288
282
{DATABASE, dbname},
289
283
{COLLECTION, colname},
290
284
{SHARD, shname},
291
- {THE_LEADER, shouldBeLeading ? std::string () : leaderId },
285
+ {THE_LEADER, std::string ()},
292
286
{LOCAL_LEADER, std::string (localLeader)},
293
287
{OLD_CURRENT_COUNTER, " 0" }, // legacy, no longer used
294
288
{PLAN_RAFT_INDEX, std::to_string (planIndex)}},
@@ -318,7 +312,7 @@ void handlePlanShard(uint64_t planIndex, VPackSlice const& cprops, VPackSlice co
318
312
}
319
313
}
320
314
}
321
- } else { // Create the sucker , if not a previous error stops us
315
+ } else { // Create the collection , if not a previous error stops us
322
316
if (errors.shards .find (dbname + " /" + colname + " /" + shname) ==
323
317
errors.shards .end ()) {
324
318
auto props = createProps (cprops); // Only once might need often!
@@ -328,7 +322,7 @@ void handlePlanShard(uint64_t planIndex, VPackSlice const& cprops, VPackSlice co
328
322
{SHARD, shname},
329
323
{DATABASE, dbname},
330
324
{SERVER_ID, serverId},
331
- {THE_LEADER, shouldBeLeading ? std::string () : leaderId}},
325
+ {THE_LEADER, CreateLeaderString ( leaderId, shouldBeLeading) }},
332
326
shouldBeLeading ? LEADER_PRIORITY : FOLLOWER_PRIORITY, std::move (props)));
333
327
} else {
334
328
LOG_TOPIC (" c1d8e" , DEBUG, Logger::MAINTENANCE)
@@ -343,58 +337,71 @@ void handleLocalShard(std::string const& dbname, std::string const& colname,
343
337
std::unordered_set<std::string>& commonShrds,
344
338
std::unordered_set<std::string>& indis, std::string const & serverId,
345
339
std::vector<std::shared_ptr<ActionDescription>>& actions) {
346
- std::unordered_set<std::string>::const_iterator it;
340
+
341
+ std::unordered_set<std::string>::const_iterator it =
342
+ std::find (commonShrds.begin (), commonShrds.end (), colname);
343
+
344
+ auto localLeader = cprops.get (THE_LEADER).stringRef ();
345
+ bool const isLeading = localLeader.empty ();
346
+ if (it == commonShrds.end ()) {
347
+ // This collection is not planned anymore, can drop it
348
+ actions.emplace_back (std::make_shared<ActionDescription>(
349
+ std::map<std::string, std::string>{{NAME, DROP_COLLECTION},
350
+ {DATABASE, dbname},
351
+ {COLLECTION, colname}},
352
+ isLeading ? LEADER_PRIORITY : FOLLOWER_PRIORITY));
353
+ return ;
354
+ }
355
+ // We dropped out before
356
+ TRI_ASSERT (it != commonShrds.end ());
357
+ // The shard exists in both Plan and Local
358
+ commonShrds.erase (it); // it not a common shard?
347
359
348
360
std::string plannedLeader;
349
361
if (shardMap.get (colname).isArray ()) {
350
362
plannedLeader = shardMap.get (colname)[0 ].copyString ();
351
363
}
352
- bool const localLeader = cprops.get (THE_LEADER).stringRef ().empty ();
353
- if (localLeader && plannedLeader == UNDERSCORE + serverId) {
364
+
365
+ bool const activeResign = isLeading && plannedLeader != serverId;
366
+ bool const adjustResignState =
367
+ (plannedLeader == UNDERSCORE + serverId &&
368
+ localLeader != ResignShardLeadership::LeaderNotYetKnownString) ||
369
+ (plannedLeader != serverId && localLeader == LEADER_NOT_YET_KNOWN);
370
+ /*
371
+ * We need to resign in the following cases:
372
+ * 1) (activeResign) We think we are the leader locally, but the plan says we are not. (including, we are resigned)
373
+ * 2) (adjustResignState) We are not leading, and not in resigned state, but the plan says we should be resigned.
374
+ * - This triggers on rebooted servers, that were in resign process
375
+ * - This triggers if the shard is moved from the server, before it actually took ownership.
376
+ */
377
+
378
+ if (activeResign || adjustResignState) {
354
379
actions.emplace_back (std::make_shared<ActionDescription>(
355
- std::map<std::string, std::string>{{NAME, RESIGN_SHARD_LEADERSHIP}, {DATABASE, dbname}, {SHARD, colname}},
356
- RESIGN_PRIORITY));
357
- } else {
358
- bool drop = false ;
359
- // check if shard is in plan, if not drop it
360
- if (commonShrds.empty ()) {
361
- drop = true ;
362
- } else {
363
- it = std::find (commonShrds.begin (), commonShrds.end (), colname);
364
- if (it == commonShrds.end ()) {
365
- drop = true ;
366
- }
367
- }
380
+ std::map<std::string, std::string>{{NAME, RESIGN_SHARD_LEADERSHIP},
381
+ {DATABASE, dbname},
382
+ {SHARD, colname}},
383
+ RESIGN_PRIORITY));
384
+ }
368
385
369
- if (drop) {
370
- actions.emplace_back (std::make_shared<ActionDescription>(
371
- std::map<std::string, std::string>{{NAME, DROP_COLLECTION}, {DATABASE, dbname}, {COLLECTION, colname}},
372
- localLeader ? LEADER_PRIORITY : FOLLOWER_PRIORITY));
373
- } else {
374
- // The shard exists in both Plan and Local
375
- commonShrds.erase (it); // it not a common shard?
376
-
377
- // We only drop indexes, when collection is not being dropped already
378
- if (cprops.hasKey (INDEXES)) {
379
- if (cprops.get (INDEXES).isArray ()) {
380
- for (auto const & index : VPackArrayIterator (cprops.get (INDEXES))) {
381
- VPackStringRef type = index.get (StaticStrings::IndexType).stringRef ();
382
- if (type != PRIMARY && type != EDGE) {
383
- std::string const id = index.get (ID).copyString ();
384
-
385
- // check if index is in plan
386
- if (indis.find (colname + " /" + id) != indis.end () ||
387
- indis.find (id) != indis.end ()) {
388
- indis.erase (id);
389
- } else {
390
- actions.emplace_back (std::make_shared<ActionDescription>(
391
- std::map<std::string, std::string>{{NAME, DROP_INDEX},
392
- {DATABASE, dbname},
393
- {COLLECTION, colname},
394
- {" index" , id}},
395
- INDEX_PRIORITY));
396
- }
397
- }
386
+ // We only drop indexes, when collection is not being dropped already
387
+ if (cprops.hasKey (INDEXES)) {
388
+ if (cprops.get (INDEXES).isArray ()) {
389
+ for (auto const & index : VPackArrayIterator (cprops.get (INDEXES))) {
390
+ VPackStringRef type = index.get (StaticStrings::IndexType).stringRef ();
391
+ if (type != PRIMARY && type != EDGE) {
392
+ std::string const id = index.get (ID).copyString ();
393
+
394
+ // check if index is in plan
395
+ if (indis.find (colname + " /" + id) != indis.end () ||
396
+ indis.find (id) != indis.end ()) {
397
+ indis.erase (id);
398
+ } else {
399
+ actions.emplace_back (std::make_shared<ActionDescription>(
400
+ std::map<std::string, std::string>{{NAME, DROP_INDEX},
401
+ {DATABASE, dbname},
402
+ {COLLECTION, colname},
403
+ {" index" , id}},
404
+ INDEX_PRIORITY));
398
405
}
399
406
}
400
407
}
0 commit comments