10000 Send out empty heartbeats regardless of non-empty AppendEntriesRPC. · MohammedDeveloper/arangodb@e974501 · GitHub
[go: up one dir, main page]

Skip to content

Commit e974501

Browse files
committed
Send out empty heartbeats regardless of non-empty AppendEntriesRPC.
Also improve logging: note when writing a log line while sending an empty heartbeat takes more than 0.01 s, clearly mark in the log output the places where a leader resigns, and log when no empty heartbeat is sent out.
1 parent bd5e84a commit e974501

File tree

2 files changed

+36
-14
lines changed

2 files changed

+36
-14
lines changed

arangod/Agency/Agent.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,10 @@ void Agent::sendAppendEntriesRPC() {
537537
// message if a timeout occurs.
538538

539539
_lastSent[followerId] = system_clock::now();
540-
_constituent.notifyHeartbeatSent(followerId);
540+
// _constituent.notifyHeartbeatSent(followerId);
541+
// Do not notify constituent, because the AppendEntriesRPC here could
542+
// take a very long time, so this must not disturb the empty ones
543+
// being sent out.
541544

542545
LOG_TOPIC(DEBUG, Logger::AGENCY)
543546
<< "Appending (" << (uint64_t) (TRI_microtime() * 1000000000.0) << ") "
@@ -591,8 +594,13 @@ void Agent::sendEmptyAppendEntriesRPC(std::string followerId) {
591594
3 * _config.minPing() * _config.timeoutMult(), true);
592595
_constituent.notifyHeartbeatSent(followerId);
593596

597+
double now = TRI_microtime();
594598
LOG_TOPIC(DEBUG, Logger::AGENCY)
595599
<< "Sending empty appendEntriesRPC to follower " << followerId;
600+
double diff = TRI_microtime() - now;
601+
if (diff > 0.01) {
602+
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Logging of a line took more than 1/100 of a second, this is bad:" << diff;
603+
}
596604
}
597605

598606
void Agent::advanceCommitIndex() {

arangod/Agency/Constituent.cpp

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ void Constituent::candidate() {
271271

272272
if (_leaderID != NO_LEADER) {
273273
_leaderID = NO_LEADER;
274-
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Set _leaderID to NO_LEADER";
274+
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Set _leaderID to NO_LEADER in Constituent::candidate";
275275
}
276276

277277
if (_role != CANDIDATE) {
@@ -739,35 +739,49 @@ void Constituent::run() {
739739
} else if (role == CANDIDATE) {
740740
callElection(); // Run for office
741741
} else {
742-
// This is 1/4th of the minPing timeout (_cv.wait() below is in
743-
// microseconds):
744-
uint64_t timeout =
745-
static_cast<uint64_t>(250000.0 * _agent->config().minPing() *
746-
_agent->config().timeoutMult());
747-
{
748-
CONDITION_LOCKER(guardv, _cv);
749-
_cv.wait(timeout);
750-
}
742+
double interval = 0.25 * _agent->config().minPing()
743+
* _agent->config().timeoutMult();
751744

752745
double now = TRI_microtime();
746+
double nextWakeup = interval; // might be lowered below
747+
753748
std::string const myid = _agent->id();
754749
for (auto const& followerId : _agent->config().active()) {
755750
if (followerId != myid) {
756751
bool needed = false;
757752
{
758753
MUTEX_LOCKER(guard, _heartBeatMutex);
759754
auto it = _lastHeartbeatSent.find(followerId);
760-
if (it == _lastHeartbeatSent.end() ||
761-
now - it->second > _agent->config().minPing()
762-
* _agent->config().timeoutMult() / 4.0) {
755+
if (it == _lastHeartbeatSent.end()) {
763756
needed = true;
757+
} else {
758+
double diff = now - it->second;
759+
if (diff >= interval) {
760+
needed = true;
761+
} else {
762+
// diff < interval, so only needed again in interval-diff s
763+
double waitOnly = interval - diff;
764+
if (nextWakeup > waitOnly) {
765+
nextWakeup = waitOnly;
766+
}
767+
LOG_TOPIC(DEBUG, Logger::AGENCY)
768+
<< "No need for empty AppendEntriesRPC: " << diff;
769+
}
764770
}
765771
}
766772
if (needed) {
767773
_agent->sendEmptyAppendEntriesRPC(followerId);
768774
}
769775
}
770776
}
777+
778+
// This is the smallest time until any of the followers need a
779+
// new empty heartbeat:
780+
uint64_t timeout = static_cast<uint64_t>(1000000.0 * nextWakeup);
781+
{
782+
CONDITION_LOCKER(guardv, _cv);
783+
_cv.wait(timeout);
784+
}
771785
}
772786
}
773787
}

0 commit comments

Comments
 (0)
0