@@ -588,30 +588,66 @@ std::vector<check_t> Supervision::check(std::string const& type) {
588588 HealthRecord persist (shortName, endpoint, hostId, engine, serverVersion, externalEndpoint);
589589
590590 // Get last health entries from transient and persistent key value stores
591+ bool transientHealthRecordFound = true ;
591592 if (_transient.has (healthPrefix + serverID)) {
592593 transist = _transient.hasAsNode (healthPrefix + serverID).first ;
594+ } else {
595+ // In this case this is the first time we look at this server during our
596+ // new leadership. So we do not touch the persisted health record and
597+ // only create a new transient health record.
598+ transientHealthRecordFound = false ;
593599 }
594600 if (snapshot ().has (healthPrefix + serverID)) {
595601 persist = snapshot ().hasAsNode (healthPrefix + serverID).first ;
596602 }
597603
604+ // Here is an important subtlety: We will derive the health status of this
605+ // server a bit further down by looking at the time when we saw the last
606+ // heartbeat. The heartbeat is stored in transient in `Sync/ServerStates`.
607+ // It has a timestamp, however, this was taken on the other server, so we
608+ // cannot trust this time stamp because of possible clock skew. Since we
609+ // do not actually know when this heartbeat came in, we have to proceed as
610+ // follows: We make a copy of the `syncTime` and store it into the health
611+ // record in transient `Supervision/Health`. If we detect a difference, it
612+ // is the first time we have seen the new heartbeat and we can then take
613+ // a reading of our local system clock and use that time. However, if we
614+ // do not yet have a previous reading in our transient health record,
615+ // we must not touch the persisted health record at all. This is what the
616+ // flag `transientHealthRecordFound` means.
617+
598618 // New health record (start with old add current information from sync)
599619 // Sync.time is copied to Health.syncTime
600620 // Sync.status is copied to Health.syncStatus
601- std::string syncTime =
602- _transient.has (syncPrefix + serverID)
603- ? _transient.hasAsString (syncPrefix + serverID + " /time" ).first
604- : timepointToString (std::chrono::system_clock::time_point ());
605- std::string syncStatus =
606- _transient.has (syncPrefix + serverID)
607- ? _transient.hasAsString (syncPrefix + serverID + " /status" ).first
608- : " UNKNOWN" ;
609-
610- // Last change registered in sync (transient != sync)
611- // Either now or value in transient
612- auto lastAckedTime = (syncTime != transist.syncTime )
613- ? startTimeLoop
614- : stringToTimepoint (transist.lastAcked );
621+ std::string syncTime;
622+ std::string syncStatus;
623+ bool heartBeatVisible = _transient.has (syncPrefix + serverID);
624+ if (heartBeatVisible) {
625+ syncTime = _transient.hasAsString (syncPrefix + serverID + " /time" ).first ;
626+ syncStatus = _transient.hasAsString (syncPrefix + serverID + " /status" ).first ;
627+ } else {
628+ syncTime = timepointToString (std::chrono::system_clock::time_point ()); // beginning of time
629+ syncStatus = " UNKNOWN" ;
630+ }
631+
632+ // Compute the time when we last discovered a new heartbeat from that server:
633+ std::chrono::system_clock::time_point lastAckedTime;
634+ if (heartBeatVisible) {
635+ if (transientHealthRecordFound) {
636+ lastAckedTime = (syncTime != transist.syncTime )
637+ ? startTimeLoop
638+ : stringToTimepoint (transist.lastAcked );
639+ } else {
640+ // in this case we do no really know when this heartbeat came in,
641+ // however, it must have been after we became a leader, and since we
642+ // do not have a transient health record yet, we just assume that we
643+ // got it recently and set it to "now". Note that this will not make
644+ // a FAILED server "GOOD" again, since we do not touch the persisted
645+ // health record if we do not have a transient health record yet.
646+ lastAckedTime = startTimeLoop;
647+ }
648+ } else {
649+ lastAckedTime = std::chrono::system_clock::time_point ();
650+ }
615651 transist.lastAcked = timepointToString (lastAckedTime);
616652 transist.syncTime = syncTime;
617653 transist.syncStatus = syncStatus;
@@ -623,34 +659,41 @@ std::vector<check_t> Supervision::check(std::string const& type) {
623659 transist.hostId = hostId;
624660 transist.endpoint = endpoint;
625661
626- // Calculate elapsed since lastAcked
627- auto elapsed = std::chrono::duration<double >(startTimeLoop - lastAckedTime);
662+ // We have now computed a new transient health record under all circumstances.
663+
664+ bool changed;
665+ if (transientHealthRecordFound)
AA32
{
666+ // Calculate elapsed since lastAcked
667+ auto elapsed = std::chrono::duration<double >(startTimeLoop - lastAckedTime);
628668
629- if (elapsed.count () <= _okThreshold) {
630- transist.status = Supervision::HEALTH_STATUS_GOOD;
631- } else if (elapsed.count () <= _gracePeriod) {
632- transist.status = Supervision::HEALTH_STATUS_BAD;
669+ if (elapsed.count () <= _okThreshold) {
670+ transist.status = Supervision::HEALTH_STATUS_GOOD;
671+ } else if (elapsed.count () <= _gracePeriod) {
672+ transist.status = Supervision::HEALTH_STATUS_BAD;
673+ } else {
674+ transist.status = Supervision::HEALTH_STATUS_FAILED;
675+ }
676+
677+ // Status changed?
678+ changed = transist.statusDiff (persist);
633679 } else {
634- transist.status = Supervision::HEALTH_STATUS_FAILED;
680+ transist.status = persist.status ;
681+ changed = false ;
635682 }
636683
637- // Status changed?
638- bool changed = transist.statusDiff (persist);
639-
640684 // Take necessary actions if any
641685 std::shared_ptr<VPackBuilder> envelope;
642686 if (changed) {
643687 LOG_TOPIC (" bbbde" , DEBUG, Logger::SUPERVISION)
644688 << " Status of server " << serverID << " has changed from "
645689 << persist.status << " to " << transist.status ;
646690 handleOnStatus (_agent, snapshot (), persist, transist, serverID, _jobId, envelope);
691+ persist = transist; // Now copy Status, SyncStatus from transient to persited
647692 } else {
648693 LOG_TOPIC (" 44253" , TRACE, Logger::SUPERVISION)
649694 << " Health of server " << machine.first << " remains " << transist.status ;
650695 }
651696
652- persist = transist; // Now copy Status, SyncStatus from transient to persited
653-
654697 // Transient report
655698 std::shared_ptr<Builder> tReport = std::make_shared<Builder>();
656699 {
0 commit comments