diff --git a/3rdParty/immer/v0.6.2/immer/transience/no_transience_policy.hpp b/3rdParty/immer/v0.6.2/immer/transience/no_transience_policy.hpp index aa3d44ed4287..3c15c7eed729 100644 --- a/3rdParty/immer/v0.6.2/immer/transience/no_transience_policy.hpp +++ b/3rdParty/immer/v0.6.2/immer/transience/no_transience_policy.hpp @@ -26,7 +26,7 @@ struct no_transience_policy struct owner { operator edit () const { return {}; } - owner& operator=(const owner&) { return *this; }; + //owner& operator=(const owner&) { return *this; }; -- already fixed upstream }; struct ownee diff --git a/arangod/Agency/Supervision.cpp b/arangod/Agency/Supervision.cpp index b955da5ca64c..b08932c1dda5 100644 --- a/arangod/Agency/Supervision.cpp +++ b/arangod/Agency/Supervision.cpp @@ -2244,8 +2244,8 @@ void Supervision::checkReplicatedLogs() { }; // check if Plan has replicated logs - auto const& node = snapshot().hasAsNode(planRepLogPrefix); - if (!node) { + auto const& planNode = snapshot().hasAsNode(planRepLogPrefix); + if (!planNode) { return; } @@ -2267,10 +2267,10 @@ void Supervision::checkReplicatedLogs() { auto builder = std::make_shared(); auto envelope = arangodb::agency::envelope::into_builder(*builder); - for (auto const& [dbName, db] : node->get().children()) { + for (auto const& [dbName, db] : planNode->get().children()) { for (auto const& [idString, node] : db->children()) { auto spec = readPlanSpecification(*node); - auto current = std::invoke([&, &dbName = dbName, &idString = idString]() -> LogCurrent { + auto current = std::invoke([&, &dbName = dbName, &idString = idString]() -> std::optional { using namespace cluster::paths; auto currentPath = aliases::current() @@ -2278,9 +2278,18 @@ void Supervision::checkReplicatedLogs() { ->database(dbName) ->log(idString) ->str(SkipComponents(1) /* skip first path component, i.e. 
'arango' */); - return readLogCurrent(snapshot().get(currentPath)->get()); + + auto cnode = snapshot().get(currentPath); + if (cnode.has_value()) { + return readLogCurrent(cnode->get()); + } + return std::nullopt; }); - auto newTermSpec = checkReplicatedLog(dbName, spec, current, info); + if (!current.has_value()) { + continue; + } + + auto newTermSpec = checkReplicatedLog(dbName, spec, *current, info); envelope = std::visit( overload{[&, &dbName = dbName](LogPlanTermSpecification const& newSpec) { diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 6c2b85ed96fe..0d90f13c8d32 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -566,7 +566,7 @@ set(LIB_ARANGO_REPLICATION2_SOURCES Replication2/ReplicatedLog/types.cpp Replication2/Version.cpp RestHandler/RestLogHandler.cpp - ) +) set (LIB_ARANGO_METRICS_SOURCES RestServer/Metrics.cpp diff --git a/arangod/Replication2/AgencyMethods.cpp b/arangod/Replication2/AgencyMethods.cpp index 077182ab780b..c0b2742059ef 100644 --- a/arangod/Replication2/AgencyMethods.cpp +++ b/arangod/Replication2/AgencyMethods.cpp @@ -20,13 +20,28 @@ /// @author Lars Maier //////////////////////////////////////////////////////////////////////////////// -#include - #include "AgencyMethods.h" -#include -#include -#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "Agency/AsyncAgencyComm.h" +#include "Agency/TransactionBuilder.h" +#include "Agency/AgencyPaths.h" +#include "Cluster/ClusterTypes.h" +#include "Replication2/ReplicatedLog/AgencyLogSpecification.h" +#include "Replication2/ReplicatedLog/LogCommon.h" + +namespace arangodb { +class Result; +} // namespace arangodb using namespace std::chrono_literals; diff --git a/arangod/Replication2/AgencyMethods.h b/arangod/Replication2/AgencyMethods.h index dcc637e439fe..caf033e8c335 100644 --- a/arangod/Replication2/AgencyMethods.h +++ b/arangod/Replication2/AgencyMethods.h @@ -21,13 +21,26 @@ 
//////////////////////////////////////////////////////////////////////////////// #pragma once -#include - #include #include #include - #include +#include + +#include "Futures/Future.h" + +namespace arangodb { +class Result; +} // namespace arangodb +namespace arangodb::replication2 { +class LogId; +struct LogTerm; +} // namespace arangodb::replication2 +namespace arangodb::replication2::agency { +struct LogCurrentSupervisionElection; +struct LogPlanSpecification; +struct LogPlanTermSpecification; +} // namespace arangodb::replication2::agency namespace arangodb::replication2::agency::methods { diff --git a/arangod/Replication2/LoggerContext.h b/arangod/Replication2/LoggerContext.h index ebc7eaad15e6..5782bbcd59ef 100644 --- a/arangod/Replication2/LoggerContext.h +++ b/arangod/Replication2/LoggerContext.h @@ -58,7 +58,7 @@ struct LogNameValuePair : LoggableValue { }; struct LoggerContext { - explicit LoggerContext(LogTopic topic) : topic(std::move(topic)) {} + explicit LoggerContext(LogTopic const& topic) : topic(topic) {} template auto with(T&& t) const -> LoggerContext { @@ -67,8 +67,8 @@ struct LoggerContext { return LoggerContext(values.push_back(std::move(pair)), topic); } - auto withTopic(LogTopic newTopic) const { - return LoggerContext(values, std::move(newTopic)); + auto withTopic(LogTopic const& newTopic) const { + return LoggerContext(values, newTopic); } friend auto operator<<(std::ostream& os, LoggerContext const& ctx) -> std::ostream& { @@ -85,12 +85,13 @@ struct LoggerContext { return os; } - LogTopic const topic; - ::immer::flex_vector, arangodb::immer::arango_memory_policy> const values = {}; + using Container = ::immer::flex_vector, arangodb::immer::arango_memory_policy>; + LogTopic const& topic; + Container const values = {}; private: - LoggerContext(decltype(values) values, LogTopic topic) - : topic(std::move(topic)), values(std::move(values)) {} + LoggerContext(Container values, LogTopic const& topic) + : topic(topic), values(std::move(values)) 
{} }; } diff --git a/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp b/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp index 01401bb5cdd7..47bbd97ca27d 100644 --- a/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp +++ b/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp @@ -57,9 +57,9 @@ auto LogPlanTermSpecification::toVelocyPack(VPackBuilder& builder) const -> void } } -LogPlanTermSpecification::LogPlanTermSpecification(from_velocypack_t, VPackSlice slice) { - term = slice.get(StaticStrings::Term).extract(); - config = LogConfig(slice.get(StaticStrings::Config)); +LogPlanTermSpecification::LogPlanTermSpecification(from_velocypack_t, VPackSlice slice) + : term(slice.get(StaticStrings::Term).extract()), + config(slice.get(StaticStrings::Config)) { for (auto const& [key, value] : VPackObjectIterator(slice.get(StaticStrings::Participants))) { TRI_ASSERT(value.isEmptyObject()); @@ -82,9 +82,9 @@ auto LogPlanSpecification::toVelocyPack(VPackBuilder& builder) const -> void { } } -LogPlanSpecification::LogPlanSpecification(from_velocypack_t, VPackSlice slice) { - id = slice.get(StaticStrings::Id).extract(); - targetConfig = LogConfig(slice.get(StaticStrings::TargetConfig)); +LogPlanSpecification::LogPlanSpecification(from_velocypack_t, VPackSlice slice) + : id(slice.get(StaticStrings::Id).extract()), + targetConfig(slice.get(StaticStrings::TargetConfig)) { if (auto term = slice.get(StaticStrings::CurrentTerm); !term.isNone()) { currentTerm = LogPlanTermSpecification{from_velocypack, term}; } @@ -99,7 +99,8 @@ LogPlanTermSpecification::LogPlanTermSpecification(LogTerm term, LogConfig confi participants(std::move(participants)) {} LogPlanSpecification::LogPlanSpecification(LogId id, std::optional term, - LogConfig config) : id(id), currentTerm(std::move(term)), targetConfig(config) {} + LogConfig config) + : id(id), currentTerm(std::move(term)), targetConfig(config) {} 
LogCurrentLocalState::LogCurrentLocalState(from_velocypack_t, VPackSlice slice) { auto spearheadSlice = slice.get(StaticStrings::Spearhead); @@ -135,10 +136,11 @@ LogCurrentSupervision::LogCurrentSupervision(from_velocypack_t, VPackSlice slice } } -LogCurrentSupervisionElection::LogCurrentSupervisionElection(from_velocypack_t, VPackSlice slice) { - term = slice.get(StaticStrings::Term).extract(); - participantsRequired = slice.get("participantsRequired").getNumericValue(); - participantsAvailable = slice.get("participantsAvailable").getNumericValue(); +LogCurrentSupervisionElection::LogCurrentSupervisionElection(from_velocypack_t, VPackSlice slice) + : term(slice.get(StaticStrings::Term).extract()), + participantsRequired(slice.get("participantsRequired").getNumericValue()), + participantsAvailable( + slice.get("participantsAvailable").getNumericValue()) { for (auto [key, value] : VPackObjectIterator(slice.get("details"))) { detail.emplace(key.copyString(), value.get("code").getNumericValue()); } @@ -172,21 +174,23 @@ auto LogCurrentSupervisionElection::toVelocyPack(VPackBuilder& builder) const -> builder.add("participantsAvailable", VPackValue(participantsAvailable)); { VPackObjectBuilder db(&builder, "details"); - for (auto const&[server, error] : detail) { + for (auto const& [server, error] : detail) { builder.add(VPackValue(server)); ::toVelocyPack(error, builder); } } } -auto agency::toVelocyPack(LogCurrentSupervisionElection::ErrorCode ec, VPackBuilder& builder) -> void { +auto agency::toVelocyPack(LogCurrentSupervisionElection::ErrorCode ec, + VPackBuilder& builder) -> void { VPackObjectBuilder ob(&builder); builder.add("code", VPackValue(static_cast(ec))); builder.add("message", VPackValue(to_string(ec))); } -auto agency::to_string(LogCurrentSupervisionElection::ErrorCode ec) noexcept -> std::string_view { - switch(ec) { +auto agency::to_string(LogCurrentSupervisionElection::ErrorCode ec) noexcept + -> std::string_view { + switch (ec) { case 
LogCurrentSupervisionElection::ErrorCode::OK: return "the server is ok"; case LogCurrentSupervisionElection::ErrorCode::SERVER_NOT_GOOD: diff --git a/arangod/Replication2/ReplicatedLog/Algorithms.cpp b/arangod/Replication2/ReplicatedLog/Algorithms.cpp index ca734ca3929e..728e373b3518 100644 --- a/arangod/Replication2/ReplicatedLog/Algorithms.cpp +++ b/arangod/Replication2/ReplicatedLog/Algorithms.cpp @@ -223,8 +223,6 @@ auto algorithms::detectConflict(replicated_log::InMemoryLog const& log, TermInde TermIndexPair{lastEntry->entry().logTerm(), lastEntry->entry().logIndex() + 1}); } else { - // this can only happen if we drop log entries, check the code below before removing the assert - TRI_ASSERT(false); TRI_ASSERT(prevLog.index < lastEntry->entry().logIndex()); TRI_ASSERT(prevLog.index < log.getFirstEntry()->entry().logIndex()); // the given index too old, reset to (0, 0) diff --git a/arangod/Replication2/ReplicatedLog/Algorithms.h b/arangod/Replication2/ReplicatedLog/Algorithms.h index 2a35bedf9488..966fc1d2360f 100644 --- a/arangod/Replication2/ReplicatedLog/Algorithms.h +++ b/arangod/Replication2/ReplicatedLog/Algorithms.h @@ -27,8 +27,7 @@ #include #include "Cluster/ClusterTypes.h" -#include "InMemoryLog.h" -#include "ReplicatedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" #include "Replication2/ReplicatedLog/AgencyLogSpecification.h" namespace arangodb::replication2::algorithms { diff --git a/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp b/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp index c0fbe53129b5..5125505fab69 100644 --- a/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp +++ b/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp @@ -28,6 +28,7 @@ #include "RestServer/Metrics.h" #include +#include using namespace arangodb; using namespace arangodb::replication2; @@ -64,3 +65,23 @@ auto replicated_log::ILogParticipant::waitForIterator(LogIndex index) auto replicated_log::ILogParticipant::getTerm() const noexcept 
-> std::optional { return getStatus().getCurrentTerm(); } + +auto replicated_log::LogUnconfiguredParticipant::release(LogIndex doneWithIdx) -> Result { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +replicated_log::WaitForResult::WaitForResult(LogIndex index, + std::shared_ptr quorum) + : currentCommitIndex(index), quorum(std::move(quorum)) {} + +void replicated_log::WaitForResult::toVelocyPack(velocypack::Builder& builder) const { + VPackObjectBuilder ob(&builder); + builder.add(StaticStrings::CommitIndex, VPackValue(currentCommitIndex)); + builder.add(VPackValue("quorum")); + quorum->toVelocyPack(builder); +} + +replicated_log::WaitForResult::WaitForResult(velocypack::Slice s) { + currentCommitIndex = s.get(StaticStrings::CommitIndex).extract(); + quorum = std::make_shared(s.get("quorum")); +} diff --git a/arangod/Replication2/ReplicatedLog/ILogParticipant.h b/arangod/Replication2/ReplicatedLog/ILogParticipant.h index 4ee236312557..e60bb6af3946 100644 --- a/arangod/Replication2/ReplicatedLog/ILogParticipant.h +++ b/arangod/Replication2/ReplicatedLog/ILogParticipant.h @@ -33,11 +33,28 @@ #include #include +namespace arangodb { +class Result; +} + namespace arangodb::replication2::replicated_log { struct LogCore; struct LogStatus; +struct WaitForResult { + /// @brief contains the _current_ commit index. (Not the index waited for) + LogIndex currentCommitIndex; + /// @brief Quorum information + std::shared_ptr quorum; + + WaitForResult(LogIndex index, std::shared_ptr quorum); + WaitForResult() = default; + WaitForResult(velocypack::Slice); + + void toVelocyPack(velocypack::Builder&) const; +}; + /** * @brief Interface for a log participant: That is, usually either a leader or a * follower (LogLeader and LogFollower). 
Can also be a LogUnconfiguredParticipant, @@ -51,21 +68,23 @@ struct ILogParticipant { [[nodiscard]] virtual auto resign() && -> std::tuple, DeferredAction> = 0; - using WaitForPromise = futures::Promise>; - using WaitForFuture = futures::Future>; - using WaitForIteratorFuture = futures::Future>; + using WaitForPromise = futures::Promise; + using WaitForFuture = futures::Future; + using WaitForIteratorFuture = futures::Future>; using WaitForQueue = std::multimap; [[nodiscard]] virtual auto waitFor(LogIndex index) -> WaitForFuture = 0; [[nodiscard]] virtual auto waitForIterator(LogIndex index) -> WaitForIteratorFuture; [[nodiscard]] virtual auto getTerm() const noexcept -> std::optional; + + [[nodiscard]] virtual auto release(LogIndex doneWithIdx) -> Result = 0; }; /** * @brief Unconfigured log participant, i.e. currently neither a leader nor * follower. Holds a LogCore, does nothing else. */ -struct LogUnconfiguredParticipant +struct LogUnconfiguredParticipant final : std::enable_shared_from_this, ILogParticipant { ~LogUnconfiguredParticipant() override; @@ -76,6 +95,7 @@ struct LogUnconfiguredParticipant auto resign() && -> std::tuple, DeferredAction> override; [[nodiscard]] auto waitFor(LogIndex) -> WaitForFuture override; + [[nodiscard]] auto release(LogIndex doneWithIdx) -> Result override; private: std::unique_ptr _logCore; std::shared_ptr const _logMetrics; diff --git a/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp b/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp index 766189e691ba..881b303e59c4 100644 --- a/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp +++ b/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp @@ -51,15 +51,10 @@ using namespace arangodb; using namespace arangodb::replication2; auto replicated_log::InMemoryLog::getLastIndex() const noexcept -> LogIndex { - auto const result = LogIndex{_log.size()}; - // log empty => result == 0 - TRI_ASSERT(!_log.empty() || result == LogIndex(0)); - // !log empty => result index == last entry - 
TRI_ASSERT(_log.empty() || result == _log.back().entry().logIndex()); - return result; + return getLastTermIndexPair().index; } -auto replicated_log::InMemoryLog::getLastTermIndexPair() const noexcept -> TermIndexPair{ +auto replicated_log::InMemoryLog::getLastTermIndexPair() const noexcept -> TermIndexPair { if (_log.empty()) { return {}; } @@ -67,36 +62,38 @@ auto replicated_log::InMemoryLog::getLastTermIndexPair() const noexcept -> TermI } auto replicated_log::InMemoryLog::getLastTerm() const noexcept -> LogTerm { - if (_log.empty()) { - return LogTerm{0}; - } - return _log.back().entry().logTerm(); + return getLastTermIndexPair().term; } auto replicated_log::InMemoryLog::getNextIndex() const noexcept -> LogIndex { - return getLastIndex() + 1; + return _first + _log.size(); } auto replicated_log::InMemoryLog::getEntryByIndex(LogIndex const idx) const noexcept -> std::optional { - if (_log.size() < idx.value || idx.value == 0) { + if (_first + _log.size() <= idx || idx < _first) { return std::nullopt; } - auto const& e = _log.at(idx.value - 1); - TRI_ASSERT(e.entry().logIndex() == idx); + auto const& e = _log.at(idx.value - _first.value); + TRI_ASSERT(e.entry().logIndex() == idx) + << "idx = " << idx << ", entry = " << e.entry().logIndex(); return e; } auto replicated_log::InMemoryLog::slice(LogIndex from, LogIndex to) const -> log_type { - from = LogIndex{std::max(from.value, 1)}; - TRI_ASSERT(from <= to); - auto res = _log.take(to.value - 1).drop(from.value - 1); - TRI_ASSERT(res.size() == to.value - from.value); + from = std::max(from, _first); + to = std::max(to, _first); + TRI_ASSERT(from <= to) << "from = " << from << ", to = " << to; + auto res = _log.take(to.value - _first.value).drop(from.value - _first.value); + TRI_ASSERT(res.size() <= to.value - from.value) + << "res.size() = " << res.size() << ", to = " << to.value + << ", from = " << from.value; return res; } -auto replicated_log::InMemoryLog::getFirstIndexOfTerm(LogTerm term) const noexcept -> 
std::optional { +auto replicated_log::InMemoryLog::getFirstIndexOfTerm(LogTerm term) const noexcept + -> std::optional { auto it = std::lower_bound(_log.begin(), _log.end(), term, [](auto const& entry, auto const& term) { return term > entry.entry().logTerm(); @@ -125,17 +122,20 @@ auto replicated_log::InMemoryLog::getLastIndexOfTerm(LogTerm term) const noexcep } } -replicated_log::InMemoryLog::InMemoryLog(LoggerContext const& logContext, - replicated_log::LogCore const& logCore) { - auto iter = logCore.read(LogIndex{0}); - auto log = _log.transient(); - while (auto entry = iter->next()) { - log.push_back(InMemoryLogEntry(std::move(entry).value())); - } - _log = std::move(log).persistent(); +auto replicated_log::InMemoryLog::release(LogIndex stop) const -> replicated_log::InMemoryLog { + auto [from, to] = getIndexRange(); + auto newLog = slice(stop, to); + return InMemoryLog(newLog); } -replicated_log::InMemoryLog::InMemoryLog(log_type log) : _log(std::move(log)) {} +replicated_log::InMemoryLog::InMemoryLog(log_type log) + : _log(std::move(log)), + _first(_log.empty() ? LogIndex{1} : _log.front().entry().logIndex()) {} + +replicated_log::InMemoryLog::InMemoryLog(log_type log, LogIndex first) + : _log(std::move(log)), _first(first) { + TRI_ASSERT(_log.empty() || first == _log.front().entry().logIndex()); +} #if (_MSC_VER >= 1) // suppress false positive warning: @@ -144,7 +144,8 @@ replicated_log::InMemoryLog::InMemoryLog(log_type log) : _log(std::move(log)) {} #pragma warning(disable : 4297) #endif replicated_log::InMemoryLog::InMemoryLog(replicated_log::InMemoryLog&& other) noexcept try - : _log(std::move(other._log)) { + : _log(std::move(other._log)), _first(other._first) { + other._first = LogIndex{1}; // Note that immer::flex_vector is currently not nothrow move-assignable, // though it probably does not throw any exceptions. 
However, we *need* this // to be noexcept, otherwise we cannot keep the persistent and in-memory state @@ -186,6 +187,8 @@ auto replicated_log::InMemoryLog::operator=(replicated_log::InMemoryLog&& other) // The try/catch is *only* for logging, but *must* terminate (e.g. by // rethrowing) the process if an exception is caught. _log = std::move(other._log); + _first = other._first; + other._first = LogIndex{1}; return *this; } catch (std::exception const& ex) { LOG_TOPIC("bf5c5", FATAL, Logger::REPLICATION2) @@ -206,21 +209,22 @@ auto replicated_log::InMemoryLog::getIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr { // if we want to have read from log entry 1 onwards, we have to drop // no entries, because log entry 0 does not exist. - auto log = _log.drop(fromIdx.saturatedDecrement().value); + auto log = _log.drop(fromIdx.saturatedDecrement(_first.value).value); return std::make_unique(std::move(log)); } -auto replicated_log::InMemoryLog::getInternalIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr { +auto replicated_log::InMemoryLog::getInternalIteratorFrom(LogIndex fromIdx) const + -> std::unique_ptr { // if we want to have read from log entry 1 onwards, we have to drop // no entries, because log entry 0 does not exist. 
- auto log = _log.drop(fromIdx.saturatedDecrement().value); + auto log = _log.drop(fromIdx.saturatedDecrement(_first.value).value); return std::make_unique(std::move(log)); } auto replicated_log::InMemoryLog::getIteratorRange(LogIndex fromIdx, LogIndex toIdx) const - -> std::unique_ptr { - auto log = _log.take(toIdx.saturatedDecrement().value) - .drop(fromIdx.saturatedDecrement().value); + -> std::unique_ptr { + auto log = _log.take(toIdx.saturatedDecrement(_first.value).value) + .drop(fromIdx.saturatedDecrement(_first.value).value); return std::make_unique(std::move(log)); } @@ -240,25 +244,32 @@ void replicated_log::InMemoryLog::appendInPlace(LoggerContext const& logContext, auto replicated_log::InMemoryLog::append(LoggerContext const& logContext, log_type entries) const -> InMemoryLog { + TRI_ASSERT(entries.empty() || getNextIndex() == entries.front().entry().logIndex()) + << std::boolalpha << "entries.empty() = " << entries.empty() + << ", front = " << entries.front().entry().logIndex() + << ", getNextIndex = " << getNextIndex(); auto transient = _log.transient(); transient.append(std::move(entries).transient()); - return InMemoryLog{std::move(transient).persistent()}; + return InMemoryLog{std::move(transient).persistent(), _first}; } -auto replicated_log::InMemoryLog::append( - LoggerContext const& logContext, - ::immer::flex_vector const& entries) const - -> InMemoryLog { +auto replicated_log::InMemoryLog::append(LoggerContext const& logContext, + log_type_persisted const& entries) const -> InMemoryLog { + TRI_ASSERT(entries.empty() || getNextIndex() == entries.front().logIndex()) + << std::boolalpha << "entries.empty() = " << entries.empty() + << ", front = " << entries.front().logIndex() + << ", getNextIndex = " << getNextIndex(); auto transient = _log.transient(); for (auto const& entry : entries) { transient.push_back(InMemoryLogEntry(entry)); } - return InMemoryLog{std::move(transient).persistent()}; + return 
InMemoryLog{std::move(transient).persistent(), _first}; } auto replicated_log::InMemoryLog::takeSnapshotUpToAndIncluding(LogIndex until) const -> InMemoryLog { - return InMemoryLog(_log.take(until.value)); + TRI_ASSERT(_first <= (until + 1)); + return InMemoryLog{_log.take(until.value - _first.value + 1), _first}; } auto replicated_log::InMemoryLog::copyFlexVector() const -> log_type { @@ -289,7 +300,7 @@ auto replicated_log::InMemoryLog::getFirstEntry() const noexcept return _log.front(); } -auto replicated_log::InMemoryLog::dump(replicated_log::InMemoryLog::log_type log) +auto replicated_log::InMemoryLog::dump(replicated_log::InMemoryLog::log_type const& log) -> std::string { auto builder = velocypack::Builder(); auto stream = std::stringstream(); @@ -310,4 +321,24 @@ auto replicated_log::InMemoryLog::dump(replicated_log::InMemoryLog::log_type log return stream.str(); } -auto replicated_log::InMemoryLog::dump() -> std::string { return dump(_log); } +auto replicated_log::InMemoryLog::dump() const -> std::string { + return dump(_log); +} + +auto replicated_log::InMemoryLog::getIndexRange() const noexcept -> LogRange { + return {_first, _first + _log.size()}; +} + +auto replicated_log::InMemoryLog::getFirstIndex() const noexcept -> LogIndex { + return _first; +} + +auto replicated_log::InMemoryLog::loadFromLogCore(replicated_log::LogCore const& core) + -> replicated_log::InMemoryLog { + auto iter = core.read(LogIndex{0}); + auto log = log_type::transient_type{}; + while (auto entry = iter->next()) { + log.push_back(InMemoryLogEntry(std::move(entry).value())); + } + return InMemoryLog{log.persistent()}; +} diff --git a/arangod/Replication2/ReplicatedLog/InMemoryLog.h b/arangod/Replication2/ReplicatedLog/InMemoryLog.h index 9a7f6512a9d5..8100797e561f 100644 --- a/arangod/Replication2/ReplicatedLog/InMemoryLog.h +++ b/arangod/Replication2/ReplicatedLog/InMemoryLog.h @@ -26,6 +26,7 @@ #include "Replication2/ReplicatedLog/LogCommon.h" #include +#include #include @@ -37,7 
+38,6 @@ // result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?) #pragma warning(disable : 4334) #endif -#include #include #if (_MSC_VER >= 1) #pragma warning(pop) @@ -56,15 +56,18 @@ struct PersistedLogIterator; */ struct InMemoryLog { public: - using log_type = - ::immer::flex_vector; + template + using log_type_t = ::immer::flex_vector; + using log_type = log_type_t; + using log_type_persisted = log_type_t; private: log_type _log{}; + LogIndex _first{0}; public: InMemoryLog() = delete; - InMemoryLog(LoggerContext const& logContext, replicated_log::LogCore const& logCore); + explicit InMemoryLog(log_type log); InMemoryLog(InMemoryLog&& other) noexcept; InMemoryLog(InMemoryLog const&) = default; @@ -79,6 +82,7 @@ struct InMemoryLog { [[nodiscard]] auto getLastTerm() const noexcept -> LogTerm; [[nodiscard]] auto getLastEntry() const noexcept -> std::optional; [[nodiscard]] auto getFirstEntry() const noexcept -> std::optional; + [[nodiscard]] auto getFirstIndex() const noexcept -> LogIndex; [[nodiscard]] auto getNextIndex() const noexcept -> LogIndex; [[nodiscard]] auto getEntryByIndex(LogIndex idx) const noexcept -> std::optional; @@ -89,34 +93,39 @@ struct InMemoryLog { [[nodiscard]] auto getLastIndexOfTerm(LogTerm term) const noexcept -> std::optional; + [[nodiscard]] auto getIndexRange() const noexcept -> LogRange; + // @brief Unconditionally accesses the last element [[nodiscard]] auto back() const noexcept -> decltype(_log)::const_reference; [[nodiscard]] auto empty() const noexcept -> bool; + [[nodiscard]] auto release(LogIndex stop) const -> InMemoryLog; + void appendInPlace(LoggerContext const& logContext, InMemoryLogEntry entry); [[nodiscard]] auto append(LoggerContext const& logContext, log_type entries) const -> InMemoryLog; [[nodiscard]] auto append(LoggerContext const& logContext, - ::immer::flex_vector const& entries) const - -> InMemoryLog; + log_type_persisted const& entries) const -> InMemoryLog; [[nodiscard]] auto 
getIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr; [[nodiscard]] auto getInternalIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr; // get an iterator for range [from, to). [[nodiscard]] auto getIteratorRange(LogIndex fromIdx, LogIndex toIdx) const - -> std::unique_ptr; + -> std::unique_ptr; [[nodiscard]] auto takeSnapshotUpToAndIncluding(LogIndex until) const -> InMemoryLog; [[nodiscard]] auto copyFlexVector() const -> log_type; // helpful for debugging - [[nodiscard]] static auto dump(log_type log) -> std::string; - [[nodiscard]] auto dump() -> std::string; + [[nodiscard]] static auto dump(log_type const& log) -> std::string; + [[nodiscard]] auto dump() const -> std::string; + + [[nodiscard]] static auto loadFromLogCore(LogCore const&) -> InMemoryLog; protected: - explicit InMemoryLog(log_type log); + explicit InMemoryLog(log_type log, LogIndex first); }; } // namespace arangodb::replication2::replicated_log diff --git a/arangod/Replication2/ReplicatedLog/LogCommon.cpp b/arangod/Replication2/ReplicatedLog/LogCommon.cpp index 7f1b17eb1176..6b0e09a66088 100644 --- a/arangod/Replication2/ReplicatedLog/LogCommon.cpp +++ b/arangod/Replication2/ReplicatedLog/LogCommon.cpp @@ -304,3 +304,74 @@ auto replication2::operator==(LogConfig const& left, LogConfig const& right) noe auto replication2::operator!=(const LogConfig& left, const LogConfig& right) noexcept -> bool { return !(left == right); } + +LogRange::LogRange(LogIndex from, LogIndex to) noexcept : from(from), to(to) { + TRI_ASSERT(from <= to); +} + +auto LogRange::empty() const noexcept -> bool { return from == to; } + +auto LogRange::count() const noexcept -> std::size_t { + return to.value - from.value; +} + +auto LogRange::contains(LogIndex idx) const noexcept -> bool { + return from <= idx && idx < to; +} + +auto replication2::operator<<(std::ostream& os, LogRange const& r) -> std::ostream& { + return os << "[" << r.from << ", " << r.to << ")"; +} + +auto replication2::intersect(LogRange a, 
LogRange b) noexcept -> LogRange { + auto max_from = std::max(a.from, b.from); + auto min_to = std::min(a.to, b.to); + if (max_from > min_to) { + return {LogIndex{0}, LogIndex{0}}; + } else { + return {max_from, min_to}; + } +} + +auto LogRange::end() const noexcept -> LogRange::Iterator { + return Iterator{to}; +} +auto LogRange::begin() const noexcept -> LogRange::Iterator { + return Iterator{from}; +} + +auto LogRange::Iterator::operator++() noexcept -> LogRange::Iterator& { + current = current + 1; + return *this; +} + +auto LogRange::Iterator::operator++(int) noexcept -> LogRange::Iterator { + auto idx = current; + current = current + 1; + return Iterator(idx); +} + +auto LogRange::Iterator::operator*() const noexcept -> LogIndex { + return current; +} +auto LogRange::Iterator::operator->() const noexcept -> LogIndex const* { + return &current; +} + +auto replication2::operator==(LogRange a, LogRange b) noexcept -> bool { + return a.from == b.from && a.to == b.to; +} + +auto replication2::operator!=(LogRange a, LogRange b) noexcept -> bool { + return !(a == b); +} + +auto replication2::operator==(LogRange::Iterator const& a, + LogRange::Iterator const& b) noexcept -> bool { + return a.current == b.current; +} + +auto replication2::operator!=(LogRange::Iterator const& a, + LogRange::Iterator const& b) noexcept -> bool { + return !(a == b); +} diff --git a/arangod/Replication2/ReplicatedLog/LogCommon.h b/arangod/Replication2/ReplicatedLog/LogCommon.h index 8f9fabc492af..21bdbe76f825 100644 --- a/arangod/Replication2/ReplicatedLog/LogCommon.h +++ b/arangod/Replication2/ReplicatedLog/LogCommon.h @@ -132,6 +132,48 @@ struct TermIndexPair : implement_compare { auto operator<=(TermIndexPair, TermIndexPair) noexcept -> bool; auto operator<<(std::ostream&, TermIndexPair) -> std::ostream&; +struct LogRange { + LogIndex from; + LogIndex to; + + LogRange(LogIndex from, LogIndex to) noexcept; + + [[nodiscard]] auto empty() const noexcept -> bool; + [[nodiscard]] auto count() 
const noexcept -> std::size_t; + [[nodiscard]] auto contains(LogIndex idx) const noexcept -> bool; + + friend auto operator<<(std::ostream& os, LogRange const& r) -> std::ostream&; + friend auto intersect(LogRange a, LogRange b) noexcept -> LogRange; + + struct Iterator { + auto operator++() noexcept -> Iterator&; + auto operator++(int) noexcept -> Iterator; + auto operator*() const noexcept -> LogIndex; + auto operator->() const noexcept -> LogIndex const*; + friend auto operator==(Iterator const& a, Iterator const& b) noexcept -> bool; + friend auto operator!=(Iterator const& a, Iterator const& b) noexcept -> bool; + + private: + friend LogRange; + explicit Iterator(LogIndex idx) : current(idx) {} + LogIndex current; + }; + + friend auto operator==(LogRange, LogRange) noexcept -> bool; + friend auto operator!=(LogRange, LogRange) noexcept -> bool; + + [[nodiscard]] auto begin() const noexcept -> Iterator; + [[nodiscard]] auto end() const noexcept -> Iterator; +}; + +auto operator<<(std::ostream& os, LogRange const& r) -> std::ostream&; +auto intersect(LogRange a, LogRange b) noexcept -> LogRange; +auto operator==(LogRange, LogRange) noexcept -> bool; +auto operator!=(LogRange, LogRange) noexcept -> bool; + +auto operator==(LogRange::Iterator const& a, LogRange::Iterator const& b) noexcept -> bool; +auto operator!=(LogRange::Iterator const& a, LogRange::Iterator const& b) noexcept -> bool; + struct LogPayload { explicit LogPayload(velocypack::UInt8Buffer dummy); @@ -240,13 +282,26 @@ class LogId : public arangodb::basics::Identifier { auto to_string(LogId logId) -> std::string; -struct LogIterator { - virtual ~LogIterator() = default; +template +struct TypedLogIterator { + virtual ~TypedLogIterator() = default; // The returned view is guaranteed to stay valid until a successive next() // call (only). 
- virtual auto next() -> std::optional = 0; + virtual auto next() -> std::optional = 0; }; +template +struct TypedLogRangeIterator : TypedLogIterator { + // returns the index interval [from, to) + // Note that this does not imply that all indexes in the range [from, to) + // are returned. Hence (to - from) is only an upper bound on the number of + // entries returned. + virtual auto range() const noexcept -> LogRange = 0; +}; + +using LogIterator = TypedLogIterator; +using LogRangeIterator = TypedLogRangeIterator; + struct LogConfig { std::size_t writeConcern = 1; bool waitForSync = false; diff --git a/arangod/Replication2/ReplicatedLog/LogCore.cpp b/arangod/Replication2/ReplicatedLog/LogCore.cpp index e0a65832b731..c5c7d1611fec 100644 --- a/arangod/Replication2/ReplicatedLog/LogCore.cpp +++ b/arangod/Replication2/ReplicatedLog/LogCore.cpp @@ -57,13 +57,14 @@ auto replicated_log::LogCore::insert(PersistedLogIterator& iter, bool waitForSyn return _persistedLog->insert(iter, opts); } -auto replicated_log::LogCore::read(LogIndex first) const -> std::unique_ptr { +auto replicated_log::LogCore::read(LogIndex first) const + -> std::unique_ptr { std::unique_lock guard(_operationMutex); return _persistedLog->read(first); } -auto replicated_log::LogCore::insertAsync(std::unique_ptr iter, bool waitForSync) - -> futures::Future { +auto replicated_log::LogCore::insertAsync(std::unique_ptr iter, + bool waitForSync) -> futures::Future { std::unique_lock guard(_operationMutex); // This will hold the mutex PersistedLog::WriteOptions opts; @@ -83,3 +84,8 @@ auto replicated_log::LogCore::releasePersistedLog() && -> std::shared_ptr LogId { return _persistedLog->id(); } + +auto LogCore::removeFront(LogIndex stop) -> Result { + std::unique_lock guard(_operationMutex); + return _persistedLog->removeFront(stop); +} diff --git a/arangod/Replication2/ReplicatedLog/LogCore.h b/arangod/Replication2/ReplicatedLog/LogCore.h index 95abccc43225..358fcb0f919f 100644 --- 
a/arangod/Replication2/ReplicatedLog/LogCore.h +++ b/arangod/Replication2/ReplicatedLog/LogCore.h @@ -60,6 +60,7 @@ struct alignas(64) LogCore { auto insert(PersistedLogIterator& iter, bool waitForSync) -> Result; [[nodiscard]] auto read(LogIndex first) const -> std::unique_ptr; auto removeBack(LogIndex first) -> Result; + auto removeFront(LogIndex stop) -> Result; auto releasePersistedLog() && -> std::shared_ptr; diff --git a/arangod/Replication2/ReplicatedLog/LogFollower.cpp b/arangod/Replication2/ReplicatedLog/LogFollower.cpp index 609ef32d7b73..827bf6ff730a 100644 --- a/arangod/Replication2/ReplicatedLog/LogFollower.cpp +++ b/arangod/Replication2/ReplicatedLog/LogFollower.cpp @@ -24,10 +24,10 @@ #include "Replication2/ReplicatedLog/Algorithms.h" #include "Replication2/ReplicatedLog/LogContextKeys.h" -#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" #include "Replication2/ReplicatedLog/LogStatus.h" #include "Replication2/ReplicatedLog/NetworkMessages.h" #include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" #include "Replication2/ReplicatedLog/ReplicatedLogMetrics.h" #include "RestServer/Metrics.h" @@ -38,8 +38,10 @@ #include #include -#include +#include +#include +#include #if (_MSC_VER >= 1) // suppress warnings: #pragma warning(push) @@ -48,8 +50,6 @@ // result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?) 
#pragma warning(disable : 4334) #endif -#include -#include #include #include #if (_MSC_VER >= 1) @@ -95,8 +95,7 @@ auto LogFollower::appendEntriesPreFlightChecks(GuardedFollowerData const& data, // It is always allowed to replace the log entirely if (req.prevLogEntry.index > LogIndex{0}) { - if (auto conflict = - algorithms::detectConflict(data._inMemoryLog, req.prevLogEntry); + if (auto conflict = algorithms::detectConflict(data._inMemoryLog, req.prevLogEntry); conflict.has_value()) { auto [reason, next] = *conflict; @@ -115,7 +114,6 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) auto self = _guardedFollowerData.getLockedGuard(); - { // Preflight checks - does the leader, log and other stuff match? // This code block should not modify the local state, only check values. @@ -133,7 +131,8 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) // as a copy, then modify the log on disk. This is an atomic operation. If // it fails, we forget the new state. Otherwise we replace the old in memory // state with the new value. 
- auto newInMemoryLog = self->_inMemoryLog.takeSnapshotUpToAndIncluding(req.prevLogEntry.index); + auto newInMemoryLog = + self->_inMemoryLog.takeSnapshotUpToAndIncluding(req.prevLogEntry.index); if (self->_inMemoryLog.getLastIndex() != req.prevLogEntry.index) { auto res = self->_logCore->removeBack(req.prevLogEntry.index + 1); @@ -149,30 +148,28 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) self->_inMemoryLog = std::move(newInMemoryLog); } - struct WaitForQueueResolve { - using QueueGuard = Guarded::mutex_guard_type; - - WaitForQueueResolve(QueueGuard guard, LogIndex commitIndex) noexcept - : _guard(std::move(guard)), - begin(_guard->begin()), - end(_guard->upper_bound(commitIndex)) {} - - QueueGuard _guard; - WaitForQueue::iterator begin; - WaitForQueue::iterator end; - }; - // Allocations - auto newInMemoryLog = self->_inMemoryLog.append(_loggerContext, req.entries); + auto newInMemoryLog = std::invoke([&] { + // if prevLogIndex is 0, we want to replace the entire log + // Note that req.entries might not start at 1, because the log could be + // compacted already. + if (req.prevLogEntry.index == LogIndex{0}) { + TRI_ASSERT(!req.entries.empty()); + LOG_CTX("14696", DEBUG, _loggerContext) + << "replacing my log. 
New logs starts at " + << req.entries.front().entry().logTermIndexPair() << "."; + return InMemoryLog{req.entries}; + } + return self->_inMemoryLog.append(_loggerContext, req.entries); + }); auto iter = std::make_unique(req.entries); - auto toBeResolvedPtr = std::make_unique>(); + auto toBeResolved = std::make_unique(); auto* core = self->_logCore.get(); static_assert(std::is_nothrow_move_constructible_v); auto commitToMemoryAndResolve = [selfGuard = std::move(self), req = std::move(req), - newInMemoryLog = std::move(newInMemoryLog), - toBeResolvedPtr = std::move(toBeResolvedPtr)]( + newInMemoryLog = std::move(newInMemoryLog), toBeResolved = std::move(toBeResolved)]( futures::Try&& tryRes) mutable -> std::pair { // We have to release the guard after this lambda is finished. // Otherwise it would be released when the lambda is destroyed, which @@ -203,25 +200,63 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) << req.prevLogEntry.index << ", leader commit index = " << req.leaderCommit; } + auto const generateToBeResolved = [&] { + try { + auto waitForQueue = self->_waitForQueue.getLockedGuard(); + + auto const end = waitForQueue->upper_bound(self->_commitIndex); + for (auto it = waitForQueue->begin(); it != end;) { + LOG_CTX("69022", TRACE, self->_follower._loggerContext) + << "resolving promise for index " << it->first; + toBeResolved->insert(waitForQueue->extract(it++)); + } + return DeferredAction([commitIndex = self->_commitIndex, + toBeResolved = std::move(toBeResolved)]() noexcept { + for (auto& it : *toBeResolved) { + if (!it.second.isFulfilled()) { + // This only throws if promise was fulfilled earlier. + it.second.setValue(WaitForResult{commitIndex, std::shared_ptr{}}); + } + } + }); + } catch (std::exception const& e) { + // If those promises are not fulfilled we can not continue. + // Note that the move constructor of std::multi_map is not noexcept. 
+ LOG_CTX("e7a3d", FATAL, self->_follower._loggerContext) + << "failed to fulfill replication promises due to exception; " + "system " + "can not continue. message: " + << e.what(); + FATAL_ERROR_EXIT(); + } catch (...) { + // If those promises are not fulfilled we can not continue. + // Note that the move constructor of std::multi_map is not noexcept. + LOG_CTX("c0bba", FATAL, self->_follower._loggerContext) + << "failed to fulfill replication promises due to exception; " + "system can not continue"; + FATAL_ERROR_EXIT(); + } + }; + auto action = std::invoke([&]() noexcept -> DeferredAction { + TRI_ASSERT(req.largestCommonIndex >= self->_largestCommonIndex) + << "req.lci = " << req.largestCommonIndex + << ", self.lci = " << self->_largestCommonIndex; + if (self->_largestCommonIndex < req.largestCommonIndex) { + LOG_CTX("fc467", TRACE, self->_follower._loggerContext) + << "largest common index went from " << self->_largestCommonIndex + << " to " << req.largestCommonIndex << "."; + self->_largestCommonIndex = req.largestCommonIndex; + // TODO do we want to call checkCompaction here? + std::ignore = self->checkCompaction(); + } + if (self->_commitIndex < req.leaderCommit && !self->_inMemoryLog.empty()) { self->_commitIndex = std::min(req.leaderCommit, self->_inMemoryLog.back().entry().logIndex()); LOG_CTX("1641d", TRACE, self->_follower._loggerContext) << "increment commit index: " << self->_commitIndex; - - auto toBeResolved = std::optional{std::in_place, self->_waitForQueue.getLockedGuard(), self->_commitIndex}; - static_assert(std::is_nothrow_move_assignable_v>); - *toBeResolvedPtr = std::move(toBeResolved); - return DeferredAction([toBeResolved = std::move(toBeResolvedPtr)]() noexcept { - auto& resolve = toBeResolved->value(); - for (auto it = resolve.begin; it != resolve.end; it = resolve._guard->erase(it)) { - if (!it->second.isFulfilled()) { - // This only throws if promise was fulfilled earlier. 
- it->second.setValue(std::shared_ptr{}); - } - } - }); + return generateToBeResolved(); } return {}; @@ -240,17 +275,19 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) .then(std::move(commitToMemoryAndResolve)) .then([measureTime = std::move(measureTimeGuard)](auto&& res) mutable { measureTime.fire(); - auto&& [result, toBeResolved] = res.get(); - // It is okay to fire here, because commitToMemoryAndResolve has released - // the guard already. - toBeResolved.fire(); + auto&& [result, action] = res.get(); + // It is okay to fire here, because commitToMemoryAndResolve has + // released the guard already. + action.fire(); return std::move(result); }); } replicated_log::LogFollower::GuardedFollowerData::GuardedFollowerData( LogFollower const& self, std::unique_ptr logCore, InMemoryLog inMemoryLog) - : _follower(self), _inMemoryLog(std::move(inMemoryLog)), _logCore(std::move(logCore)) {} + : _follower(self), + _inMemoryLog(std::move(inMemoryLog)), + _logCore(std::move(logCore)) {} auto replicated_log::LogFollower::getStatus() const -> LogStatus { return _guardedFollowerData.doUnderLock([this](auto const& followerData) { @@ -261,6 +298,7 @@ auto replicated_log::LogFollower::getStatus() const -> LogStatus { status.local = followerData.getLocalStatistics(); status.leader = _leaderId; status.term = _currentTerm; + status.largestCommonIndex = followerData._largestCommonIndex; return LogStatus{std::move(status)}; }); } @@ -320,8 +358,8 @@ auto replicated_log::LogFollower::waitFor(LogIndex idx) -> replicated_log::ILogParticipant::WaitForFuture { auto self = _guardedFollowerData.getLockedGuard(); if (self->_commitIndex >= idx) { - return futures::Future>{ - std::in_place, std::make_shared(idx, _currentTerm)}; + return futures::Future{std::in_place, self->_commitIndex, + std::make_shared(idx, _currentTerm)}; } // emplace might throw a std::bad_alloc but the remainder is noexcept // so either you inserted it and or nothing happens @@ -335,17 +373,19 
@@ auto replicated_log::LogFollower::waitFor(LogIndex idx) auto replicated_log::LogFollower::waitForIterator(LogIndex index) -> replicated_log::ILogParticipant::WaitForIteratorFuture { if (index == LogIndex{0}) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, "invalid parameter; log index 0 is invalid"); + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, + "invalid parameter; log index 0 is invalid"); } - return waitFor(index).thenValue([this, self = shared_from_this(), index](auto&& quorum) -> WaitForIteratorFuture { + return waitFor(index).thenValue([this, self = shared_from_this(), + index](auto&& quorum) -> WaitForIteratorFuture { auto [fromIndex, iter] = _guardedFollowerData.doUnderLock( - [&](GuardedFollowerData& followerData) -> std::pair> { + [&](GuardedFollowerData& followerData) -> std::pair> { TRI_ASSERT(index <= followerData._commitIndex); /* * This code here ensures that if only private log entries are present - * we do not reply with an empty iterator but instead wait for the next - * entry containing payload. + * we do not reply with an empty iterator but instead wait for the + * next entry containing payload. 
*/ auto actualIndex = index; @@ -377,7 +417,7 @@ auto replicated_log::LogFollower::waitForIterator(LogIndex index) } auto replicated_log::LogFollower::getLogIterator(LogIndex firstIndex) const --> std::unique_ptr { + -> std::unique_ptr { return _guardedFollowerData.doUnderLock( [&](GuardedFollowerData const& data) -> std::unique_ptr { auto const endIdx = data._inMemoryLog.getLastTermIndexPair().index + 1; @@ -387,7 +427,7 @@ auto replicated_log::LogFollower::getLogIterator(LogIndex firstIndex) const } auto replicated_log::LogFollower::getCommittedLogIterator(LogIndex firstIndex) const --> std::unique_ptr { + -> std::unique_ptr { return _guardedFollowerData.doUnderLock( [&](GuardedFollowerData const& data) -> std::unique_ptr { return data.getCommittedLogIterator(firstIndex); @@ -395,7 +435,7 @@ auto replicated_log::LogFollower::getCommittedLogIterator(LogIndex firstIndex) c } auto replicated_log::LogFollower::GuardedFollowerData::getCommittedLogIterator(LogIndex firstIndex) const --> std::unique_ptr { + -> std::unique_ptr { auto const endIdx = _inMemoryLog.getNextIndex(); TRI_ASSERT(firstIndex < endIdx); // return an iterator for the range [firstIndex, _commitIndex + 1) @@ -406,12 +446,46 @@ replicated_log::LogFollower::~LogFollower() { _logMetrics->replicatedLogFollowerNumber->fetch_sub(1); } +auto LogFollower::release(LogIndex doneWithIdx) -> Result { + return _guardedFollowerData.doUnderLock([&](GuardedFollowerData& self) -> Result { + TRI_ASSERT(doneWithIdx <= self._inMemoryLog.getLastIndex()); + if (doneWithIdx <= self._releaseIndex) { + return {}; + } + self._releaseIndex = doneWithIdx; + LOG_CTX("a0c95", TRACE, _loggerContext) + << "new release index set to " << self._releaseIndex; + return self.checkCompaction(); + }); +} auto replicated_log::LogFollower::GuardedFollowerData::getLocalStatistics() const noexcept -> LogStatistics { auto result = LogStatistics{}; result.commitIndex = _commitIndex; - result.spearHead.index = _inMemoryLog.getLastIndex(); - 
result.spearHead.term = _inMemoryLog.getLastTerm(); + result.firstIndex = _inMemoryLog.getFirstIndex(); + result.spearHead = _inMemoryLog.getLastTermIndexPair(); return result; } + +auto LogFollower::GuardedFollowerData::checkCompaction() -> Result { + auto const compactionStop = std::min(_largestCommonIndex, _releaseIndex + 1); + LOG_CTX("080d5", TRACE, _follower._loggerContext) + << "compaction index calculated as " << compactionStop; + if (compactionStop <= _inMemoryLog.getFirstIndex() + 1000) { + // only do a compaction every 1000 entries + LOG_CTX("ebb9f", TRACE, _follower._loggerContext) + << "won't trigger a compaction, not enough entries. First index = " + << _inMemoryLog.getFirstIndex(); + return {}; + } + + auto newLog = _inMemoryLog.release(compactionStop); + auto res = _logCore->removeFront(compactionStop); + if (res.ok()) { + _inMemoryLog = std::move(newLog); + } + LOG_CTX("f1028", TRACE, _follower._loggerContext) + << "compaction result = " << res.errorMessage(); + return res; +} diff --git a/arangod/Replication2/ReplicatedLog/LogFollower.h b/arangod/Replication2/ReplicatedLog/LogFollower.h index 14c285da2528..0e36d52ecb0d 100644 --- a/arangod/Replication2/ReplicatedLog/LogFollower.h +++ b/arangod/Replication2/ReplicatedLog/LogFollower.h @@ -43,9 +43,9 @@ namespace arangodb::replication2::replicated_log { /** * @brief Follower instance of a replicated log. 
*/ -class LogFollower : public ILogParticipant, - public AbstractFollower, - public std::enable_shared_from_this { +class LogFollower final : public ILogParticipant, + public AbstractFollower, + public std::enable_shared_from_this { public: ~LogFollower() override; LogFollower(LoggerContext const&, std::shared_ptr logMetrics, @@ -53,7 +53,8 @@ class LogFollower : public ILogParticipant, std::optional leaderId, InMemoryLog inMemoryLog); // follower only - [[nodiscard]] auto appendEntries(AppendEntriesRequest) -> futures::Future override; + [[nodiscard]] auto appendEntries(AppendEntriesRequest) + -> futures::Future override; [[nodiscard]] auto getStatus() const -> LogStatus override; [[nodiscard]] auto resign() && -> std::tuple, DeferredAction> override; @@ -61,8 +62,12 @@ class LogFollower : public ILogParticipant, [[nodiscard]] auto waitFor(LogIndex) -> WaitForFuture override; [[nodiscard]] auto waitForIterator(LogIndex index) -> WaitForIteratorFuture override; [[nodiscard]] auto getParticipantId() const noexcept -> ParticipantId const& override; - [[nodiscard]] auto getLogIterator(LogIndex firstIndex) const -> std::unique_ptr; - [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const -> std::unique_ptr; + [[nodiscard]] auto getLogIterator(LogIndex firstIndex) const + -> std::unique_ptr; + [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const + -> std::unique_ptr; + + [[nodiscard]] auto release(LogIndex doneWithIdx) -> Result override; private: struct GuardedFollowerData { @@ -72,12 +77,15 @@ class LogFollower : public ILogParticipant, [[nodiscard]] auto getLocalStatistics() const noexcept -> LogStatistics; [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const - -> std::unique_ptr; + -> std::unique_ptr; + [[nodiscard]] auto checkCompaction() -> Result; LogFollower const& _follower; InMemoryLog _inMemoryLog; std::unique_ptr _logCore; LogIndex _commitIndex{0}; + LogIndex _largestCommonIndex; + LogIndex _releaseIndex; 
MessageId _lastRecvMessageId{0}; Guarded _waitForQueue; }; @@ -88,8 +96,8 @@ class LogFollower : public ILogParticipant, LogTerm const _currentTerm; // We use the unshackled mutex because guards are captured by futures. - // When using a std::mutex we would have to release the mutex in the same thread. - // Using the UnshackledMutex this is no longer required. + // When using a std::mutex we would have to release the mutex in the same + // thread. Using the UnshackledMutex this is no longer required. Guarded _guardedFollowerData; [[nodiscard]] auto appendEntriesPreFlightChecks(GuardedFollowerData const&, diff --git a/arangod/Replication2/ReplicatedLog/LogLeader.cpp b/arangod/Replication2/ReplicatedLog/LogLeader.cpp index 0fc0ada8daf2..6d38e5b353c2 100644 --- a/arangod/Replication2/ReplicatedLog/LogLeader.cpp +++ b/arangod/Replication2/ReplicatedLog/LogLeader.cpp @@ -22,16 +22,6 @@ #include "LogLeader.h" -#include "Replication2/ReplicatedLog/InMemoryLog.h" -#include "Replication2/ReplicatedLog/LogContextKeys.h" -#include "Replication2/ReplicatedLog/LogCore.h" -#include "Replication2/ReplicatedLog/LogStatus.h" -#include "Replication2/ReplicatedLog/PersistedLog.h" -#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" -#include "Replication2/ReplicatedLog/ReplicatedLogMetrics.h" -#include "RestServer/Metrics.h" -#include "Scheduler/SchedulerFeature.h" - #include #include #include @@ -45,12 +35,36 @@ #include #include #include - +#include #include -#include +#include +#include #include -#include +#include +#include +#include +#include #include +#include +#include + +#include "Basics/ErrorCode.h" +#include "Futures/Promise-inl.h" +#include "Futures/Promise.h" +#include "Futures/Unit.h" +#include "Replication2/DeferredExecution.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogContextKeys.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include 
"Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" +#include "Replication2/ReplicatedLog/ReplicatedLogMetrics.h" +#include "RestServer/Metrics.h" +#include "Scheduler/SchedulerFeature.h" +#include "Scheduler/SupervisedScheduler.h" +#include "immer/detail/iterator_facade.hpp" +#include "immer/detail/rbts/rrbtree_iterator.hpp" #if (_MSC_VER >= 1) // suppress warnings: @@ -62,7 +76,6 @@ #endif #include #include -#include #if (_MSC_VER >= 1) #pragma warning(pop) #endif @@ -179,7 +192,7 @@ void replicated_log::LogLeader::handleResolvedPromiseSet( for (auto& promise : resolvedPromises._set) { TRI_ASSERT(promise.second.valid()); - promise.second.setValue(resolvedPromises._quorum); + promise.second.setValue(resolvedPromises.result); } } @@ -211,10 +224,13 @@ void replicated_log::LogLeader::executeAppendEntriesRequests( << "last acked index = " << follower->lastAckedEntry << ", current index = " << lastAvailableIndex << ", last acked commit index = " << follower->lastAckedCommitIndex - << ", current commit index = " << self._commitIndex; + << ", current commit index = " << self._commitIndex + << ", last acked lci = " << follower->lastAckedLCI + << ", current lci = " << self._largestCommonIndex; // We can only get here if there is some new information for this follower TRI_ASSERT(follower->lastAckedEntry.index != lastAvailableIndex.index || - self._commitIndex != follower->lastAckedCommitIndex); + self._commitIndex != follower->lastAckedCommitIndex || + self._largestCommonIndex != follower->lastAckedLCI); return self.createAppendEntriesRequest(*follower, lastAvailableIndex); }); @@ -222,6 +238,10 @@ void replicated_log::LogLeader::executeAppendEntriesRequests( auto messageId = request.messageId; LOG_CTX("1b0ec", TRACE, follower->logContext) << "sending append entries, messageId = " << messageId; + + // We take the start time here again to have a more precise measurement. 
+ // (And do not use follower._lastRequestStartTP) + // TODO really needed? auto startTime = std::chrono::steady_clock::now(); // Capture a weak pointer `parentLog` that will be locked // when the request returns. If the locking is successful @@ -229,6 +249,7 @@ void replicated_log::LogLeader::executeAppendEntriesRequests( follower->_impl->appendEntries(std::move(request)) .thenFinal([weakParentLog = it->_parentLog, weakFollower = it->_follower, lastIndex = lastIndex, currentCommitIndex = request.leaderCommit, + currentLCI = request.largestCommonIndex, currentTerm = logLeader->_currentTerm, messageId = messageId, startTime, logMetrics = logMetrics]( futures::Try&& res) noexcept { @@ -250,8 +271,8 @@ void replicated_log::LogLeader::executeAppendEntriesRequests( if (!guarded->_didResign) { // Is throwing the right thing to do here? - No, we are in a finally return guarded->handleAppendEntriesResponse( - *follower, lastIndex, currentCommitIndex, currentTerm, - std::move(res), endTime - startTime, messageId); + *follower, lastIndex, currentCommitIndex, currentLCI, + currentTerm, std::move(res), endTime - startTime, messageId); } else { LOG_CTX("da116", DEBUG, follower->logContext) << "received response from follower but leader " @@ -302,7 +323,7 @@ auto replicated_log::LogLeader::construct( std::move(id), term, std::move(inMemoryLog)) {} }; - auto log = InMemoryLog{logContext, *logCore}; + auto log = InMemoryLog::loadFromLogCore(*logCore); auto const lastIndex = log.getLastTermIndexPair(); if (lastIndex.term != term) { // Immediately append an empty log entry in the new term. 
This is necessary @@ -398,19 +419,39 @@ auto replicated_log::LogLeader::readReplicatedEntryByIndex(LogIndex idx) const } auto replicated_log::LogLeader::getStatus() const -> LogStatus { - return _guardedLeaderData.doUnderLock([term = _currentTerm](auto& leaderData) { + return _guardedLeaderData.doUnderLock([term = _currentTerm](GuardedLeaderData const& leaderData) { if (leaderData._didResign) { THROW_ARANGO_EXCEPTION(TRI_ERROR_REPLICATION_REPLICATED_LOG_LEADER_RESIGNED); } LeaderStatus status; status.local = leaderData.getLocalStatistics(); status.term = term; + status.largestCommonIndex = leaderData._largestCommonIndex; for (FollowerInfo const& f : leaderData._follower) { - status.follower[f._impl->getParticipantId()] = { - LogStatistics{f.lastAckedEntry, f.lastAckedCommitIndex}, f.lastErrorReason, - std::chrono::duration_cast>(f._lastRequestLatency) - .count()}; + auto lastRequestLatencyMS = + std::chrono::duration_cast>(f._lastRequestLatency); + auto state = std::invoke([&] { + switch (f._state) { + case FollowerInfo::State::ERROR_BACKOFF: + return FollowerState::withErrorBackoff( + std::chrono::duration_cast>( + f._errorBackoffEndTP - std::chrono::steady_clock::now()), + f.numErrorsSinceLastAnswer); + case FollowerInfo::State::REQUEST_IN_FLIGHT: + return FollowerState::withRequestInFlight( + std::chrono::duration_cast>( + std::chrono::steady_clock::now() - f._lastRequestStartTP)); + default: + return FollowerState::withUpToDate(); + } + }); + status.follower.emplace(f._impl->getParticipantId(), + FollowerStatistics{LogStatistics{f.lastAckedEntry, f.lastAckedCommitIndex}, + f.lastErrorReason, + lastRequestLatencyMS, state}); } + + status.commitLagMS = leaderData.calculateCommitLag(); return LogStatus{std::move(status)}; }); } @@ -450,8 +491,8 @@ auto replicated_log::LogLeader::waitFor(LogIndex index) -> WaitForFuture { return promise.getFuture(); } if (leaderData._commitIndex >= index) { - return futures::Future>{std::in_place, - leaderData._lastQuorum}; + 
return futures::Future{std::in_place, leaderData._commitIndex, + leaderData._lastQuorum}; } auto it = leaderData._waitForQueue.emplace(index, WaitForPromise{}); auto& promise = it->second; @@ -478,7 +519,7 @@ auto replicated_log::LogLeader::triggerAsyncReplication() -> void { auto replicated_log::LogLeader::GuardedLeaderData::updateCommitIndexLeader( LogIndex newIndex, std::shared_ptr quorum) -> ResolvedPromiseSet { LOG_CTX("a9a7e", TRACE, _self._logContext) - << "updating commit index to " << newIndex << "with quorum " << quorum->quorum; + << "updating commit index to " << newIndex << " with quorum " << quorum->quorum; auto oldIndex = _commitIndex; TRI_ASSERT(_commitIndex < newIndex) @@ -490,16 +531,17 @@ auto replicated_log::LogLeader::GuardedLeaderData::updateCommitIndexLeader( WaitForQueue toBeResolved; auto const end = _waitForQueue.upper_bound(_commitIndex); for (auto it = _waitForQueue.begin(); it != end;) { - LOG_CTX("37d9c", TRACE, _self._logContext) + LOG_CTX("37d9d", TRACE, _self._logContext) << "resolving promise for index " << it->first; toBeResolved.insert(_waitForQueue.extract(it++)); } - return ResolvedPromiseSet{std::move(toBeResolved), std::move(quorum), + return ResolvedPromiseSet{std::move(toBeResolved), + WaitForResult(newIndex, std::move(quorum)), _inMemoryLog.slice(oldIndex, newIndex + 1)}; } catch (std::exception const& e) { // If those promises are not fulfilled we can not continue. // Note that the move constructor of std::multi_map is not noexcept. - LOG_CTX("e7a4d", FATAL, _self._logContext) + LOG_CTX("e7a4e", FATAL, _self._logContext) << "failed to fulfill replication promises due to exception; system " "can not continue. 
message: " << e.what(); @@ -525,7 +567,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::prepareAppendEntries() auto replicated_log::LogLeader::GuardedLeaderData::prepareAppendEntry(FollowerInfo& follower) -> std::optional { - if (follower.requestInFlight) { + if (follower._state != FollowerInfo::State::IDLE) { LOG_CTX("1d7b6", TRACE, follower.logContext) << "request in flight - skipping"; return std::nullopt; // wait for the request to return @@ -536,14 +578,17 @@ auto replicated_log::LogLeader::GuardedLeaderData::prepareAppendEntry(FollowerIn << "last acked index = " << follower.lastAckedEntry << ", current index = " << lastAvailableIndex << ", last acked commit index = " << follower.lastAckedCommitIndex - << ", current commit index = " << _commitIndex; + << ", current commit index = " << _commitIndex + << ", last acked lci = " << follower.lastAckedLCI + << ", current lci = " << _largestCommonIndex; if (follower.lastAckedEntry.index == lastAvailableIndex.index && - _commitIndex == follower.lastAckedCommitIndex) { - LOG_CTX("74b71", TRACE, _self._logContext) << "up to date"; + _commitIndex == follower.lastAckedCommitIndex && + _largestCommonIndex == follower.lastAckedLCI) { + LOG_CTX("74b71", TRACE, follower.logContext) << "up to date"; return std::nullopt; // nothing to replicate } - follower.requestInFlight = true; + auto const executionDelay = std::invoke([&] { using namespace std::chrono_literals; if (follower.numErrorsSinceLastAnswer > 0) { @@ -555,8 +600,11 @@ auto replicated_log::LogLeader::GuardedLeaderData::prepareAppendEntry(FollowerIn << follower.numErrorsSinceLastAnswer << " requests failed, last one was " << follower.lastSentMessageId << " - waiting " << executionDelay / 1ms << "ms before sending next message."; + follower._state = FollowerInfo::State::ERROR_BACKOFF; + follower._errorBackoffEndTP = std::chrono::steady_clock::now() + executionDelay; return executionDelay; } else { + follower._state = FollowerInfo::State::PREPARE; return 0us; } 
}); @@ -571,14 +619,19 @@ auto replicated_log::LogLeader::GuardedLeaderData::createAppendEntriesRequest( AppendEntriesRequest req; req.leaderCommit = _commitIndex; + req.largestCommonIndex = _largestCommonIndex; req.leaderTerm = _self._currentTerm; req.leaderId = _self._id; req.waitForSync = _self._config.waitForSync; req.messageId = ++follower.lastSentMessageId; + follower._state = FollowerInfo::State::REQUEST_IN_FLIGHT; + follower._lastRequestStartTP = std::chrono::steady_clock::now(); + if (lastAcked) { req.prevLogEntry.index = lastAcked->entry().logIndex(); req.prevLogEntry.term = lastAcked->entry().logTerm(); + TRI_ASSERT(req.prevLogEntry.index == follower.lastAckedEntry.index); } else { req.prevLogEntry.index = LogIndex{0}; req.prevLogEntry.term = LogTerm{0}; @@ -605,14 +658,15 @@ auto replicated_log::LogLeader::GuardedLeaderData::createAppendEntriesRequest( << "creating append entries request with " << req.entries.size() << " entries , prevLogEntry.term = " << req.prevLogEntry.term << ", prevLogEntry.index = " << req.prevLogEntry.index - << ", leaderCommit = " << req.leaderCommit; + << ", leaderCommit = " << req.leaderCommit + << ", lci = " << req.largestCommonIndex << ", msg-id = " << req.messageId; return std::make_pair(std::move(req), lastIndex); } auto replicated_log::LogLeader::GuardedLeaderData::handleAppendEntriesResponse( FollowerInfo& follower, TermIndexPair lastIndex, LogIndex currentCommitIndex, - LogTerm currentTerm, futures::Try&& res, + LogIndex currentLCI, LogTerm currentTerm, futures::Try&& res, std::chrono::steady_clock::duration latency, MessageId messageId) -> std::pair>, ResolvedPromiseSet> { if (currentTerm != _self._currentTerm) { @@ -629,7 +683,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::handleAppendEntriesResponse( LOG_CTX("35a32", TRACE, follower.logContext) << "received message " << messageId << " - no other requests in flight"; // there is no request in flight currently - follower.requestInFlight = false; + 
follower._state = FollowerInfo::State::IDLE; } if (res.hasValue()) { auto& response = res.get(); @@ -645,6 +699,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::handleAppendEntriesResponse( follower.numErrorsSinceLastAnswer = 0; follower.lastAckedEntry = lastIndex; follower.lastAckedCommitIndex = currentCommitIndex; + follower.lastAckedLCI = currentLCI; toBeResolved = checkCommitIndex(); } else { TRI_ASSERT(response.reason != AppendEntriesErrorReason::NONE); @@ -704,7 +759,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::getInternalLogIterator(LogInd } auto replicated_log::LogLeader::GuardedLeaderData::getCommittedLogIterator(LogIndex firstIndex) const - -> std::unique_ptr { + -> std::unique_ptr { auto const endIdx = _inMemoryLog.getNextIndex(); TRI_ASSERT(firstIndex < endIdx); // return an iterator for the range [firstIndex, _commitIndex + 1) @@ -714,6 +769,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::getCommittedLogIterator(LogIn auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> ResolvedPromiseSet { auto const quorum_size = _self._config.writeConcern; + auto newLargestCommonIndex = _commitIndex; std::vector> indexes; indexes.reserve(_follower.size()); for (auto const& follower : _follower) { @@ -741,6 +797,8 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve << lastAckedEntry.index << ") is of term " << lastAckedEntry.term << ", but we're in term " << _self._currentTerm << "."; } + + newLargestCommonIndex = std::min(follower.lastAckedCommitIndex, newLargestCommonIndex); } LOG_CTX("a2d04", TRACE, _self._logContext) << "checking commit index on set " << indexes; @@ -751,6 +809,20 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve return {}; } + if (newLargestCommonIndex != _largestCommonIndex) { + TRI_ASSERT(newLargestCommonIndex > _largestCommonIndex); + LOG_CTX("851bb", TRACE, _self._logContext) + << "largest common index went from " << 
_largestCommonIndex << " to " + << newLargestCommonIndex; + _largestCommonIndex = newLargestCommonIndex; + // TODO this not the right place to do a sync compaction on the log + // when we want to update the commit index. + // We can either ignore compactions that would be triggered by an lci + // increment (because eventually the state machine will call release + // again) or we put this in a deferred action. + std::ignore = checkCompaction(); + } + auto nth = indexes.begin(); std::advance(nth, quorum_size - 1); @@ -772,7 +844,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve auto const quorum_data = std::make_shared(commitIndex, _self._currentTerm, std::move(quorum)); - return updateCommitIndexLeader(commitIndex, std::move(quorum_data)); + return updateCommitIndexLeader(commitIndex, quorum_data); } return {}; } @@ -780,8 +852,8 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve auto replicated_log::LogLeader::GuardedLeaderData::getLocalStatistics() const -> LogStatistics { auto result = LogStatistics{}; result.commitIndex = _commitIndex; - result.spearHead.index = _inMemoryLog.getLastIndex(); - result.spearHead.term = _inMemoryLog.getLastTerm(); + result.firstIndex = _inMemoryLog.getFirstIndex(); + result.spearHead = _inMemoryLog.getLastTermIndexPair(); return result; } @@ -789,6 +861,55 @@ replicated_log::LogLeader::GuardedLeaderData::GuardedLeaderData(replicated_log:: InMemoryLog inMemoryLog) : _self(self), _inMemoryLog(std::move(inMemoryLog)) {} +auto replicated_log::LogLeader::release(LogIndex doneWithIdx) -> Result { + return _guardedLeaderData.doUnderLock([&](GuardedLeaderData& self) -> Result { + TRI_ASSERT(doneWithIdx <= self._inMemoryLog.getLastIndex()); + if (doneWithIdx <= self._releaseIndex) { + return {}; + } + self._releaseIndex = doneWithIdx; + LOG_CTX("a0c96", TRACE, _logContext) << "new release index set to " << self._releaseIndex; + return self.checkCompaction(); + }); +} + 
+auto replicated_log::LogLeader::GuardedLeaderData::checkCompaction() -> Result { + auto const compactionStop = std::min(_largestCommonIndex, _releaseIndex + 1); + LOG_CTX("080d6", TRACE, _self._logContext) + << "compaction index calculated as " << compactionStop; + if (compactionStop <= _inMemoryLog.getFirstIndex() + 1000) { + // only do a compaction every 1000 entries + LOG_CTX("ebba0", TRACE, _self._logContext) + << "won't trigger a compaction, not enough entries. First index = " + << _inMemoryLog.getFirstIndex(); + return {}; + } + + auto newLog = _inMemoryLog.release(compactionStop); + auto res = _self._localFollower->release(compactionStop); + if (res.ok()) { + _inMemoryLog = std::move(newLog); + } + LOG_CTX("f1029", TRACE, _self._logContext) + << "compaction result = " << res.errorMessage(); + return res; +} + +auto replicated_log::LogLeader::GuardedLeaderData::calculateCommitLag() const noexcept + -> std::chrono::duration { + auto memtry = _inMemoryLog.getEntryByIndex(_commitIndex + 1); + if (memtry.has_value()) { + return std::chrono::duration_cast>( + std::chrono::steady_clock::now() - memtry->insertTp()); + } else { + TRI_ASSERT(_commitIndex == _inMemoryLog.getLastIndex()) + << "If there is no entry following the commitIndex the last index " + "should be the commitIndex. 
_commitIndex = " + << _commitIndex << ", lastIndex = " << _inMemoryLog.getLastIndex(); + return {}; + } +} + auto replicated_log::LogLeader::getReplicatedLogSnapshot() const -> InMemoryLog::log_type { auto [log, commitIndex] = _guardedLeaderData.doUnderLock([](auto const& leaderData) { if (leaderData._didResign) { @@ -808,15 +929,16 @@ auto replicated_log::LogLeader::waitForIterator(LogIndex index) "invalid parameter; log index 0 is invalid"); } - return waitFor(index).thenValue([this, self = shared_from_this(), index](auto&& quorum) -> WaitForIteratorFuture { + return waitFor(index).thenValue([this, self = shared_from_this(), + index](auto&& quorum) -> WaitForIteratorFuture { auto [actualIndex, iter] = _guardedLeaderData.doUnderLock( - [&](GuardedLeaderData& leaderData) -> std::pair> { + [&](GuardedLeaderData& leaderData) -> std::pair> { TRI_ASSERT(index <= leaderData._commitIndex); /* * This code here ensures that if only private log entries are present - * we do not reply with an empty iterator but instead wait for the next - * entry containing payload. + * we do not reply with an empty iterator but instead wait for the + * next entry containing payload. 
*/ auto actualIndex = index; @@ -859,6 +981,10 @@ auto replicated_log::LogLeader::construct( term, logContext, std::move(logMetrics)); } +auto replicated_log::LogLeader::copyInMemoryLog() const -> replicated_log::InMemoryLog { + return _guardedLeaderData.getLockedGuard()->_inMemoryLog; +} + replicated_log::LogLeader::LocalFollower::LocalFollower( replicated_log::LogLeader& self, LoggerContext logContext, std::unique_ptr logCore, [[maybe_unused]] TermIndexPair lastIndex) @@ -942,6 +1068,17 @@ auto replicated_log::LogLeader::LocalFollower::resign() && noexcept }); } +auto replicated_log::LogLeader::LocalFollower::release(LogIndex stop) const -> Result { + auto res = _guardedLogCore.doUnderLock([&](auto& core) { + LOG_CTX("23745", DEBUG, _logContext) + << "local follower releasing with stop at " << stop; + return core->removeFront(stop); + }); + LOG_CTX_IF("2aba1", WARN, _logContext, res.fail()) + << "local follower failed to release log entries: " << res.errorMessage(); + return res; +} + replicated_log::LogLeader::PreparedAppendEntryRequest::PreparedAppendEntryRequest( std::shared_ptr const& logLeader, FollowerInfo& follower, std::chrono::steady_clock::duration executionDelay) diff --git a/arangod/Replication2/ReplicatedLog/LogLeader.h b/arangod/Replication2/ReplicatedLog/LogLeader.h index 88d9a6cd428c..5d0ecb043dc4 100644 --- a/arangod/Replication2/ReplicatedLog/LogLeader.h +++ b/arangod/Replication2/ReplicatedLog/LogLeader.h @@ -22,17 +22,8 @@ #pragma once -#include "Replication2/ReplicatedLog/ILogParticipant.h" -#include "Replication2/ReplicatedLog/InMemoryLog.h" -#include "Replication2/ReplicatedLog/LogCommon.h" -#include "Replication2/ReplicatedLog/NetworkMessages.h" -#include "Replication2/ReplicatedLog/types.h" - -#include "Replication2/LoggerContext.h" - #include #include - #include #include #include @@ -40,6 +31,23 @@ #include #include #include +#include +#include + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include 
"Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/ReplicatedLog/NetworkMessages.h" +#include "Replication2/ReplicatedLog/types.h" +#include "Replication2/LoggerContext.h" +#include "Basics/Result.h" +#include "Futures/Future.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogStatus.h" + +namespace arangodb { +struct DeferredAction; +} // namespace arangodb #if (_MSC_VER >= 1) // suppress warnings: @@ -62,9 +70,10 @@ class Try; namespace arangodb::replication2::replicated_log { struct LogCore; struct ReplicatedLogMetrics; -} +} // namespace arangodb::replication2::replicated_log namespace arangodb::replication2::replicated_log { +struct PersistedLogIterator; /** * @brief Leader instance of a replicated log. @@ -108,7 +117,8 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar [[nodiscard]] auto getReplicatedLogSnapshot() const -> InMemoryLog::log_type; - [[nodiscard]] auto readReplicatedEntryByIndex(LogIndex idx) const -> std::optional; + [[nodiscard]] auto readReplicatedEntryByIndex(LogIndex idx) const + -> std::optional; // Triggers sending of appendEntries requests to all followers. This continues // until all participants are perfectly in sync, and will then stop. @@ -122,6 +132,10 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar [[nodiscard]] auto getParticipantId() const noexcept -> ParticipantId const&; + [[nodiscard]] auto release(LogIndex doneWithIdx) -> Result override; + + [[nodiscard]] auto copyInMemoryLog() const -> InMemoryLog; + protected: // Use the named constructor construct() to create a leader! 
LogLeader(LoggerContext logContext, std::shared_ptr logMetrics, @@ -129,6 +143,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar private: struct GuardedLeaderData; + using Guard = MutexGuard>; using ConstGuard = MutexGuard>; @@ -137,14 +152,23 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar TermIndexPair lastLogIndex, LoggerContext const& logContext); std::chrono::steady_clock::duration _lastRequestLatency{}; + std::chrono::steady_clock::time_point _lastRequestStartTP{}; + std::chrono::steady_clock::time_point _errorBackoffEndTP{}; std::shared_ptr _impl; TermIndexPair lastAckedEntry = TermIndexPair{LogTerm{0}, LogIndex{0}}; LogIndex lastAckedCommitIndex = LogIndex{0}; + LogIndex lastAckedLCI = LogIndex{0}; MessageId lastSentMessageId{0}; std::size_t numErrorsSinceLastAnswer = 0; AppendEntriesErrorReason lastErrorReason = AppendEntriesErrorReason::NONE; LoggerContext const logContext; - bool requestInFlight = false; + + enum class State { + IDLE, + PREPARE, + ERROR_BACKOFF, + REQUEST_IN_FLIGHT, + } _state = State::IDLE; }; struct LocalFollower final : AbstractFollower { @@ -164,6 +188,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar -> arangodb::futures::Future override; [[nodiscard]] auto resign() && noexcept -> std::unique_ptr; + [[nodiscard]] auto release(LogIndex stop) const -> Result; private: LogLeader& _leader; @@ -187,7 +212,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar struct ResolvedPromiseSet { WaitForQueue _set; - std::shared_ptr _quorum; + WaitForResult result; ::immer::flex_vector _commitedLogEntries; }; @@ -208,11 +233,12 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar [[nodiscard]] auto handleAppendEntriesResponse( FollowerInfo& follower, TermIndexPair lastIndex, LogIndex currentCommitIndex, - LogTerm currentTerm, futures::Try&& res, + LogIndex currentLCI, LogTerm currentTerm, futures::Try&& res, 
std::chrono::steady_clock::duration latency, MessageId messageId) -> std::pair>, ResolvedPromiseSet>; [[nodiscard]] auto checkCommitIndex() -> ResolvedPromiseSet; + [[nodiscard]] auto checkCompaction() -> Result; [[nodiscard]] auto updateCommitIndexLeader(LogIndex newIndex, std::shared_ptr quorum) @@ -222,7 +248,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar -> std::unique_ptr; [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const - -> std::unique_ptr; + -> std::unique_ptr; [[nodiscard]] auto getLocalStatistics() const -> LogStatistics; @@ -230,12 +256,17 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar TermIndexPair const& lastAvailableIndex) const -> std::pair; + [[nodiscard]] auto calculateCommitLag() const noexcept + -> std::chrono::duration; + LogLeader& _self; InMemoryLog _inMemoryLog; std::vector _follower{}; WaitForQueue _waitForQueue{}; std::shared_ptr _lastQuorum{}; LogIndex _commitIndex{0}; + LogIndex _largestCommonIndex{0}; + LogIndex _releaseIndex{0}; bool _didResign{false}; }; @@ -262,7 +293,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar std::vector> requests, std::shared_ptr const& logMetrics); static void handleResolvedPromiseSet(ResolvedPromiseSet set, - std::shared_ptr const& logMetrics); + std::shared_ptr const& logMetrics); auto tryHardToClearQueue() noexcept -> void; }; diff --git a/arangod/Replication2/ReplicatedLog/LogStatus.cpp b/arangod/Replication2/ReplicatedLog/LogStatus.cpp index 811239611e27..a275c1f1e49e 100644 --- a/arangod/Replication2/ReplicatedLog/LogStatus.cpp +++ b/arangod/Replication2/ReplicatedLog/LogStatus.cpp @@ -39,7 +39,7 @@ void UnconfiguredStatus::toVelocyPack(velocypack::Builder& builder) const { auto UnconfiguredStatus::fromVelocyPack(velocypack::Slice slice) -> UnconfiguredStatus { TRI_ASSERT(slice.get("role").isEqualString("unconfigured")); - return UnconfiguredStatus(); + return {}; } void 
FollowerStatus::toVelocyPack(velocypack::Builder& builder) const { @@ -49,6 +49,7 @@ void FollowerStatus::toVelocyPack(velocypack::Builder& builder) const { builder.add(StaticStrings::Leader, VPackValue(*leader)); } builder.add(StaticStrings::Term, VPackValue(term.value)); + builder.add("largestCommonIndex", VPackValue(largestCommonIndex.value)); builder.add(VPackValue("local")); local.toVelocyPack(builder); } @@ -57,6 +58,7 @@ auto FollowerStatus::fromVelocyPack(velocypack::Slice slice) -> FollowerStatus { TRI_ASSERT(slice.get("role").isEqualString(StaticStrings::Follower)); FollowerStatus status; status.term = slice.get(StaticStrings::Term).extract(); + status.largestCommonIndex = slice.get("largestCommonIndex").extract(); status.local = LogStatistics::fromVelocyPack(slice); if (auto leader = slice.get(StaticStrings::Leader); !leader.isNone()) { status.leader = leader.copyString(); @@ -68,6 +70,8 @@ void LeaderStatus::toVelocyPack(velocypack::Builder& builder) const { VPackObjectBuilder ob(&builder); builder.add("role", VPackValue(StaticStrings::Leader)); builder.add(StaticStrings::Term, VPackValue(term.value)); + builder.add("largestCommonIndex", VPackValue(largestCommonIndex.value)); + builder.add("commitLagMS", VPackValue(commitLagMS.count())); builder.add(VPackValue("local")); local.toVelocyPack(builder); { @@ -84,30 +88,36 @@ auto LeaderStatus::fromVelocyPack(velocypack::Slice slice) -> LeaderStatus { LeaderStatus status; status.term = slice.get(StaticStrings::Term).extract(); status.local = LogStatistics::fromVelocyPack(slice.get("local")); + status.commitLagMS = std::chrono::duration{ + slice.get("commitLagMS").extract()}; for (auto [key, value] : VPackObjectIterator(slice.get(StaticStrings::Follower))) { auto id = ParticipantId{key.copyString()}; auto stat = FollowerStatistics::fromVelocyPack(value); - status.follower.emplace(std::move(id), std::move(stat)); + status.follower.emplace(std::move(id), stat); } return status; } -void 
LeaderStatus::FollowerStatistics::toVelocyPack(velocypack::Builder& builder) const { +void FollowerStatistics::toVelocyPack(velocypack::Builder& builder) const { VPackObjectBuilder ob(&builder); builder.add(StaticStrings::CommitIndex, VPackValue(commitIndex.value)); builder.add(VPackValue(StaticStrings::Spearhead)); spearHead.toVelocyPack(builder); builder.add("lastErrorReason", VPackValue(int(lastErrorReason))); builder.add("lastErrorReasonMessage", VPackValue(to_string(lastErrorReason))); - builder.add("lastRequestLatencyMS", VPackValue(lastRequestLatencyMS)); + builder.add("lastRequestLatencyMS", VPackValue(lastRequestLatencyMS.count())); + builder.add(VPackValue("state")); + internalState.toVelocyPack(builder); } -auto LeaderStatus::FollowerStatistics::fromVelocyPack(velocypack::Slice slice) -> FollowerStatistics { +auto FollowerStatistics::fromVelocyPack(velocypack::Slice slice) -> FollowerStatistics { FollowerStatistics stats; stats.commitIndex = slice.get(StaticStrings::CommitIndex).extract(); stats.spearHead = TermIndexPair::fromVelocyPack(slice.get(StaticStrings::Spearhead)); stats.lastErrorReason = AppendEntriesErrorReason{slice.get("lastErrorReason").getNumericValue()}; - stats.lastRequestLatencyMS = slice.get("lastRequestLatencyMS").getDouble(); + stats.lastRequestLatencyMS = std::chrono::duration{ + slice.get("lastRequestLatencyMS").getDouble()}; + stats.internalState = FollowerState::fromVelocyPack(slice.get("state")); return stats; } diff --git a/arangod/Replication2/ReplicatedLog/LogStatus.h b/arangod/Replication2/ReplicatedLog/LogStatus.h index 39c2a9b00b7e..e289990915a6 100644 --- a/arangod/Replication2/ReplicatedLog/LogStatus.h +++ b/arangod/Replication2/ReplicatedLog/LogStatus.h @@ -33,17 +33,21 @@ namespace arangodb::replication2::replicated_log { -struct LeaderStatus { - struct FollowerStatistics : LogStatistics { - AppendEntriesErrorReason lastErrorReason; - double lastRequestLatencyMS; - void toVelocyPack(velocypack::Builder& builder) const; 
- static auto fromVelocyPack(velocypack::Slice slice) -> FollowerStatistics; - }; +struct FollowerStatistics : LogStatistics { + AppendEntriesErrorReason lastErrorReason; + std::chrono::duration lastRequestLatencyMS; + FollowerState internalState; + void toVelocyPack(velocypack::Builder& builder) const; + static auto fromVelocyPack(velocypack::Slice slice) -> FollowerStatistics; +}; +struct LeaderStatus { LogStatistics local; LogTerm term; + LogIndex largestCommonIndex; std::unordered_map follower; + // now() - insertTP of last uncommitted entry + std::chrono::duration commitLagMS; void toVelocyPack(velocypack::Builder& builder) const; static auto fromVelocyPack(velocypack::Slice slice) -> LeaderStatus; @@ -53,6 +57,7 @@ struct FollowerStatus { LogStatistics local; std::optional leader; LogTerm term; + LogIndex largestCommonIndex; void toVelocyPack(velocypack::Builder& builder) const; static auto fromVelocyPack(velocypack::Slice slice) -> FollowerStatus; diff --git a/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp b/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp index abfba203ec70..28d50762b537 100644 --- a/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp +++ b/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp @@ -60,6 +60,7 @@ AppendEntriesRequest::AppendEntriesRequest(AppendEntriesRequest&& other) noexcep leaderId(std::move(other.leaderId)), prevLogEntry(other.prevLogEntry), leaderCommit(other.leaderCommit), + largestCommonIndex(other.largestCommonIndex), messageId(other.messageId), entries(std::move(other.entries)), waitForSync(other.waitForSync) { @@ -108,6 +109,7 @@ auto AppendEntriesRequest::operator=(replicated_log::AppendEntriesRequest&& othe leaderId = std::move(other.leaderId); prevLogEntry = other.prevLogEntry; leaderCommit = other.leaderCommit; + largestCommonIndex = other.largestCommonIndex; messageId = other.messageId; waitForSync = other.waitForSync; entries = std::move(other.entries); @@ -203,27 +205,27 @@ auto 
replicated_log::AppendEntriesResult::withConflict(LogTerm term, replicated_log::MessageId id, TermIndexPair conflict) noexcept -> replicated_log::AppendEntriesResult { - return AppendEntriesResult(term, id, conflict); + return {term, id, conflict}; } auto replicated_log::AppendEntriesResult::withRejection(LogTerm term, MessageId id, AppendEntriesErrorReason reason) noexcept -> AppendEntriesResult { - return AppendEntriesResult(term, TRI_ERROR_REPLICATION_REPLICATED_LOG_APPEND_ENTRIES_REJECTED, - reason, id); + return {term, TRI_ERROR_REPLICATION_REPLICATED_LOG_APPEND_ENTRIES_REJECTED, + reason, id}; } auto replicated_log::AppendEntriesResult::withPersistenceError(LogTerm term, replicated_log::MessageId id, Result const& res) noexcept -> replicated_log::AppendEntriesResult { - return AppendEntriesResult(term, res.errorNumber(), - AppendEntriesErrorReason::PERSISTENCE_FAILURE, id); + return {term, res.errorNumber(), + AppendEntriesErrorReason::PERSISTENCE_FAILURE, id}; } auto replicated_log::AppendEntriesResult::withOk(LogTerm term, replicated_log::MessageId id) noexcept -> replicated_log::AppendEntriesResult { - return AppendEntriesResult(term, id); + return {term, id}; } auto replicated_log::AppendEntriesResult::isSuccess() const noexcept -> bool { @@ -238,6 +240,7 @@ void replicated_log::AppendEntriesRequest::toVelocyPack(velocypack::Builder& bui builder.add(VPackValue("prevLogEntry")); prevLogEntry.toVelocyPack(builder); builder.add("leaderCommit", VPackValue(leaderCommit.value)); + builder.add("largestCommonIndex", VPackValue(largestCommonIndex.value)); builder.add("messageId", VPackValue(messageId)); builder.add("waitForSync", VPackValue(waitForSync)); builder.add("entries", VPackValue(VPackValueType::Array)); @@ -254,6 +257,7 @@ auto replicated_log::AppendEntriesRequest::fromVelocyPack(velocypack::Slice slic auto leaderId = ParticipantId{slice.get("leaderId").copyString()}; auto prevLogEntry = TermIndexPair::fromVelocyPack(slice.get("prevLogEntry")); auto 
leaderCommit = slice.get("leaderCommit").extract(); + auto largestCommonIndex = slice.get("largestCommonIndex").extract(); auto messageId = slice.get("messageId").extract(); auto waitForSync = slice.get("waitForSync").extract(); auto entries = std::invoke([&] { @@ -266,19 +270,20 @@ auto replicated_log::AppendEntriesRequest::fromVelocyPack(velocypack::Slice slic return std::move(transientEntries).persistent(); }); - return AppendEntriesRequest{leaderTerm, leaderId, prevLogEntry, - leaderCommit, messageId, waitForSync, - std::move(entries)}; + return AppendEntriesRequest{leaderTerm, leaderId, prevLogEntry, + leaderCommit, largestCommonIndex, messageId, + waitForSync, std::move(entries)}; } replicated_log::AppendEntriesRequest::AppendEntriesRequest( LogTerm leaderTerm, ParticipantId leaderId, TermIndexPair prevLogEntry, - LogIndex leaderCommit, replicated_log::MessageId messageId, - bool waitForSync, EntryContainer entries) + LogIndex leaderCommit, LogIndex largestCommonIndex, + replicated_log::MessageId messageId, bool waitForSync, EntryContainer entries) : leaderTerm(leaderTerm), leaderId(std::move(leaderId)), prevLogEntry(prevLogEntry), leaderCommit(leaderCommit), + largestCommonIndex(largestCommonIndex), messageId(messageId), entries(std::move(entries)), waitForSync(waitForSync) {} diff --git a/arangod/Replication2/ReplicatedLog/NetworkMessages.h b/arangod/Replication2/ReplicatedLog/NetworkMessages.h index 413b780118d8..3fac1e727b43 100644 --- a/arangod/Replication2/ReplicatedLog/NetworkMessages.h +++ b/arangod/Replication2/ReplicatedLog/NetworkMessages.h @@ -99,6 +99,7 @@ struct AppendEntriesRequest { ParticipantId leaderId; TermIndexPair prevLogEntry; LogIndex leaderCommit; + LogIndex largestCommonIndex; MessageId messageId; EntryContainer entries{}; bool waitForSync = false; @@ -106,7 +107,8 @@ struct AppendEntriesRequest { AppendEntriesRequest() = default; AppendEntriesRequest(LogTerm leaderTerm, ParticipantId leaderId, TermIndexPair prevLogEntry, LogIndex 
leaderCommit, - MessageId messageId, bool waitForSync, EntryContainer entries); + LogIndex largestCommonIndex, MessageId messageId, + bool waitForSync, EntryContainer entries); ~AppendEntriesRequest() noexcept = default; AppendEntriesRequest(AppendEntriesRequest&& other) noexcept; diff --git a/arangod/Replication2/ReplicatedLog/PersistedLog.h b/arangod/Replication2/ReplicatedLog/PersistedLog.h index f08050df315b..b28b844d85af 100644 --- a/arangod/Replication2/ReplicatedLog/PersistedLog.h +++ b/arangod/Replication2/ReplicatedLog/PersistedLog.h @@ -32,10 +32,7 @@ namespace arangodb::replication2::replicated_log { // ReplicatedLog-internal iterator over PersistingLogEntries -struct PersistedLogIterator { - virtual ~PersistedLogIterator() = default; - virtual auto next() -> std::optional = 0; -}; +struct PersistedLogIterator : TypedLogIterator {}; /** * @brief Interface to persist a replicated log locally. Implemented by diff --git a/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp b/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp index d1790d8bcd95..c7b95f114686 100644 --- a/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp +++ b/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp @@ -93,7 +93,7 @@ auto replicated_log::ReplicatedLog::becomeFollower(ParticipantId id, LogTerm ter LOG_CTX("1ed24", DEBUG, _logContext) << "becoming follower in term " << term << " with leader " << leaderId.value_or(""); - auto log = InMemoryLog{_logContext, *logCore}; + auto log = InMemoryLog::loadFromLogCore(*logCore); auto follower = std::make_shared(_logContext, _metrics, std::move(id), std::move(logCore), term, std::move(leaderId), log); diff --git a/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h b/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h index 04924d8df23c..cbe8847cf595 100644 --- a/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h +++ b/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h @@ -42,7 +42,7 @@ namespace 
arangodb::replication2::replicated_log { -class ReplicatedLogIterator : public LogIterator { +class ReplicatedLogIterator : public LogRangeIterator { public: using log_type = ::immer::flex_vector; @@ -63,6 +63,14 @@ class ReplicatedLogIterator : public LogIterator { return std::nullopt; } + auto range() const noexcept -> LogRange override { + if (_container.empty()) { + return {LogIndex{0}, LogIndex{0}}; + } else { + return {_container.front().entry().logIndex(), _container.back().entry().logIndex() + 1}; + } + } + private: log_type _container; log_type::const_iterator _begin; diff --git a/arangod/Replication2/ReplicatedLog/types.cpp b/arangod/Replication2/ReplicatedLog/types.cpp index 9974dcfd0ecf..029f0f340621 100644 --- a/arangod/Replication2/ReplicatedLog/types.cpp +++ b/arangod/Replication2/ReplicatedLog/types.cpp @@ -71,10 +71,10 @@ void replicated_log::QuorumData::toVelocyPack(velocypack::Builder& builder) cons } } - void replicated_log::LogStatistics::toVelocyPack(velocypack::Builder& builder) const { VPackObjectBuilder ob(&builder); builder.add(StaticStrings::CommitIndex, VPackValue(commitIndex.value)); + builder.add("firstIndex", VPackValue(firstIndex.value)); builder.add(VPackValue(StaticStrings::Spearhead)); spearHead.toVelocyPack(builder); } @@ -82,6 +82,7 @@ void replicated_log::LogStatistics::toVelocyPack(velocypack::Builder& builder) c auto replicated_log::LogStatistics::fromVelocyPack(velocypack::Slice slice) -> LogStatistics { LogStatistics stats; stats.commitIndex = slice.get(StaticStrings::CommitIndex).extract(); + stats.firstIndex = slice.get("firstIndex").extract(); stats.spearHead = TermIndexPair::fromVelocyPack(slice.get(StaticStrings::Spearhead)); return stats; } @@ -111,3 +112,72 @@ auto arangodb::replication2::replicated_log::to_string(AppendEntriesErrorReason << static_cast>(reason); FATAL_ERROR_ABORT(); } + +auto FollowerState::withUpToDate() noexcept -> FollowerState { + return FollowerState(std::in_place, UpToDate{}); +} + +auto 
FollowerState::withErrorBackoff(std::chrono::duration duration, std::size_t retryCount) noexcept + -> FollowerState { + return FollowerState(std::in_place, ErrorBackoff{duration, retryCount}); +} + +auto FollowerState::withRequestInFlight(std::chrono::duration duration) noexcept -> FollowerState { + return FollowerState(std::in_place, RequestInFlight{duration}); +} + +constexpr static std::string_view upToDateString = "up-to-date"; +constexpr static std::string_view errorBackoffString = "error-backoff"; +constexpr static std::string_view requestInFlightString = "request-in-flight"; + +auto FollowerState::fromVelocyPack(velocypack::Slice slice) -> FollowerState { + auto state = slice.get("state").extract(); + if (state == errorBackoffString) { + return FollowerState::withErrorBackoff( + std::chrono::duration{slice.get("durationMS").extract()}, + slice.get("retryCount").extract()); + } else if (state == requestInFlightString) { + return FollowerState::withRequestInFlight(std::chrono::duration{ + slice.get("durationMS").extract()}); + } else { + return FollowerState::withUpToDate(); + } +} + +void FollowerState::toVelocyPack(velocypack::Builder& builder) const { + struct ToVelocyPackVisitor { + auto operator()(FollowerState::UpToDate const&) { + builder.add("state", VPackValue(upToDateString)); + } + + auto operator()(FollowerState::ErrorBackoff const& err) { + builder.add("state", VPackValue(errorBackoffString)); + builder.add("durationMS", VPackValue(err.durationMS.count())); + builder.add("retryCount", VPackValue(err.retryCount)); + } + + auto operator()(FollowerState::RequestInFlight const& rif) { + builder.add("state", VPackValue(requestInFlightString)); + builder.add("durationMS", VPackValue(rif.durationMS.count())); + } + + velocypack::Builder& builder; + }; + + VPackObjectBuilder ob(&builder); + std::visit(ToVelocyPackVisitor{builder}, value); +} + +auto to_string(FollowerState const& state) -> std::string_view { + struct ToStringVisitor { + auto 
operator()(FollowerState::UpToDate const&) { return upToDateString; } + auto operator()(FollowerState::ErrorBackoff const& err) { + return errorBackoffString; + } + auto operator()(FollowerState::RequestInFlight const& rif) { + return requestInFlightString; + } + }; + + return std::visit(ToStringVisitor{}, state.value); +} diff --git a/arangod/Replication2/ReplicatedLog/types.h b/arangod/Replication2/ReplicatedLog/types.h index 2aa94ebdbda3..2305403665ac 100644 --- a/arangod/Replication2/ReplicatedLog/types.h +++ b/arangod/Replication2/ReplicatedLog/types.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "Replication2/ReplicatedLog/LogCommon.h" @@ -40,6 +41,35 @@ class Slice; namespace arangodb::replication2::replicated_log { +struct FollowerState { + + struct UpToDate {}; + struct ErrorBackoff { + std::chrono::duration durationMS; + std::size_t retryCount; + }; + struct RequestInFlight { + std::chrono::duration durationMS; + }; + + std::variant value; + + static auto withUpToDate() noexcept -> FollowerState; + static auto withErrorBackoff(std::chrono::duration, + std::size_t retryCount) noexcept -> FollowerState; + static auto withRequestInFlight(std::chrono::duration) noexcept + -> FollowerState; + static auto fromVelocyPack(velocypack::Slice) -> FollowerState; + void toVelocyPack(velocypack::Builder&) const; + + FollowerState() = default; + private: + template + explicit FollowerState(std::in_place_t, Args&&... args) : value(std::forward(args)...) 
{} +}; + +auto to_string(FollowerState const&) -> std::string_view; + struct AppendEntriesRequest; struct AppendEntriesResult; @@ -59,6 +89,7 @@ enum class AppendEntriesErrorReason { struct LogStatistics { TermIndexPair spearHead{}; LogIndex commitIndex{}; + LogIndex firstIndex{}; void toVelocyPack(velocypack::Builder& builder) const; [[nodiscard]] static auto fromVelocyPack(velocypack::Slice slice) -> LogStatistics; diff --git a/arangod/Replication2/ReplicatedState/AbstractStateMachine.h b/arangod/Replication2/ReplicatedState/AbstractStateMachine.h new file mode 100644 index 000000000000..355983b15a23 --- /dev/null +++ b/arangod/Replication2/ReplicatedState/AbstractStateMachine.h @@ -0,0 +1,75 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include + +#include "Basics/Result.h" +#include "Basics/Guarded.h" +#include "Basics/UnshackledMutex.h" +#include "Futures/Future.h" + +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/ReplicatedLog/types.h" + +namespace arangodb::replication2 { +namespace replicated_log { +struct ReplicatedLog; +} + +namespace replicated_state { + +template +struct AbstractStateMachine : std::enable_shared_from_this> { + // TODO Maybe we can create a non-templated base class for functions that do not + // require the template parameter. (waitFor, pollEntries, ...) + using LogIterator = TypedLogIterator; + using LogRangeIterator = TypedLogRangeIterator; + + virtual ~AbstractStateMachine() = default; + + explicit AbstractStateMachine(std::shared_ptr log); + auto triggerPollEntries() -> futures::Future; + + protected: + virtual auto installSnapshot(ParticipantId const&) -> futures::Future = 0; + virtual auto applyEntries(std::unique_ptr) + -> futures::Future = 0; + + void releaseIndex(LogIndex); + auto getEntry(LogIndex) -> std::optional; + auto getIterator(LogIndex first) -> std::unique_ptr; + auto insert(T const&) -> LogIndex; + auto waitFor(LogIndex) -> futures::Future; + + private: + struct GuardedData { + bool pollOnGoing{false}; + LogIndex nextIndex{1}; + }; + + Guarded _guardedData; + std::shared_ptr const log; +}; + +} // namespace replicated_state +} // namespace arangodb::replication2 diff --git a/arangod/Replication2/ReplicatedState/AbstractStateMachine.tpp b/arangod/Replication2/ReplicatedState/AbstractStateMachine.tpp new file mode 100644 index 000000000000..08b128c6aa4c --- /dev/null +++ b/arangod/Replication2/ReplicatedState/AbstractStateMachine.tpp @@ -0,0 +1,132 @@ +//////////////////////////////////////////////////////////////////////////////// +/// 
DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include +#include +#include "AbstractStateMachine.h" +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" + +using namespace arangodb; +using namespace arangodb::replication2; + +template +auto replicated_state::AbstractStateMachine::getIterator(LogIndex first) + -> std::unique_ptr { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +template +auto replicated_state::AbstractStateMachine::getEntry(LogIndex) + -> std::optional { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +template +auto replicated_state::AbstractStateMachine::insert(T const& v) -> LogIndex { + velocypack::UInt8Buffer payload; + { + velocypack::Builder builder(payload); + v.toVelocyPack(builder); + } + return log->getLeader()->insert(LogPayload(std::move(payload))); +} + +template +auto replication2::replicated_state::AbstractStateMachine::waitFor(LogIndex idx) + -> futures::Future { + return log->getParticipant()->waitFor(idx); +} + +template +void replicated_state::AbstractStateMachine::releaseIndex(LogIndex) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +namespace { +template 
+struct DeserializeLogIterator : TypedLogRangeIterator { + explicit DeserializeLogIterator(std::unique_ptr base) + : base(std::move(base)) {} + + auto next() -> std::optional override { + if (auto entry = base->next(); entry.has_value()) { + return T::fromVelocyPack(entry->logPayload()); + } + + return std::nullopt; + } + + auto range() const noexcept -> LogRange override { + return base->range(); + } + + std::unique_ptr base; +}; +} // namespace + +template +auto replicated_state::AbstractStateMachine::triggerPollEntries() + -> futures::Future { + auto nextIndex = + _guardedData.doUnderLock([&](GuardedData& guard) -> std::optional { + if (guard.pollOnGoing) { + return std::nullopt; + } + + guard.pollOnGoing = true; + return guard.nextIndex; + }); + + if (nextIndex.has_value()) { + return log->getParticipant() + ->waitForIterator(*nextIndex) + .thenValue([weak = this->weak_from_this()]( + std::unique_ptr res) { + if (auto self = weak.lock()) { + auto [from, to] = res->range(); // [from, to) + TRI_ASSERT(from != to); + + auto iter = std::make_unique>(std::move(res)); + return self->applyEntries(std::move(iter)).thenValue([self, to = to](Result&& result) { + auto guard = self->_guardedData.getLockedGuard(); + guard->pollOnGoing = false; + TRI_ASSERT(to > guard->nextIndex); + guard->nextIndex = to; + return std::move(result); + }); + } + + return futures::Future{TRI_ERROR_NO_ERROR}; + }); + } + + return futures::Future{TRI_ERROR_NO_ERROR}; +} + +template +replicated_state::AbstractStateMachine::AbstractStateMachine( + std::shared_ptr log) + : log(std::move(log)) {} diff --git a/arangod/Replication2/Streams/LogMultiplexer.h b/arangod/Replication2/Streams/LogMultiplexer.h new file mode 100644 index 000000000000..1f9c309695c3 --- /dev/null +++ b/arangod/Replication2/Streams/LogMultiplexer.h @@ -0,0 +1,81 @@ +#pragma once +#include + +#include + +#include +#include +#include + +#include +#include + +namespace arangodb::replication2::replicated_log { +class LogFollower; 
+class LogLeader; +} // namespace arangodb::replication2::replicated_log + +namespace arangodb::replication2::streams { + +/** + * Common stream dispatcher class for Multiplexer and Demultiplexer. You can + * obtain a stream given its id using getStreamById. Alternatively, you can + * static_cast the a pointer to StreamBase for the given stream. + * @tparam Self + * @tparam Spec + * @tparam StreamType + */ +template typename StreamType> +struct LogMultiplexerStreamDispatcher : std::enable_shared_from_this, + StreamDispatcherBase { + template > + auto getStreamBaseById() + -> std::shared_ptr> { + return getStreamByDescriptor(); + } + + template + auto getStreamById() -> std::shared_ptr>> { + return getStreamByDescriptor>(); + } + + template + auto getStreamByDescriptor() + -> std::shared_ptr> { + return std::static_pointer_cast>( + this->shared_from_this()); + } +}; + +/** + * Demultiplexer class. Use ::construct to create an instance. + * @tparam Spec Log specification + */ +template +struct LogDemultiplexer + : LogMultiplexerStreamDispatcher, Spec, Stream> { + virtual auto digestIterator(LogRangeIterator& iter) -> void = 0; + virtual auto listen() -> void = 0; + + static auto construct(std::shared_ptr) + -> std::shared_ptr; + + protected: + LogDemultiplexer() = default; +}; + +/** + * Multiplexer class. Use ::construct to create an instance. 
+ * @tparam Spec Log specification + */ +template +struct LogMultiplexer + : LogMultiplexerStreamDispatcher, Spec, ProducerStream> { + static auto construct(std::shared_ptr leader) + -> std::shared_ptr; + + protected: + LogMultiplexer() = default; +}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/LogMultiplexer.tpp b/arangod/Replication2/Streams/LogMultiplexer.tpp new file mode 100644 index 000000000000..f03a8fec5d60 --- /dev/null +++ b/arangod/Replication2/Streams/LogMultiplexer.tpp @@ -0,0 +1,364 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include + +#include +#include + +#if (_MSC_VER >= 1) +// suppress warnings: +#pragma warning(push) +// conversion from 'size_t' to 'immer::detail::rbts::count_t', possible loss of data +#pragma warning(disable : 4267) +// result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?) 
+#pragma warning(disable : 4334) +#endif +#include +#include +#if (_MSC_VER >= 1) +#pragma warning(pop) +#endif + +#include +#include +#include +#include + +#include "LogMultiplexer.h" + +#include +#include + +#include +#include +#include + +#include +#include + +namespace arangodb::replication2::streams { + +namespace { +template +auto allUnresolved(std::pair& q) { + return std::all_of(std::begin(q.first), std::end(q.first), + [&](auto const& pair) { return !pair.second.isFulfilled(); }); +} +template > +auto resolvePromiseSet(std::pair& q) { + TRI_ASSERT(allUnresolved(q)); + std::for_each(std::begin(q.first), std::end(q.first), [&](auto& pair) { + TRI_ASSERT(!pair.second.isFulfilled()); + if (!pair.second.isFulfilled()) { + pair.second.setValue(q.second); + } + }); +} + +template +auto resolvePromiseSets(stream_descriptor_set, + std::index_sequence, std::tuple& pairs) { + (resolvePromiseSet(std::get(pairs)), ...); +} + +template +auto resolvePromiseSets(stream_descriptor_set, std::tuple& pairs) { + resolvePromiseSets(stream_descriptor_set{}, + std::index_sequence_for{}, pairs); +} +} // namespace + +template typename StreamInterface, typename Interface> +struct LogMultiplexerImplementationBase { + explicit LogMultiplexerImplementationBase(std::shared_ptr const& interface_) + : _guardedData(static_cast(*this)), _interface(interface_) {} + + template , + typename E = StreamEntryView> + auto waitForIteratorInternal(LogIndex first) + -> futures::Future>> { + return waitForInternal(first).thenValue( + [that = shared_from_self(), first](auto&&) { + return that->_guardedData.doUnderLock([&](MultiplexerData& self) { + auto& block = std::get>(self._blocks); + return block.getIteratorRange(first, self._firstUncommittedIndex); + }); + }); + } + + template , + typename W = typename Stream::WaitForResult> + auto waitForInternal(LogIndex index) -> futures::Future { + return _guardedData.doUnderLock([&](MultiplexerData& self) { + if (self._firstUncommittedIndex > index) { + 
return futures::Future{std::in_place}; + } + auto& block = std::get>(self._blocks); + return block.registerWaitFor(index); + }); + } + + template + auto releaseInternal(LogIndex index) -> void { + // update the release index for the given stream + // then compute the minimum and forward it to the + // actual log implementation + auto globalReleaseIndex = _guardedData.doUnderLock( + [&](MultiplexerData& self) -> std::optional { + { + auto& block = self.template getBlockForDescriptor(); + auto newIndex = std::max(block._releaseIndex, index); + if (newIndex == block._releaseIndex) { + return std::nullopt; + } + TRI_ASSERT(newIndex > block._releaseIndex); + block._releaseIndex = newIndex; + } + + return self.minReleaseIndex(); + }); + + if (globalReleaseIndex) { + // TODO handle return value + std::ignore = _interface->release(*globalReleaseIndex); + } + } + + template , + typename E = StreamEntryView> + auto getIteratorInternal() -> std::unique_ptr> { + return _guardedData.doUnderLock([](MultiplexerData& self) { + auto& block = self.template getBlockForDescriptor(); + return block.getIterator(); + }); + } + + protected: + template + struct MultiplexerData; + template + struct MultiplexerData> { + std::tuple...> _blocks; + LogIndex _firstUncommittedIndex{1}; + LogIndex _lastIndex; + bool _pendingWaitFor{false}; + + Derived& _self; + + explicit MultiplexerData(Derived& self) : _self(self) {} + void digestIterator(LogRangeIterator& iter) { + while (auto memtry = iter.next()) { + auto muxedValue = + MultiplexedValues::fromVelocyPack(memtry->logPayload()); + std::visit( + [&](auto&& value) { + using ValueTag = std::decay_t; + using Descriptor = typename ValueTag::DescriptorType; + std::get>(_blocks).appendEntry( + memtry->logIndex(), std::move(value.value)); + }, + std::move(muxedValue.variant())); + } + } + + auto getWaitForResolveSetAll(LogIndex commitIndex) { + return std::make_tuple(std::make_pair( + getBlockForDescriptor().getWaitForResolveSet(commitIndex), + typename 
StreamInformationBlock::WaitForResult{})...); + } + + // returns a LogIndex to wait for (if necessary) + auto checkWaitFor() -> std::optional { + if (!_pendingWaitFor && _lastIndex >= _firstUncommittedIndex) { + // we have to trigger a waitFor operation + // and wait for the next index + _pendingWaitFor = true; + return _firstUncommittedIndex; + } + return std::nullopt; + } + + auto minReleaseIndex() -> LogIndex { + return std::min({getBlockForDescriptor()._releaseIndex...}); + } + + template + auto getBlockForDescriptor() -> StreamInformationBlock& { + return std::get>(_blocks); + } + }; + + auto shared_from_self() -> std::shared_ptr { + return std::static_pointer_cast(static_cast(*this).shared_from_this()); + } + + Guarded, basics::UnshackledMutex> _guardedData{}; + std::shared_ptr const _interface; +}; + +#if (_MSC_VER >= 1) +// suppress warnings: +#pragma warning(push) +// '': inherits '' via dominance +#pragma warning(disable : 4250) +#endif + +template +struct LogDemultiplexerImplementation + : LogDemultiplexer, // implement the actual class + ProxyStreamDispatcher, Spec, Stream>, // use a proxy stream dispatcher + LogMultiplexerImplementationBase, Spec, arangodb::replication2::streams::Stream, Interface> { + explicit LogDemultiplexerImplementation(std::shared_ptr interface_) + : LogMultiplexerImplementationBase, Spec, arangodb::replication2::streams::Stream, Interface>( + std::move(interface_)) {} + + auto digestIterator(LogRangeIterator& iter) -> void override { + this->_guardedData.getLockedGuard()->digestIterator(iter); + } + + auto listen() -> void override { + auto nextIndex = + this->_guardedData.doUnderLock([](auto& self) -> std::optional { + if (!self._pendingWaitFor) { + self._pendingWaitFor = true; + return self._firstUncommittedIndex; + } + return std::nullopt; + }); + if (nextIndex.has_value()) { + triggerWaitFor(*nextIndex); + } + } + + private: + void triggerWaitFor(LogIndex waitForIndex) { + this->_interface->waitForIterator(waitForIndex) + 
.thenValue([weak = this->weak_from_this()](std::unique_ptr&& iter) { + if (auto locked = weak.lock(); locked) { + auto that = std::static_pointer_cast(locked); + auto [nextIndex, promiseSets] = that->_guardedData.doUnderLock([&](auto& self) { + self._firstUncommittedIndex = iter->range().to; + self.digestIterator(*iter); + return std::make_tuple(self._firstUncommittedIndex, + self.getWaitForResolveSetAll( + self._firstUncommittedIndex.saturatedDecrement())); + }); + + that->triggerWaitFor(nextIndex); + resolvePromiseSets(Spec{}, promiseSets); + } + }); + } +}; + +template +struct LogMultiplexerImplementation + : LogMultiplexer, + ProxyStreamDispatcher, Spec, ProducerStream>, + LogMultiplexerImplementationBase, Spec, arangodb::replication2::streams::ProducerStream, Interface> { + using SelfClass = LogMultiplexerImplementation; + + explicit LogMultiplexerImplementation(std::shared_ptr interface_) + : LogMultiplexerImplementationBase, Spec, arangodb::replication2::streams::ProducerStream, Interface>( + std::move(interface_)) {} + + template > + auto insertInternal(T const& t) -> LogIndex { + auto serialized = std::invoke([&] { + velocypack::UInt8Buffer buffer; + velocypack::Builder builder(buffer); + MultiplexedValues::toVelocyPack(t, builder); + return buffer; + }); + + // we have to lock before we insert, otherwise we could mess up the order + // or log entries for this stream + auto [index, waitForIndex] = this->_guardedData.doUnderLock([&](auto& self) { + // First write to replicated log + auto insertIndex = this->_interface->insert(LogPayload(std::move(serialized))); + TRI_ASSERT(insertIndex > self._lastIndex); + self._lastIndex = insertIndex; + + // Now we insert the value T into the StreamsLog, + // but it is not yet visible because of the commitIndex + auto& block = self.template getBlockForDescriptor(); + block.appendEntry(insertIndex, t); + return std::make_pair(insertIndex, self.checkWaitFor()); + }); + + if (waitForIndex.has_value()) { + 
triggerWaitForIndex(*waitForIndex); + } + return index; + } + + private: + void triggerWaitForIndex(LogIndex waitForIndex) { + auto f = this->_interface->waitFor(waitForIndex); + std::move(f).thenValue([weak = this->weak_from_this()]( + replicated_log::WaitForResult&& result) noexcept { + // First lock the shared pointer + if (auto locked = weak.lock(); locked) { + auto that = std::static_pointer_cast(locked); + // now acquire the mutex + auto [resolveSets, nextIndex] = that->_guardedData.doUnderLock([&](auto& self) { + self._pendingWaitFor = false; + + // find out what the commit index is + self._firstUncommittedIndex = result.currentCommitIndex + 1; + return std::make_pair(self.getWaitForResolveSetAll(result.currentCommitIndex), + self.checkWaitFor()); + }); + + resolvePromiseSets(Spec{}, resolveSets); + if (nextIndex.has_value()) { + that->triggerWaitForIndex(*nextIndex); + } + } + }); + } +}; + +#if (_MSC_VER >= 1) +#pragma warning(pop) +#endif + +template +auto LogDemultiplexer::construct(std::shared_ptr interface_) + -> std::shared_ptr { + return std::make_shared>( + std::move(interface_)); +} + +template +auto LogMultiplexer::construct(std::shared_ptr leader) + -> std::shared_ptr { + return std::make_shared>( + std::move(leader)); +} + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/MultiplexedValues.h b/arangod/Replication2/Streams/MultiplexedValues.h new file mode 100644 index 000000000000..b977e966b482 --- /dev/null +++ b/arangod/Replication2/Streams/MultiplexedValues.h @@ -0,0 +1,101 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +namespace arangodb::replication2::streams { + +template > +struct DescriptorValueTag { + using DescriptorType = Descriptor; + explicit DescriptorValueTag(Type value) : value(std::move(value)) {} + Type value; +}; + +template +struct MultiplexedVariant { + using VariantType = std::variant...>; + + [[nodiscard]] auto variant() & -> VariantType& { return _value; } + [[nodiscard]] auto variant() && -> VariantType&& { return std::move(_value); } + [[nodiscard]] auto variant() const& -> VariantType& { return _value; } + + template + explicit MultiplexedVariant(std::in_place_t, Args&&... args) + : _value(std::forward(args)...) 
{} + + private: + VariantType _value; +}; + +struct MultiplexedValues { + template > + static void toVelocyPack(Type const& v, velocypack::Builder& builder) { + using PrimaryTag = stream_descriptor_primary_tag_t; + using Serializer = typename PrimaryTag::serializer; + velocypack::ArrayBuilder ab(&builder); + builder.add(velocypack::Value(PrimaryTag::tag)); + static_assert( + std::is_invocable_r_v, + std::add_lvalue_reference_t>, + std::add_lvalue_reference_t>); + std::invoke(Serializer{}, serializer_tag, v, builder); + } + + template + static auto fromVelocyPack(velocypack::Slice slice) + -> MultiplexedVariant { + TRI_ASSERT(slice.isArray()); + auto [tag, valueSlice] = slice.unpackTuple(); + return FromVelocyPackHelper, Descriptors...>::extract(tag, valueSlice); + } + + private: + template + struct FromVelocyPackHelper { + static auto extract(StreamTag tag, velocypack::Slice slice) -> ValueType { + return extractTags(stream_descriptor_tags_t{}, tag, slice); + } + + template + static auto extractTags(tag_descriptor_set, StreamTag tag, + velocypack::Slice slice) -> ValueType { + if (Tag::tag == tag) { + return extractValue(slice); + } else if constexpr (sizeof...(Tags) > 0) { + return extractTags(tag_descriptor_set{}, tag, slice); + } else if constexpr (sizeof...(Other) > 0) { + return FromVelocyPackHelper::extract(tag, slice); + } else { + std::abort(); + } + } + + template > + static auto extractValue(velocypack::Slice slice) -> ValueType { + static_assert(std::is_invocable_r_v, velocypack::Slice>); + auto value = std::invoke(Deserializer{}, serializer_tag, slice); + return ValueType(std::in_place, std::in_place_type>, + std::move(value)); + } + }; +}; +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/StreamInformationBlock.h b/arangod/Replication2/Streams/StreamInformationBlock.h new file mode 100644 index 000000000000..fbec2d6ad887 --- /dev/null +++ b/arangod/Replication2/Streams/StreamInformationBlock.h @@ -0,0 +1,76 @@ 
+//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include + +#if (_MSC_VER >= 1) +// suppress warnings: +#pragma warning(push) +// conversion from 'size_t' to 'immer::detail::rbts::count_t', possible loss of data +#pragma warning(disable : 4267) +// result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?) 
+#pragma warning(disable : 4334) +#endif +#include +#include +#if (_MSC_VER >= 1) +#pragma warning(pop) +#endif + +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/Streams/Streams.h" + +namespace arangodb::replication2::streams { + +template +struct StreamInformationBlock; +template +struct StreamInformationBlock> { + using StreamType = streams::Stream; + using EntryType = StreamEntry; + using Iterator = TypedLogRangeIterator>; + + using ContainerType = ::immer::flex_vector; + using TransientType = typename ContainerType::transient_type; + using LogVariantType = std::variant; + + using WaitForResult = typename StreamType::WaitForResult; + using WaitForPromise = futures::Promise; + using WaitForQueue = std::multimap; + + LogIndex _releaseIndex{0}; + LogVariantType _container; + WaitForQueue _waitForQueue; + + auto appendEntry(LogIndex index, Type t); + auto getWaitForResolveSet(LogIndex commitIndex) -> WaitForQueue; + auto registerWaitFor(LogIndex index) -> futures::Future; + auto getIterator() -> std::unique_ptr; + auto getIteratorRange(LogIndex start, LogIndex stop) -> std::unique_ptr; + + private: + auto getTransientContainer() -> TransientType&; + auto getPersistentContainer() -> ContainerType&; +}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/StreamInformationBlock.tpp b/arangod/Replication2/Streams/StreamInformationBlock.tpp new file mode 100644 index 000000000000..9303cf8aa612 --- /dev/null +++ b/arangod/Replication2/Streams/StreamInformationBlock.tpp @@ -0,0 +1,137 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include "Replication2/Streams/StreamInformationBlock.h" +#include "Replication2/Streams/Streams.h" + +namespace arangodb::replication2::streams { + +template +auto StreamInformationBlock>::getTransientContainer() + -> TransientType& { + if (!std::holds_alternative(_container)) { + _container = std::get(_container).transient(); + } + return std::get(_container); +} + +template +auto StreamInformationBlock>::getPersistentContainer() + -> ContainerType& { + if (!std::holds_alternative(_container)) { + _container = std::get(_container).persistent(); + } + return std::get(_container); +} + +template +auto StreamInformationBlock>::appendEntry(LogIndex index, + Type t) { + getTransientContainer().push_back(EntryType{index, std::move(t)}); +} + +template +auto StreamInformationBlock>::getWaitForResolveSet(LogIndex commitIndex) + -> std::multimap> { + WaitForQueue toBeResolved; + auto const end = _waitForQueue.upper_bound(commitIndex); + for (auto it = _waitForQueue.begin(); it != end;) { + toBeResolved.insert(_waitForQueue.extract(it++)); + } + return toBeResolved; +} + +template +auto StreamInformationBlock>::registerWaitFor(LogIndex index) + -> futures::Future { + return _waitForQueue.emplace(index, futures::Promise{})->second.getFuture(); +} + +template +auto StreamInformationBlock>::getIterator() + -> std::unique_ptr { + auto log = 
getPersistentContainer(); + + struct Iterator : TypedLogRangeIterator> { + ContainerType log; + typename ContainerType::iterator current; + + auto next() -> std::optional> override { + if (current != std::end(log)) { + auto view = std::make_pair(current->first, std::cref(current->second)); + ++current; + return view; + } + return std::nullopt; + } + + [[nodiscard]] auto range() const noexcept -> LogRange override { + abort(); // TODO + } + + explicit Iterator(ContainerType log) + : log(std::move(log)), current(this->log.begin()) {} + }; + + return std::make_unique(std::move(log)); +} + +template +auto StreamInformationBlock>::getIteratorRange(LogIndex start, LogIndex stop) + -> std::unique_ptr { + TRI_ASSERT(stop >= start); + + auto const log = getPersistentContainer(); + + using ContainerIterator = typename ContainerType::iterator; + + struct Iterator : TypedLogRangeIterator> { + ContainerType _log; + ContainerIterator current; + LogIndex start, stop; + + auto next() -> std::optional> override { + if (current != std::end(_log) && current->first < stop) { + auto view = std::make_pair(current->first, std::cref(current->second)); + ++current; + return view; + } + return std::nullopt; + } + [[nodiscard]] auto range() const noexcept -> LogRange override { + return {start, stop}; + } + + explicit Iterator(ContainerType log, LogIndex start, LogIndex stop) + : _log(std::move(log)), + current(std::lower_bound(std::begin(_log), std::end(_log), start, + [](StreamEntry const& left, LogIndex index) { + return left.first < index; + })), + start(start), + stop(stop) {} + }; + return std::make_unique(std::move(log), start, stop); +} + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/StreamSpecification.h b/arangod/Replication2/Streams/StreamSpecification.h new file mode 100644 index 000000000000..4021c9e14a13 --- /dev/null +++ b/arangod/Replication2/Streams/StreamSpecification.h @@ -0,0 +1,193 @@ 
+//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include +#include + +#include +#include + +namespace arangodb::replication2::streams { + +using StreamId = std::uint64_t; +using StreamTag = std::uint64_t; + +template +struct serializer_tag_t {}; +template +inline constexpr auto serializer_tag = serializer_tag_t{}; + +template +struct tag_descriptor { + using deserializer = D; + using serializer = S; + static constexpr auto tag = Tag; +}; + +template +struct is_tag_descriptor : std::false_type {}; +template +struct is_tag_descriptor> : std::true_type {}; +template +inline constexpr bool is_tag_descriptor_v = is_tag_descriptor::value; + +template +struct tag_descriptor_set { + static_assert((is_tag_descriptor_v && ...)); +}; + +template +struct tag_descriptor_set_primary; +template +struct tag_descriptor_set_primary> { + using type = D; +}; +template +using tag_descriptor_set_primary_t = tag_descriptor_set_primary; + +template +struct stream_descriptor; +template +struct stream_descriptor> { + static constexpr auto id = StreamId; + using tags = tag_descriptor_set; + using type = Type; + + // Check that 
all deserializers are invocable with (serializer_tag{}, + // slice) and return Type. + static_assert((std::is_invocable_r_v, velocypack::Slice> && + ...)); + + // Check that all serializers are invocable with (serializer_tag{}, T + // const&, Builder) and return void. + static_assert((std::is_invocable_r_v, + std::add_lvalue_reference_t>, + std::add_lvalue_reference_t> && + ...)); +}; + +template +struct is_stream_descriptor : std::false_type {}; +template +struct is_stream_descriptor> : std::true_type { +}; +template +inline constexpr auto is_stream_descriptor_v = is_stream_descriptor::value; + +template +struct stream_descriptor_set { + static_assert((is_stream_descriptor_v && ...)); + + static constexpr auto length = sizeof...(Descriptors); + + template + static void for_each_descriptor(F&& f, Args&&... args) { + (std::invoke(std::forward(f), Descriptors{}, std::forward(args)...), ...); + } +}; + +template +struct is_stream_descriptor_set : std::false_type {}; +template +struct is_stream_descriptor_set> + : std::true_type {}; +template +inline constexpr auto is_stream_descriptor_set_v = is_stream_descriptor_set::value; + +template +struct stream_descriptor_type { + static_assert(is_stream_descriptor_v); + using type = typename T::type; +}; +template +using stream_descriptor_type_t = typename stream_descriptor_type::type; +template +struct stream_descriptor_id { + static inline constexpr auto value = T::id; +}; +template +inline constexpr auto stream_descriptor_id_v = stream_descriptor_id::value; +template +struct stream_descriptor_tags { + using type = typename T::tags; +}; +template +using stream_descriptor_tags_t = typename stream_descriptor_tags::type; + +template +using stream_descriptor_primary_tag_t = + typename tag_descriptor_set_primary_t>::type; + +namespace detail { +template +struct stream_descriptor_by_id_impl; +template +struct stream_descriptor_by_id_impl + : std::conditional, D, + typename stream_descriptor_by_id_impl::type> {}; +template +struct 
stream_descriptor_by_id_impl { + // static_assert(StreamId == stream_descriptor_id_v); + using type = D; +}; + +} // namespace detail + +template +struct stream_descriptor_by_id; +template +struct stream_descriptor_by_id> { + static_assert(((stream_descriptor_id_v == StreamId) || ...)); + using type = typename detail::stream_descriptor_by_id_impl::type; +}; +template +using stream_descriptor_by_id_t = typename stream_descriptor_by_id::type; + +template +using stream_type_by_id_t = + stream_descriptor_type_t>; + +namespace detail { +template +struct stream_index_by_id_impl; +template +struct stream_index_by_id_impl + : std::conditional_t, std::integral_constant, + stream_index_by_id_impl> {}; +template +struct stream_index_by_id_impl + : std::integral_constant {}; +} // namespace detail + +template +struct stream_index_by_id; +template +struct stream_index_by_id> { + static_assert(((stream_descriptor_id_v == StreamId) || ...)); + static inline constexpr std::size_t value = + detail::stream_index_by_id_impl<0, StreamId, Ds...>::value; +}; + +template +inline constexpr auto stream_index_by_id_v = stream_index_by_id::value; +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/Streams.h b/arangod/Replication2/Streams/Streams.h new file mode 100644 index 000000000000..6654faf0ca4d --- /dev/null +++ b/arangod/Replication2/Streams/Streams.h @@ -0,0 +1,103 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include + +namespace arangodb::replication2::streams { + +/** + * Object returned by a stream iterator. Allows read only access + * to the stored object. The view does not own the value and remains + * valid until the iterator is destroyed or next() is called. + * @tparam T Object Type + */ +template +using StreamEntryView = std::pair; +template +using StreamEntry = std::pair; + +/** + * Consumer interface for a multiplexed object stream. Provides methods for + * interaction with the replicated logs stream. + * @tparam T Object Type + */ +template +struct Stream { + virtual ~Stream() = default; + + struct WaitForResult {}; + virtual auto waitFor(LogIndex) -> futures::Future = 0; + + using Iterator = TypedLogRangeIterator>; + virtual auto waitForIterator(LogIndex) + -> futures::Future> = 0; + + virtual auto release(LogIndex) -> void = 0; +}; + +/** + * Producing interface for a multiplexed object stream. Besides the Stream + * methods it additionally provides an insert method. + * @tparam T Object Type + */ +template +struct ProducerStream : Stream { + virtual auto insert(T const&) -> LogIndex = 0; +}; + +/** + * StreamGenericBase is the base for all Stream implementations. In general + * users don't need to access this object directly. It provides more information + * about the stream. + * @tparam Descriptor The associated stream descriptor.
+ * @tparam StreamType Either Stream or ProducerStream. + * @tparam Type Object Type, default is extracted from Descriptor + */ +template typename StreamType, typename Type = stream_descriptor_type_t> +struct StreamGenericBase : StreamType { + static_assert(is_stream_descriptor_v, + "Descriptor is not a valid stream descriptor"); + + using Iterator = typename StreamType::Iterator; + virtual auto getAllEntriesIterator() -> std::unique_ptr = 0; +}; + +template +using StreamBase = StreamGenericBase; +template +using ProducerStreamBase = StreamGenericBase; + +template typename> +struct StreamDispatcherBase; + +/** + * This class declares the general interface for an entity that provides a given + * set of streams. It has the StreamBases as virtual base classes. + * @tparam Streams Pack of streams; each adds a virtual StreamGenericBase base + * @tparam StreamType Either Stream or ProducerStream + */ +template typename StreamType> +struct StreamDispatcherBase, StreamType> + : virtual StreamGenericBase... {}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/Streams.tpp b/arangod/Replication2/Streams/Streams.tpp new file mode 100644 index 000000000000..e6041ee3cfce --- /dev/null +++ b/arangod/Replication2/Streams/Streams.tpp @@ -0,0 +1,99 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License.
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "Replication2/Streams/Streams.h" + +namespace arangodb::replication2::streams { + +/** + * This is the implementation of the stream interfaces. They are just proxy + * objects that static_cast the this pointer to the respective implementor and + * forward the call, annotated with the stream descriptor. + * @tparam Implementation Implementor Top Class + * @tparam Descriptor Stream Descriptor + * @tparam StreamInterface Stream or ProducerStream + */ +template typename StreamInterface> +struct StreamGenericImplementationBase + : virtual StreamGenericBase { + static_assert(is_stream_descriptor_v); + + using ValueType = stream_descriptor_type_t; + using Iterator = TypedLogRangeIterator>; + using WaitForResult = typename StreamInterface::WaitForResult; + + auto waitForIterator(LogIndex index) -> futures::Future> override final { + return implementation().template waitForIteratorInternal(index); + } + auto waitFor(LogIndex index) -> futures::Future override final { + return implementation().template waitForInternal(index); + } + auto release(LogIndex index) -> void override final { + return implementation().template releaseInternal(index); + } + auto getAllEntriesIterator() -> std::unique_ptr override final { + return implementation().template getIteratorInternal(); + } + + private: + auto implementation() -> Implementation& { return static_cast(*this); } +}; + +/** + * Wrapper around StreamGenericImplementationBase that adds more methods + * depending on the StreamInterface. It is specialized for ProducerStream.
+ * @tparam Implementation Implementor Top Class + * @tparam Descriptor Stream Descriptor + * @tparam StreamInterface Stream or ProducerStream + */ +template typename StreamInterface> +struct StreamGenericImplementation + : StreamGenericImplementationBase {}; +template +struct StreamGenericImplementation + : StreamGenericImplementationBase { + using ValueType = stream_descriptor_type_t; + auto insert(ValueType const& t) -> LogIndex override { + return static_cast(this)->template insertInternal(t); + } +}; + +template +using StreamImplementation = StreamGenericImplementation; +template +using ProducerStreamImplementation = + StreamGenericImplementation; + +template typename> +struct ProxyStreamDispatcher; + +/** + * Class that implements all streams as virtual base classes. + * @tparam Implementation + * @tparam Streams + * @tparam StreamInterface + */ +template typename StreamInterface> +struct ProxyStreamDispatcher, StreamInterface> + : StreamGenericImplementation... {}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/RestHandler/RestLogHandler.cpp b/arangod/RestHandler/RestLogHandler.cpp index 9558beb215b5..b6030dec6fd4 100644 --- a/arangod/RestHandler/RestLogHandler.cpp +++ b/arangod/RestHandler/RestLogHandler.cpp @@ -58,8 +58,7 @@ struct arangodb::ReplicatedLogMethods { } virtual auto getReplicatedLogs() const - -> futures::Future> { + -> futures::Future> { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } @@ -68,7 +67,8 @@ struct arangodb::ReplicatedLogMethods { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } - virtual auto getLogEntryByIndex(LogId, LogIndex) const -> futures::Future> { + virtual auto getLogEntryByIndex(LogId, LogIndex) const + -> futures::Future> { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } @@ -78,12 +78,16 @@ struct arangodb::ReplicatedLogMethods { } virtual auto tailEntries(LogId, LogIndex, std::size_t limit) const - -> futures::Future> { + -> futures::Future> { 
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } virtual auto insert(LogId, LogPayload) const - -> futures::Future>> { + -> futures::Future> { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + } + + virtual auto releaseIndex(LogId, LogIndex) const -> futures::Future { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } @@ -95,32 +99,34 @@ struct arangodb::ReplicatedLogMethods { namespace { -auto sendInsertRequest(network::ConnectionPool *pool, std::string const& server, std::string const& database, - LogId id, LogPayload payload) - -> futures::Future>> { - +auto sendInsertRequest(network::ConnectionPool* pool, std::string const& server, + std::string const& database, LogId id, LogPayload payload) + -> futures::Future> { auto path = basics::StringUtils::joinT("/", "_api/log", id, "insert"); network::RequestOptions opts; opts.database = database; - return network::sendRequest(pool, "server:" + server, fuerte::RestVerb::Post, path, - payload.dummy, opts) + return network::sendRequest(pool, "server:" + server, fuerte::RestVerb::Post, + path, payload.dummy, opts) .thenValue([](network::Response&& resp) { if (resp.fail() || !fuerte::statusIsSuccess(resp.statusCode())) { THROW_ARANGO_EXCEPTION(resp.combinedResult()); } auto result = resp.slice().get("result"); - auto quorum = std::make_shared(result.get("quorum")); + auto waitResult = result.get("result"); + + auto quorum = std::make_shared( + waitResult.get("quorum")); + auto commitIndex = waitResult.get("commitIndex").extract(); auto index = result.get("index").extract(); - return std::make_pair(index, std::move(quorum)); + return std::make_pair(index, replicated_log::WaitForResult(commitIndex, + std::move(quorum))); }); } - -auto sendLogStatusRequest(network::ConnectionPool *pool, std::string const& server, std::string const& database, - LogId id) --> futures::Future { - +auto sendLogStatusRequest(network::ConnectionPool* pool, std::string const& server, + std::string const& database, LogId id) + -> 
futures::Future { auto path = basics::StringUtils::joinT("/", "_api/log", id); network::RequestOptions opts; @@ -130,14 +136,14 @@ auto sendLogStatusRequest(network::ConnectionPool *pool, std::string const& serv if (resp.fail() || !fuerte::statusIsSuccess(resp.statusCode())) { THROW_ARANGO_EXCEPTION(resp.combinedResult()); } - return replication2::replicated_log::LogStatus::fromVelocyPack(resp.slice().get("result")); + return replication2::replicated_log::LogStatus::fromVelocyPack( + resp.slice().get("result")); }); } -auto sendReadEntryRequest(network::ConnectionPool *pool, std::string const& server, std::string const& database, - LogId id, LogIndex index) --> futures::Future> { - +auto sendReadEntryRequest(network::ConnectionPool* pool, std::string const& server, + std::string const& database, LogId id, LogIndex index) + -> futures::Future> { auto path = basics::StringUtils::joinT("/", "_api/log", id, "readEntry", index.value); network::RequestOptions opts; @@ -190,7 +196,7 @@ auto sendTailRequest(network::ConnectionPool* pool, std::string const& server, return std::make_unique(resp.response().stealPayload()); }); } -} +} // namespace struct ReplicatedLogMethodsCoord final : ReplicatedLogMethods { auto getLogLeader(LogId id) const { @@ -203,7 +209,7 @@ struct ReplicatedLogMethodsCoord final : ReplicatedLogMethods { } auto insert(LogId id, LogPayload payload) const - -> futures::Future>> override { + -> futures::Future> override { return sendInsertRequest(pool, getLogLeader(id), vocbase.name(), id, std::move(payload)); } @@ -258,8 +264,7 @@ struct ReplicatedLogMethodsDBServ final : ReplicatedLogMethods { } auto getReplicatedLogs() const - -> futures::Future> override { + -> futures::Future> override { return vocbase.getReplicatedLogs(); } @@ -268,11 +273,13 @@ struct ReplicatedLogMethodsDBServ final : ReplicatedLogMethods { return vocbase.getReplicatedLogById(id)->getParticipant()->getStatus(); } - auto getLogEntryByIndex(LogId id, LogIndex idx) const -> 
futures::Future> override { + auto getLogEntryByIndex(LogId id, LogIndex idx) const + -> futures::Future> override { return vocbase.getReplicatedLogLeaderById(id)->readReplicatedEntryByIndex(idx); } - auto tailEntries(LogId id, LogIndex idx, std::size_t limit) const -> futures::Future> override { + auto tailEntries(LogId id, LogIndex idx, std::size_t limit) const + -> futures::Future> override { struct LimitingIterator : LogIterator { LimitingIterator(size_t limit, std::unique_ptr source) : _limit(limit), _source(std::move(source)) {} @@ -301,11 +308,16 @@ struct ReplicatedLogMethodsDBServ final : ReplicatedLogMethods { } auto insert(LogId logId, LogPayload payload) const - -> futures::Future>> override { + -> futures::Future> override { auto log = vocbase.getReplicatedLogLeaderById(logId); auto idx = log->insert(std::move(payload)); return log->waitFor(idx).thenValue( - [idx](auto&& quorum) { return std::make_pair(idx, std::move(quorum)); }); + [idx](auto&& result) { return std::make_pair(idx, std::move(result)); }); + } + + auto releaseIndex(LogId id, LogIndex idx) const -> futures::Future override { + auto log = vocbase.getReplicatedLogById(id); + return log->getParticipant()->release(idx); } explicit ReplicatedLogMethodsDBServ(TRI_vocbase_t& vocbase) @@ -316,7 +328,6 @@ struct ReplicatedLogMethodsDBServ final : ReplicatedLogMethods { }; RestStatus RestLogHandler::execute() { - // for now required admin access to the database if (!ExecContext::current().isAdminUser()) { generateError(rest::ResponseCode::FORBIDDEN, TRI_ERROR_HTTP_FORBIDDEN); @@ -355,9 +366,7 @@ RestStatus RestLogHandler::executeByMethod(ReplicatedLogMethods const& methods) return RestStatus::DONE; } - RestStatus RestLogHandler::handlePostRequest(ReplicatedLogMethods const& methods) { - std::vector const& suffixes = _request->decodedSuffixes(); bool parseSuccess = false; @@ -393,40 +402,52 @@ RestStatus RestLogHandler::handlePostRequest(ReplicatedLogMethods const& methods if (auto& verb = 
suffixes[1]; verb == "insert") { return waitForFuture( - methods.insert(logId, LogPayload::createFromSlice(body)).thenValue([this](auto&& quorum) { + methods.insert(logId, LogPayload::createFromSlice(body)).thenValue([this](auto&& waitForResult) { VPackBuilder response; { VPackObjectBuilder result(&response); - response.add("index", VPackValue(quorum.first)); - response.add(VPackValue("quorum")); - quorum.second->toVelocyPack(response); + response.add("index", VPackValue(waitForResult.first)); + response.add(VPackValue("result")); + waitForResult.second.toVelocyPack(response); } generateOk(rest::ResponseCode::ACCEPTED, response.slice()); })); - } else if(verb == "updateTermSpecification") { - auto term = replication2::agency::LogPlanTermSpecification(replication2::agency::from_velocypack, body); - return waitForFuture( - methods.updateTermSpecification(logId, term).thenValue([this](auto&& result) { - if (result.ok()) { - generateOk(ResponseCode::ACCEPTED, VPackSlice::emptyObjectSlice()); + } else if (verb == "release") { + auto idx = LogIndex{basics::StringUtils::uint64(_request->value("index"))}; + return waitForFuture(methods.releaseIndex(logId, idx).thenValue([this](Result&& res) { + if (res.fail()) { + generateError(res); } else { - generateError(result); + generateOk(rest::ResponseCode::ACCEPTED, VPackSlice::noneSlice()); } })); - } else if(verb == "becomeLeader") { + } else if (verb == "updateTermSpecification") { + auto term = replication2::agency::LogPlanTermSpecification(replication2::agency::from_velocypack, + body); + return waitForFuture( + methods.updateTermSpecification(logId, term).thenValue([this](auto&& result) { + if (result.ok()) { + generateOk(ResponseCode::ACCEPTED, VPackSlice::emptyObjectSlice()); + } else { + generateError(result); + } + })); + } else if (verb == "becomeLeader") { if (!ServerState::instance()->isDBServer()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } auto log = _vocbase.getReplicatedLogById(logId); auto term = 
LogTerm{body.get(StaticStrings::Term).getNumericValue()}; - auto writeConcern = body.get(StaticStrings::WriteConcern).getNumericValue(); + auto writeConcern = + body.get(StaticStrings::WriteConcern).getNumericValue(); auto waitForSync = body.get(StaticStrings::WaitForSyncString).isTrue(); std::vector> follower; for (auto const& part : VPackArrayIterator(body.get(StaticStrings::Follower))) { auto partId = part.copyString(); - follower.emplace_back(std::make_shared(server().getFeature().pool(), partId, _vocbase.name(), logId)); + follower.emplace_back(std::make_shared( + server().getFeature().pool(), partId, _vocbase.name(), logId)); } replication2::LogConfig config; @@ -457,10 +478,8 @@ RestStatus RestLogHandler::handlePostRequest(ReplicatedLogMethods const& methods return waitForFuture(std::move(f)); } else { - generateError( - rest::ResponseCode::NOT_FOUND, TRI_ERROR_HTTP_NOT_FOUND, - "expecting one of the resources 'insert', "); - + generateError(rest::ResponseCode::NOT_FOUND, TRI_ERROR_HTTP_NOT_FOUND, + "expecting one of the resources 'insert', "); } return RestStatus::DONE; } @@ -490,7 +509,6 @@ RestStatus RestLogHandler::handleGetRequest(ReplicatedLogMethods const& methods) } RestStatus RestLogHandler::handleDeleteRequest(ReplicatedLogMethods const& methods) { - std::vector const& suffixes = _request->decodedSuffixes(); if (suffixes.size() != 1) { @@ -549,17 +567,18 @@ RestStatus RestLogHandler::handleGetTail(const ReplicatedLogMethods& methods, LogIndex logIdx{basics::StringUtils::uint64(_request->value("first"))}; std::size_t limit{basics::StringUtils::uint64(_request->value("limit"))}; - auto fut = methods.tailEntries(logId, logIdx, limit).thenValue([&](std::unique_ptr iter) { - VPackBuilder builder; - { - VPackArrayBuilder ab(&builder); - while (auto entry = iter->next()) { - entry->toVelocyPack(builder); - } - } + auto fut = + methods.tailEntries(logId, logIdx, limit).thenValue([&](std::unique_ptr iter) { + VPackBuilder builder; + { + VPackArrayBuilder 
ab(&builder); + while (auto entry = iter->next()) { + entry->toVelocyPack(builder); + } + } - generateOk(rest::ResponseCode::OK, builder.slice()); - }); + generateOk(rest::ResponseCode::OK, builder.slice()); + }); return waitForFuture(std::move(fut)); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 64441a092482..6aa62188a9c4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -291,25 +291,38 @@ set(ARANGODB_TESTS_SOURCES ${ADDITIONAL_TEST_SOURCES}) set(ARANGODB_REPLICATION2_TEST_SOURCES - Replication2/AppendEntriesBatchTest.cpp - Replication2/ChangeStreamTests.cpp - Replication2/CheckLogsTest.cpp - Replication2/ConcurrencyTests.cpp - Replication2/DetectConflictTest.cpp - Replication2/FollowerAppendEntries.cpp - Replication2/FollowerWaitFor.cpp - Replication2/LeaderAppendEntriesTest.cpp - Replication2/LogReclaimTest.cpp - Replication2/MaintenanceTests.cpp - Replication2/MultiTermTest.cpp - Replication2/ReplicatedLogMetricsMock.cpp - Replication2/ReplicatedLogTest.cpp - Replication2/RewriteLogTest.cpp - Replication2/RocksDBLogTest.cpp - Replication2/SimpleInsertTests.cpp - Replication2/TestHelper.cpp - Replication2/UpdateReplicatedLogTests.cpp -) + Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp + Replication2/ReplicatedLog/ChangeStreamTests.cpp + Replication2/ReplicatedLog/CheckLogsTest.cpp + Replication2/ReplicatedLog/ConcurrencyTests.cpp + Replication2/ReplicatedLog/DetectConflictTest.cpp + Replication2/ReplicatedLog/FollowerAppendEntries.cpp + Replication2/ReplicatedLog/FollowerWaitFor.cpp + Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp + Replication2/ReplicatedLog/LogReclaimTest.cpp + Replication2/ReplicatedLog/MaintenanceTests.cpp + Replication2/ReplicatedLog/MultiTermTest.cpp + Replication2/Mocks/ReplicatedLogMetricsMock.cpp + Replication2/ReplicatedLog/ReplicatedLogTest.cpp + Replication2/ReplicatedLog/RewriteLogTest.cpp + Replication2/ReplicatedLog/RocksDBLogTest.cpp + 
Replication2/ReplicatedLog/SimpleInsertTests.cpp + Replication2/ReplicatedLog/TestHelper.cpp + Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp + Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp + Replication2/ReplicatedState/StateMachineTestHelper.cpp + Replication2/ReplicatedState/StateMachineTestHelper.h + Replication2/Mocks/PersistedLog.cpp + Replication2/Mocks/PersistedLog.h + Replication2/ReplicatedLog/InMemoryLogTest.cpp + Replication2/Streams/LogMultiplexerTest.cpp + Replication2/Mocks/FakeReplicatedLog.cpp + Replication2/Mocks/FakeReplicatedLog.h + Replication2/Streams/TestLogSpecification.cpp + Replication2/Streams/TestLogSpecification.h + Replication2/Streams/MultiplexerConcurrencyTest.cpp + Replication2/Mocks/AsyncFollower.cpp + Replication2/Mocks/AsyncFollower.h) if (LINUX) # add "-fno-var-tracking" to the compiler flags diff --git a/tests/Replication2/Mocks/AsyncFollower.cpp b/tests/Replication2/Mocks/AsyncFollower.cpp new file mode 100644 index 000000000000..0177044dabc5 --- /dev/null +++ b/tests/Replication2/Mocks/AsyncFollower.cpp @@ -0,0 +1,106 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#include "AsyncFollower.h" + +#include + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogStatus.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; + +auto AsyncFollower::getStatus() const -> LogStatus { + return _follower->getStatus(); +} + +auto AsyncFollower::resign() && -> std::tuple, DeferredAction> { + return std::move(*_follower).resign(); +} + +auto AsyncFollower::waitFor(arangodb::replication2::LogIndex index) -> WaitForFuture { + return _follower->waitFor(index); +} + +auto AsyncFollower::release(arangodb::replication2::LogIndex doneWithIdx) -> Result { + return _follower->release(doneWithIdx); +} + +auto AsyncFollower::getParticipantId() const noexcept -> ParticipantId const& { + return _follower->getParticipantId(); +} + +auto AsyncFollower::appendEntries(AppendEntriesRequest request) + -> futures::Future { + std::unique_lock guard(_mutex); + _cv.notify_all(); + return _requests.emplace_back(std::move(request)).promise.getFuture(); +} + +AsyncFollower::AsyncFollower(std::shared_ptr follower) + : _follower(std::move(follower)), _asyncWorker([this] { this->runWorker(); }) {} + +AsyncFollower::~AsyncFollower() noexcept { + if (!_stopping) { + stop(); + } +} + +void AsyncFollower::runWorker() { + while (true) { + std::vector requests; + { + std::unique_lock guard(_mutex); + if (_stopping) { + break; + } + if (!_requests.empty()) { + std::swap(requests, _requests); + } else { + _cv.wait(guard); + } + } + + for (auto& req : requests) { + _follower->appendEntries(req.request).thenFinal([promise = std::move(req.promise)](auto&& res) mutable { + 
promise.setValue(std::forward(res)); + }); + } + } +} + +void AsyncFollower::stop() noexcept { + { + std::unique_lock guard(_mutex); + _stopping = true; + _cv.notify_all(); + } + + TRI_ASSERT(_asyncWorker.joinable()); + _asyncWorker.join(); +} + +AsyncFollower::AsyncRequest::AsyncRequest(AppendEntriesRequest request) + : request(std::move(request)) {} diff --git a/tests/Replication2/Mocks/AsyncFollower.h b/tests/Replication2/Mocks/AsyncFollower.h new file mode 100644 index 000000000000..1e922f045fa6 --- /dev/null +++ b/tests/Replication2/Mocks/AsyncFollower.h @@ -0,0 +1,64 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include + +#include "Replication2/ReplicatedLog/LogFollower.h" + +namespace arangodb::replication2::test { + +struct AsyncFollower : replicated_log::ILogParticipant, replicated_log::AbstractFollower { + explicit AsyncFollower(std::shared_ptr follower); + ~AsyncFollower() noexcept override; + [[nodiscard]] auto getStatus() const -> replicated_log::LogStatus override; + auto resign() && -> std::tuple, DeferredAction> override; + auto waitFor(LogIndex index) -> WaitForFuture override; + auto release(LogIndex doneWithIdx) -> Result override; + [[nodiscard]] auto getParticipantId() const noexcept -> ParticipantId const& override; + auto appendEntries(replicated_log::AppendEntriesRequest request) + -> futures::Future override; + + void stop() noexcept; + private: + void runWorker(); + + struct AsyncRequest { + AsyncRequest(replicated_log::AppendEntriesRequest request); + replicated_log::AppendEntriesRequest request; + futures::Promise promise; + }; + + std::mutex _mutex; + std::condition_variable _cv; + std::vector _requests; + std::shared_ptr const _follower; + bool _stopping{false}; + + std::thread _asyncWorker; +}; + +} // namespace arangodb::replication2::test diff --git a/tests/Replication2/Mocks/FakeReplicatedLog.cpp b/tests/Replication2/Mocks/FakeReplicatedLog.cpp new file mode 100644 index 000000000000..db19b27c7e96 --- /dev/null +++ b/tests/Replication2/Mocks/FakeReplicatedLog.cpp @@ -0,0 +1,45 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "FakeReplicatedLog.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; + + +auto TestReplicatedLog::becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) +-> std::shared_ptr { + auto ptr = ReplicatedLog::becomeFollower(id, term, std::move(leaderId)); + return std::make_shared(ptr); +} + +auto TestReplicatedLog::becomeLeader(ParticipantId const& id, LogTerm term, + std::vector> const& follower, + std::size_t writeConcern) + -> std::shared_ptr { + LogConfig config; + config.writeConcern = writeConcern; + config.waitForSync = false; + return becomeLeader(config, id, term, follower); +} diff --git a/tests/Replication2/Mocks/FakeReplicatedLog.h b/tests/Replication2/Mocks/FakeReplicatedLog.h new file mode 100644 index 000000000000..9eece1c31178 --- /dev/null +++ b/tests/Replication2/Mocks/FakeReplicatedLog.h @@ -0,0 +1,131 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include + +#include "Replication2/Mocks/ReplicatedLogMetricsMock.h" +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +namespace arangodb::replication2::test { + +struct DelayedFollowerLog : replicated_log::AbstractFollower, replicated_log::ILogParticipant { + explicit DelayedFollowerLog(std::shared_ptr follower) + : _follower(std::move(follower)) {} + + DelayedFollowerLog(LoggerContext const& logContext, + std::shared_ptr logMetricsMock, + ParticipantId const& id, std::unique_ptr logCore, + LogTerm term, ParticipantId leaderId) + : DelayedFollowerLog([&] { + auto inMemoryLog = replicated_log::InMemoryLog::loadFromLogCore(*logCore); + return std::make_shared( + logContext, std::move(logMetricsMock), id, std::move(logCore), + term, std::move(leaderId), std::move(inMemoryLog)); + }()) {} + + auto appendEntries(replicated_log::AppendEntriesRequest req) + -> arangodb::futures::Future override { + auto future = 
_asyncQueue.doUnderLock([&](auto& queue) { + return queue.emplace_back(std::make_shared(std::move(req))) + ->promise.getFuture(); + }); + return std::move(future).thenValue([this](auto&& result) mutable { + return _follower->appendEntries(std::forward(result)); + }); + } + + void runAsyncAppendEntries() { + auto asyncQueue = _asyncQueue.doUnderLock([](auto& _queue) { + auto queue = std::move(_queue); + _queue.clear(); + return queue; + }); + + for (auto& p : asyncQueue) { + p->promise.setValue(std::move(p->request)); + } + } + + using WaitForAsyncPromise = futures::Promise; + + struct AsyncRequest { + explicit AsyncRequest(replicated_log::AppendEntriesRequest request) + : request(std::move(request)) {} + replicated_log::AppendEntriesRequest request; + WaitForAsyncPromise promise; + }; + [[nodiscard]] auto pendingAppendEntries() const + -> std::deque> { + return _asyncQueue.copy(); + } + [[nodiscard]] auto hasPendingAppendEntries() const -> bool { + return _asyncQueue.doUnderLock( + [](auto const& queue) { return !queue.empty(); }); + } + + auto getParticipantId() const noexcept -> ParticipantId const& override { + return _follower->getParticipantId(); + } + + auto getStatus() const -> replicated_log::LogStatus override { + return _follower->getStatus(); + } + + [[nodiscard]] auto resign() && -> std::tuple, DeferredAction> override { + return std::move(*_follower).resign(); + } + + auto waitFor(LogIndex index) -> WaitForFuture override { return _follower->waitFor(index); } + + auto waitForIterator(LogIndex index) -> WaitForIteratorFuture override { + return _follower->waitForIterator(index); + } + + auto release(LogIndex doneWithIdx) -> Result override { + return _follower->release(doneWithIdx); + } + + private: + Guarded>> _asyncQueue; + std::shared_ptr _follower; +}; + +struct TestReplicatedLog : replicated_log::ReplicatedLog { + using ReplicatedLog::becomeLeader; + using ReplicatedLog::ReplicatedLog; + auto becomeFollower(ParticipantId const& id, LogTerm term, 
ParticipantId leaderId) + -> std::shared_ptr; + + auto becomeLeader(ParticipantId const& id, LogTerm term, + std::vector> const&, + std::size_t writeConcern) -> std::shared_ptr; +}; +} // namespace arangodb::replication2::test diff --git a/tests/Replication2/TestHelper.cpp b/tests/Replication2/Mocks/PersistedLog.cpp similarity index 59% rename from tests/Replication2/TestHelper.cpp rename to tests/Replication2/Mocks/PersistedLog.cpp index ad4b74c52c8e..65e5866e095a 100644 --- a/tests/Replication2/TestHelper.cpp +++ b/tests/Replication2/Mocks/PersistedLog.cpp @@ -1,37 +1,13 @@ -//////////////////////////////////////////////////////////////////////////////// -/// DISCLAIMER -/// -/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is ArangoDB GmbH, Cologne, Germany -/// -/// @author Lars Maier -//////////////////////////////////////////////////////////////////////////////// - -#include "TestHelper.h" - -#include "Replication2/ReplicatedLog/LogCore.h" -#include "Replication2/ReplicatedLog/LogLeader.h" -#include "Replication2/ReplicatedLog/ReplicatedLog.h" -#include "Replication2/ReplicatedLog/types.h" - -#include +// +// Created by lars on 10/08/2021. 
+// + +#include "PersistedLog.h" using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; auto MockLog::insert(PersistedLogIterator& iter, WriteOptions const&) -> arangodb::Result { auto lastIndex = LogIndex{0}; @@ -84,7 +60,7 @@ auto MockLog::removeBack(replication2::LogIndex start) -> Result { auto MockLog::drop() -> Result { _storage.clear(); - return Result(); + return {}; } void MockLog::setEntry(replication2::LogIndex idx, replication2::LogTerm term, @@ -145,32 +121,9 @@ void AsyncMockLog::runWorker() { std::swap(queue, _queue); } } - for (auto& lambda : queue) { - auto res = insert(*lambda->iter, lambda->opts); - lambda->promise.setValue(res); + for (auto& entry : queue) { + auto res = insert(*entry->iter, entry->opts); + entry->promise.setValue(res); } } } - -auto TestReplicatedLog::becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) - -> std::shared_ptr { - auto ptr = ReplicatedLog::becomeFollower(id, term, std::move(leaderId)); - return std::make_shared(ptr); -} - -auto TestReplicatedLog::becomeLeader(LogConfig config, ParticipantId id, LogTerm term, - std::vector> const& follower) - -> std::shared_ptr { - return ReplicatedLog::becomeLeader(config, std::move(id), term, follower); -} - -auto TestReplicatedLog::becomeLeader(ParticipantId const& id, LogTerm term, - std::vector> const& follower, - std::size_t writeConcern) - -> std::shared_ptr { - LogConfig config; - config.writeConcern = writeConcern; - config.waitForSync = false; - - return becomeLeader(config, id, term, follower); -} diff --git a/tests/Replication2/Mocks/PersistedLog.h b/tests/Replication2/Mocks/PersistedLog.h new file mode 100644 index 000000000000..e71c97c89c15 --- /dev/null +++ b/tests/Replication2/Mocks/PersistedLog.h @@ -0,0 +1,104 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// 
Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +namespace arangodb::replication2::test { + +using namespace replicated_log; + +struct MockLog : replication2::replicated_log::PersistedLog { + using storeType = std::map; + + explicit MockLog(replication2::LogId id); + MockLog(replication2::LogId id, storeType storage); + + auto insert(replication2::replicated_log::PersistedLogIterator& iter, + WriteOptions const&) -> Result override; + auto insertAsync(std::unique_ptr iter, + WriteOptions const&) -> futures::Future override; + auto read(replication2::LogIndex start) + -> std::unique_ptr override; + auto removeFront(replication2::LogIndex stop) -> Result override; + auto removeBack(replication2::LogIndex start) -> Result override; + auto drop() 
-> Result override; + + void setEntry(replication2::LogIndex idx, replication2::LogTerm term, + replication2::LogPayload payload); + void setEntry(replication2::PersistingLogEntry); + + [[nodiscard]] storeType getStorage() const { return _storage; } + + private: + using iteratorType = storeType::iterator; + storeType _storage; +}; + +struct AsyncMockLog : MockLog { + explicit AsyncMockLog(replication2::LogId id); + + ~AsyncMockLog() noexcept; + + auto insertAsync(std::unique_ptr iter, + WriteOptions const&) -> futures::Future override; + + auto stop() noexcept -> void { + if (!_stopping) { + { + std::unique_lock guard(_mutex); + _stopping = true; + _cv.notify_all(); + } + _asyncWorker.join(); + } + } + + private: + struct QueueEntry { + WriteOptions opts; + std::unique_ptr iter; + futures::Promise promise; + }; + + void runWorker(); + + std::mutex _mutex; + std::vector> _queue; + std::condition_variable _cv; + std::atomic _stopping = false; + bool _stopped = false; + // _asyncWorker *must* be initialized last, otherwise starting the thread + // races with initializing the coordination variables. 
+ std::thread _asyncWorker; +}; + +} diff --git a/tests/Replication2/ReplicatedLogMetricsMock.cpp b/tests/Replication2/Mocks/ReplicatedLogMetricsMock.cpp similarity index 100% rename from tests/Replication2/ReplicatedLogMetricsMock.cpp rename to tests/Replication2/Mocks/ReplicatedLogMetricsMock.cpp diff --git a/tests/Replication2/ReplicatedLogMetricsMock.h b/tests/Replication2/Mocks/ReplicatedLogMetricsMock.h similarity index 100% rename from tests/Replication2/ReplicatedLogMetricsMock.h rename to tests/Replication2/Mocks/ReplicatedLogMetricsMock.h diff --git a/tests/Replication2/AppendEntriesBatchTest.cpp b/tests/Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp similarity index 96% rename from tests/Replication2/AppendEntriesBatchTest.cpp rename to tests/Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp index 0e0aee610675..35c8ec5c6deb 100644 --- a/tests/Replication2/AppendEntriesBatchTest.cpp +++ b/tests/Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp @@ -28,6 +28,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct AppendEntriesBatchTest : ReplicatedLogTest {}; @@ -72,7 +73,8 @@ TEST_F(AppendEntriesBatchTest, test_with_two_batches) { // 1. AppendEntries 1..1000 // 2. AppendEntries 2..2000 // 3. AppendEntries CommitIndex - EXPECT_EQ(num_requests, 3 + 1); + // 4. 
AppendEntries LCI + EXPECT_EQ(num_requests, 3 + 1 + 1); } { diff --git a/tests/Replication2/ChangeStreamTests.cpp b/tests/Replication2/ReplicatedLog/ChangeStreamTests.cpp similarity index 99% rename from tests/Replication2/ChangeStreamTests.cpp rename to tests/Replication2/ReplicatedLog/ChangeStreamTests.cpp index 786a64474d59..3bb2b5271d23 100644 --- a/tests/Replication2/ChangeStreamTests.cpp +++ b/tests/Replication2/ReplicatedLog/ChangeStreamTests.cpp @@ -28,6 +28,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct ChangeStreamTests : ReplicatedLogTest {}; diff --git a/tests/Replication2/CheckLogsTest.cpp b/tests/Replication2/ReplicatedLog/CheckLogsTest.cpp similarity index 100% rename from tests/Replication2/CheckLogsTest.cpp rename to tests/Replication2/ReplicatedLog/CheckLogsTest.cpp diff --git a/tests/Replication2/ConcurrencyTests.cpp b/tests/Replication2/ReplicatedLog/ConcurrencyTests.cpp similarity index 99% rename from tests/Replication2/ConcurrencyTests.cpp rename to tests/Replication2/ReplicatedLog/ConcurrencyTests.cpp index cf495b475ddf..d6aa1b37d4ce 100644 --- a/tests/Replication2/ConcurrencyTests.cpp +++ b/tests/Replication2/ReplicatedLog/ConcurrencyTests.cpp @@ -35,6 +35,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct ReplicatedLogConcurrentTest : ReplicatedLogTest { @@ -216,7 +217,6 @@ TEST_F(ReplicatedLogConcurrentTest, lonelyLeader) { using namespace std::chrono_literals; auto replicatedLog = makeReplicatedLogWithAsyncMockLog(LogId{1}); - // TODO this test hangs because there is not local follower currently auto leaderLog = replicatedLog->becomeLeader("leader", LogTerm{1}, {}, 1); auto data = ThreadCoordinationData{leaderLog}; diff --git a/tests/Replication2/DetectConflictTest.cpp 
b/tests/Replication2/ReplicatedLog/DetectConflictTest.cpp similarity index 57% rename from tests/Replication2/DetectConflictTest.cpp rename to tests/Replication2/ReplicatedLog/DetectConflictTest.cpp index 4f8682b03f0a..d551ac786efc 100644 --- a/tests/Replication2/DetectConflictTest.cpp +++ b/tests/Replication2/ReplicatedLog/DetectConflictTest.cpp @@ -27,13 +27,14 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::algorithms; +using namespace arangodb::replication2::test; struct DetectConflictTest : ::testing::Test { }; struct TestInMemoryLog : InMemoryLog { - TestInMemoryLog(InMemoryLog::log_type log) : InMemoryLog(std::move(log)) {} + explicit TestInMemoryLog(InMemoryLog::log_type log) : InMemoryLog(std::move(log)) {} }; TEST_F(DetectConflictTest, log_empty) { @@ -63,10 +64,10 @@ TEST_F(DetectConflictTest, log_skip_term) { TEST_F(DetectConflictTest, log_missing_after) { auto log = TestInMemoryLog{{ - InMemoryLogEntry(PersistingLogEntry(LogTerm{1}, LogIndex{1}, LogPayload::createFromString("A"))), - InMemoryLogEntry(PersistingLogEntry(LogTerm{1}, LogIndex{2}, LogPayload::createFromString("A"))), - InMemoryLogEntry(PersistingLogEntry(LogTerm{1}, LogIndex{3}, LogPayload::createFromString("A"))), - }}; + InMemoryLogEntry(PersistingLogEntry(LogTerm{1}, LogIndex{1}, LogPayload::createFromString("A"))), + InMemoryLogEntry(PersistingLogEntry(LogTerm{1}, LogIndex{2}, LogPayload::createFromString("A"))), + InMemoryLogEntry(PersistingLogEntry(LogTerm{1}, LogIndex{3}, LogPayload::createFromString("A"))), + }}; auto res = algorithms::detectConflict(log, TermIndexPair{LogTerm{4}, LogIndex{6}}); ASSERT_TRUE(res.has_value()); auto [reason, next] = *res; @@ -74,3 +75,34 @@ TEST_F(DetectConflictTest, log_missing_after) { EXPECT_EQ(TermIndexPair(LogTerm{1}, LogIndex{4}), next); } +TEST_F(DetectConflictTest, log_missing_before) { + auto log = TestInMemoryLog{{ + InMemoryLogEntry(PersistingLogEntry(LogTerm{4}, LogIndex{11}, + 
LogPayload::createFromString("A"))), + InMemoryLogEntry(PersistingLogEntry(LogTerm{4}, LogIndex{12}, + LogPayload::createFromString("A"))), + InMemoryLogEntry(PersistingLogEntry(LogTerm{4}, LogIndex{13}, + LogPayload::createFromString("A"))), + }}; + auto res = algorithms::detectConflict(log, TermIndexPair{LogTerm{4}, LogIndex{6}}); + ASSERT_TRUE(res.has_value()); + auto [reason, next] = *res; + EXPECT_EQ(reason, ConflictReason::LOG_ENTRY_BEFORE_BEGIN); + EXPECT_EQ(TermIndexPair(LogTerm{0}, LogIndex{0}), next); +} + +TEST_F(DetectConflictTest, log_missing_before_wrong_term) { + auto log = TestInMemoryLog{{ + InMemoryLogEntry(PersistingLogEntry(LogTerm{4}, LogIndex{11}, + LogPayload::createFromString("A"))), + InMemoryLogEntry(PersistingLogEntry(LogTerm{4}, LogIndex{12}, + LogPayload::createFromString("A"))), + InMemoryLogEntry(PersistingLogEntry(LogTerm{4}, LogIndex{13}, + LogPayload::createFromString("A"))), + }}; + auto res = algorithms::detectConflict(log, TermIndexPair{LogTerm{5}, LogIndex{12}}); + ASSERT_TRUE(res.has_value()); + auto [reason, next] = *res; + EXPECT_EQ(reason, ConflictReason::LOG_ENTRY_NO_MATCH); + EXPECT_EQ(TermIndexPair(LogTerm{4}, LogIndex{11}), next); +} diff --git a/tests/Replication2/FollowerAppendEntries.cpp b/tests/Replication2/ReplicatedLog/FollowerAppendEntries.cpp similarity index 87% rename from tests/Replication2/FollowerAppendEntries.cpp rename to tests/Replication2/ReplicatedLog/FollowerAppendEntries.cpp index 759851afdd93..8aef71dbfcc7 100644 --- a/tests/Replication2/FollowerAppendEntries.cpp +++ b/tests/Replication2/ReplicatedLog/FollowerAppendEntries.cpp @@ -24,7 +24,7 @@ #include "Basics/voc-errors.h" -#include "ReplicatedLogMetricsMock.h" +#include "Replication2/Mocks/ReplicatedLogMetricsMock.h" #include "Replication2/ReplicatedLog/LogFollower.h" #include "Replication2/ReplicatedLog/ReplicatedLog.h" @@ -33,6 +33,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace 
arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct FollowerAppendEntriesTest : ReplicatedLogTest { auto makeFollower(ParticipantId id, LogTerm term, ParticipantId leaderId) -> std::shared_ptr { @@ -71,7 +72,7 @@ TEST_F(FollowerAppendEntriesTest, valid_append_entries) { AppendEntriesRequest request; request.leaderId = "leader"; request.leaderTerm = LogTerm{5}; - request.prevLogEntry = TermIndexPair{LogTerm{0}, LogIndex{0}}; + request.prevLogEntry = TermIndexPair{LogTerm{1}, LogIndex{1}}; request.leaderCommit = LogIndex{1}; request.messageId = ++nextMessageId; request.entries = {}; @@ -291,4 +292,45 @@ TEST_F(FollowerAppendEntriesTest, outdated_message_id) { } } +TEST_F(FollowerAppendEntriesTest, rewrite_log) { + auto follower = makeFollower("follower", LogTerm{5}, "leader"); + + { + AppendEntriesRequest request; + request.leaderId = "leader"; + request.leaderTerm = LogTerm{5}; + request.prevLogEntry = TermIndexPair{LogTerm{0}, LogIndex{0}}; + request.leaderCommit = LogIndex{0}; + request.messageId = ++nextMessageId; + request.entries = {InMemoryLogEntry( + PersistingLogEntry(LogTerm{5}, LogIndex{20}, LogPayload::createFromString("some payload")))}; + + auto f = follower->appendEntries(std::move(request)); + ASSERT_TRUE(f.isReady()); + { + auto result = f.get(); + EXPECT_EQ(result.logTerm, LogTerm{5}); + EXPECT_EQ(result.errorCode, TRI_ERROR_NO_ERROR); + EXPECT_EQ(result.reason, AppendEntriesErrorReason::NONE); + } + } + + { + auto status = follower->getStatus(); + ASSERT_TRUE(std::holds_alternative(status.getVariant())); + auto fstatus = std::get(status.getVariant()); + EXPECT_EQ(fstatus.local.firstIndex, LogIndex{20}); + } + + auto iter = follower->getLogIterator(LogIndex{1}); + { + auto entry = iter->next(); + ASSERT_TRUE(entry.has_value()); + EXPECT_EQ(entry->logIndex(), LogIndex{20}); + } + { + auto entry = iter->next(); + ASSERT_FALSE(entry.has_value()); + } +} diff --git a/tests/Replication2/FollowerWaitFor.cpp 
b/tests/Replication2/ReplicatedLog/FollowerWaitFor.cpp similarity index 98% rename from tests/Replication2/FollowerWaitFor.cpp rename to tests/Replication2/ReplicatedLog/FollowerWaitFor.cpp index e0713aca84c3..d25d64d0ba4e 100644 --- a/tests/Replication2/FollowerWaitFor.cpp +++ b/tests/Replication2/ReplicatedLog/FollowerWaitFor.cpp @@ -30,6 +30,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct FollowerWaitForTest : ReplicatedLogTest { auto makeFollower(ParticipantId id, LogTerm term, ParticipantId leaderId) -> std::shared_ptr { diff --git a/tests/Replication2/ReplicatedLog/InMemoryLogTest.cpp b/tests/Replication2/ReplicatedLog/InMemoryLogTest.cpp new file mode 100644 index 000000000000..e6df0161af27 --- /dev/null +++ b/tests/Replication2/ReplicatedLog/InMemoryLogTest.cpp @@ -0,0 +1,317 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include + +#include "Containers/Enumerate.h" + +#include "Replication2/ReplicatedLog/InMemoryLog.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; + +struct TestInMemoryLog : InMemoryLog { + explicit TestInMemoryLog(log_type log) : InMemoryLog(std::move(log)) {} + explicit TestInMemoryLog(log_type log, LogIndex first) + : InMemoryLog(std::move(log), first) {} + TestInMemoryLog() : InMemoryLog(log_type{}) {} +}; + +struct InMemoryLogTestBase { + static auto createLogForRangeSingleTerm(LogRange range, LogTerm term = LogTerm{1}) + -> TestInMemoryLog { + auto transient = InMemoryLog::log_type::transient_type{}; + for (auto i : range) { + transient.push_back(InMemoryLogEntry( + {term, LogIndex{i}, LogPayload::createFromString("foo")})); + } + return TestInMemoryLog(transient.persistent(), range.from); + } +}; + +struct InMemoryLogTest : ::testing::TestWithParam, InMemoryLogTestBase {}; + +TEST_P(InMemoryLogTest, first_last_next) { + auto const term = LogTerm{1}; + auto const range = GetParam(); + auto const log = createLogForRangeSingleTerm(range, term); + auto [from, to] = range; + + EXPECT_EQ(!range.empty(), log.getFirstEntry().has_value()); + EXPECT_EQ(!range.empty(), log.getLastEntry().has_value()); + EXPECT_EQ(log.getNextIndex(), to); + + EXPECT_EQ(log.getIndexRange(), range); + + if (!range.empty()) { + { + auto memtry = log.getFirstEntry().value(); + EXPECT_EQ(memtry.entry().logIndex(), from); + } + { + auto memtry = log.getLastEntry().value(); + EXPECT_EQ(memtry.entry().logIndex() + 1, to); + EXPECT_EQ(log.getLastIndex() + 1, to); + EXPECT_EQ(log.back().entry().logIndex() + 1, to); + + EXPECT_EQ(memtry.entry().logTerm(), term); + EXPECT_EQ(log.getLastTerm(), term); + 
EXPECT_EQ(log.back().entry().logTerm(), term); + } + } +} + +TEST_P(InMemoryLogTest, get_entry_by_index) { + auto const range = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + auto const tests = {LogIndex{1}, LogIndex{12}, LogIndex{45}}; + for (auto idx : tests) { + auto memtry = log.getEntryByIndex(idx); + EXPECT_EQ(range.contains(idx), memtry.has_value()) + << "Range is " << range << " and index is " << idx; + if (range.contains(idx)) { + auto entry = memtry->entry(); + EXPECT_EQ(entry.logIndex(), idx); + } + } +} + +TEST_P(InMemoryLogTest, empty) { + auto const range = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + EXPECT_EQ(range.empty(), log.empty()); +} + +TEST_P(InMemoryLogTest, append_in_place) { + auto const range = GetParam(); + auto log = createLogForRangeSingleTerm(range); + + auto memtry = + InMemoryLogEntry({LogTerm{1}, range.to, LogPayload::createFromString("foo")}); + log.appendInPlace(LoggerContext(Logger::FIXME), std::move(memtry)); + { + auto result = log.getEntryByIndex(range.to); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->entry().logIndex(), range.to); + } + { + auto result = log.getLastEntry(); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->entry().logIndex(), range.to); + } +} + +auto const LogRanges = ::testing::Values(LogRange(LogIndex{1}, LogIndex{15}), + LogRange(LogIndex{1}, LogIndex{1234}), + LogRange(LogIndex{1}, LogIndex{1}), + LogRange(LogIndex{5}, LogIndex{18}), + LogRange(LogIndex{76}, LogIndex{76})); + +INSTANTIATE_TEST_CASE_P(InMemoryLogTestInstance, InMemoryLogTest, LogRanges); + +struct InMemoryLogAppendTest + : testing::TestWithParam>, + InMemoryLogTestBase { + static auto getPersistedEntriesVector(LogIndex first, std::size_t length, + LogTerm term = LogTerm{1}) { + auto result = InMemoryLog::log_type_persisted::transient_type{}; + for (auto idx : LogRange(first, first + length)) { + result.push_back(PersistingLogEntry{term, idx, 
LogPayload::createFromString("foo")}); + } + return result.persistent(); + } +}; + +TEST_P(InMemoryLogAppendTest, append_peristed_entries) { + auto const [length, range] = GetParam(); + auto const log = createLogForRangeSingleTerm(range, LogTerm{1}); + auto const toAppend = getPersistedEntriesVector(range.to, length, LogTerm{2}); + + auto const newLog = log.append(LoggerContext(Logger::FIXME), toAppend); + for (auto idx : LogRange(range.from, range.to + length)) { + auto memtry = newLog.getEntryByIndex(idx); + ASSERT_TRUE(memtry.has_value()) << "idx = " << idx; + auto const expectedTerm = range.contains(idx) ? LogTerm{1} : LogTerm{2}; + EXPECT_EQ(memtry->entry().logIndex(), idx); + EXPECT_EQ(memtry->entry().logTerm(), expectedTerm); + } +} + +INSTANTIATE_TEST_CASE_P(InMemoryLogAppendTest, InMemoryLogAppendTest, + ::testing::Combine(::testing::Range(0, 10), LogRanges)); + +struct InMemoryLogSliceTest : ::testing::TestWithParam>, + InMemoryLogTestBase {}; + +TEST_P(InMemoryLogSliceTest, slice) { + auto const [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + + auto s = log.slice(testRange.from, testRange.to); + auto const expectedRange = intersect(testRange, range); + + ASSERT_EQ(s.size(), expectedRange.count()); + for (auto const& [idx, e] : enumerate(s)) { + EXPECT_EQ(e.entry().logIndex(), expectedRange.from + idx); + } +} + +TEST_P(InMemoryLogSliceTest, get_iterator_range) { + auto const [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + + auto const expectedRange = intersect(range, testRange); + auto iter = log.getIteratorRange(testRange.from, testRange.to); + auto [from, to] = iter->range(); + if (expectedRange.empty()) { + EXPECT_TRUE(from == to); + + } else { + EXPECT_EQ(from, expectedRange.from); + EXPECT_EQ(to, expectedRange.to); + + for (auto idx : expectedRange) { + auto value = iter->next(); + ASSERT_TRUE(value.has_value()) << "idx = " << idx << " range = " << expectedRange; + 
EXPECT_EQ(value->logIndex(), idx); + } + } + + EXPECT_EQ(iter->next(), std::nullopt); +} + +TEST_P(InMemoryLogSliceTest, get_iterator_from) { + auto [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + testRange.to = range.to; // no bound on to + + auto const expectedRange = intersect(range, testRange); + auto iter = log.getIteratorFrom(testRange.from); + + for (auto idx : expectedRange) { + auto value = iter->next(); + ASSERT_TRUE(value.has_value()) << "idx = " << idx << " range = " << expectedRange; + EXPECT_EQ(value->logIndex(), idx); + } + + EXPECT_EQ(iter->next(), std::nullopt); +} + +TEST_P(InMemoryLogSliceTest, release) { + auto [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + testRange.to = range.to; + auto const expectedRange = intersect(range, testRange); + if (!expectedRange.empty()) { + auto newLog = log.release(testRange.from); + EXPECT_EQ(newLog.getIndexRange(), expectedRange); + } +} + +auto const SliceRanges = ::testing::Values(LogRange(LogIndex{4}, LogIndex{6}), + LogRange(LogIndex{1}, LogIndex{8}), + LogRange(LogIndex{100}, LogIndex{120}), + LogRange(LogIndex{18}, LogIndex{18})); + +INSTANTIATE_TEST_CASE_P(InMemoryLogSliceTest, InMemoryLogSliceTest, + ::testing::Combine(LogRanges, SliceRanges)); + +using TermDistribution = std::map; + +using TermTestData = std::tuple; + +struct IndexOfTermTest : ::testing::TestWithParam, InMemoryLogTestBase { + static auto createLogForDistribution(LogIndex first, TermDistribution const& dist) + -> TestInMemoryLog { + auto transient = InMemoryLog::log_type::transient_type{}; + auto next = first; + for (auto [term, length] : dist) { + for (auto idx : LogRange(next, next + length)) { + transient.push_back( + InMemoryLogEntry({term, idx, LogPayload::createFromString("foo")})); + } + next = next + length; + } + return TestInMemoryLog(transient.persistent()); + } + + static auto getTermBounds(LogIndex first, TermDistribution const& dist, 
LogTerm wanted) + -> std::optional { + auto next = first; + for (auto [term, length] : dist) { + if (term == wanted) { + return LogRange{next, next + length}; + } + next = next + length; + } + + return std::nullopt; + } +}; + +TEST_P(IndexOfTermTest, first_index_of_term) { + auto [term, first, dist] = GetParam(); + auto log = createLogForDistribution(first, dist); + + auto range = getTermBounds(first, dist, term); + auto firstInTerm = log.getFirstIndexOfTerm(term); + auto lastInTerm = log.getLastIndexOfTerm(term); + + ASSERT_EQ(range.has_value(), firstInTerm.has_value()); + ASSERT_EQ(range.has_value(), lastInTerm.has_value()); + + if (range.has_value()) { + EXPECT_EQ(range->from, *firstInTerm) << "term = " << term << " log = " << log.dump(); + EXPECT_EQ(range->to, *lastInTerm + 1); + } +} + +auto Distributions = ::testing::Values( + TermDistribution{ + {LogTerm{1}, 5}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + {LogTerm{3}, 18}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + {LogTerm{3}, 18}, + }); + +INSTANTIATE_TEST_CASE_P( + IndexOfTermTest, IndexOfTermTest, + ::testing::Combine(::testing::Values(LogTerm{1}, LogTerm{2}, LogTerm{3}), + ::testing::Values(LogIndex{1}, LogIndex{10}), Distributions)); diff --git a/tests/Replication2/LeaderAppendEntriesTest.cpp b/tests/Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp similarity index 99% rename from tests/Replication2/LeaderAppendEntriesTest.cpp rename to tests/Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp index 97803bac64b1..e6bd6eee3b76 100644 --- a/tests/Replication2/LeaderAppendEntriesTest.cpp +++ b/tests/Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp @@ -31,6 +31,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace 
arangodb::replication2::test; struct LeaderAppendEntriesTest : ReplicatedLogTest {}; diff --git a/tests/Replication2/LogReclaimTest.cpp b/tests/Replication2/ReplicatedLog/LogReclaimTest.cpp similarity index 94% rename from tests/Replication2/LogReclaimTest.cpp rename to tests/Replication2/ReplicatedLog/LogReclaimTest.cpp index 0c77eab52fee..4bc07aca3540 100644 --- a/tests/Replication2/LogReclaimTest.cpp +++ b/tests/Replication2/ReplicatedLog/LogReclaimTest.cpp @@ -21,7 +21,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "Replication2/ReplicatedLog/types.h" -#include "Replication2/TestHelper.h" +#include "TestHelper.h" #include @@ -30,6 +30,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; TEST_F(ReplicatedLogTest, reclaim_leader_after_term_change) { @@ -42,7 +43,7 @@ TEST_F(ReplicatedLogTest, reclaim_leader_after_term_change) { auto idx = leader->insert(LogPayload::createFromString("payload"), false, LogLeader::doNotTriggerAsyncReplication); auto f = leader->waitFor(idx).then( - [&](futures::Try>&& quorum) { + [&](futures::Try&& quorum) { EXPECT_TRUE(quorum.hasException()); try { quorum.throwIfFailed(); @@ -73,7 +74,7 @@ TEST_F(ReplicatedLogTest, reclaim_follower_after_term_change) { auto idx = leader->insert(LogPayload::createFromString("payload"), false, LogLeader::doNotTriggerAsyncReplication); auto f = follower->waitFor(idx).then( - [&](futures::Try>&& quorum) { + [&](futures::Try&& quorum) { EXPECT_TRUE(quorum.hasException()); try { quorum.throwIfFailed(); diff --git a/tests/Replication2/MaintenanceTests.cpp b/tests/Replication2/ReplicatedLog/MaintenanceTests.cpp similarity index 97% rename from tests/Replication2/MaintenanceTests.cpp rename to tests/Replication2/ReplicatedLog/MaintenanceTests.cpp index 26952ce2f507..1d444349bf6b 100644 --- a/tests/Replication2/MaintenanceTests.cpp +++ 
b/tests/Replication2/ReplicatedLog/MaintenanceTests.cpp @@ -171,9 +171,8 @@ TEST_F(ReplicationMaintenanceTest, create_replicated_log_detect_wrong_term) { auto const logId = LogId{12}; auto const database = DatabaseID{"mydb"}; auto const localLogs = ReplicatedLogStatusMap{ - {logId, - replicated_log::LogStatus{ - replicated_log::FollowerStatus{{}, ParticipantId{"leader"}, LogTerm{4}}}}, + {logId, replicated_log::LogStatus{replicated_log::FollowerStatus{ + {}, ParticipantId{"leader"}, LogTerm{4}, LogIndex{0}}}}, }; auto const defaultConfig = LogConfig{}; diff --git a/tests/Replication2/MultiTermTest.cpp b/tests/Replication2/ReplicatedLog/MultiTermTest.cpp similarity index 95% rename from tests/Replication2/MultiTermTest.cpp rename to tests/Replication2/ReplicatedLog/MultiTermTest.cpp index 0604f82c4c7a..040b2e7e306e 100644 --- a/tests/Replication2/MultiTermTest.cpp +++ b/tests/Replication2/ReplicatedLog/MultiTermTest.cpp @@ -23,11 +23,12 @@ #include #include "Replication2/ReplicatedLog/types.h" -#include "Replication2/TestHelper.h" +#include "TestHelper.h" using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct MultiTermTest : ReplicatedLogTest {}; @@ -44,8 +45,8 @@ TEST_F(MultiTermTest, add_follower_test) { leader->triggerAsyncReplication(); { ASSERT_TRUE(f.isReady()); - auto const& quorum = f.get(); - EXPECT_EQ(quorum->quorum, std::vector{"leader"}); + auto const& result = f.get(); + EXPECT_EQ(result.quorum->quorum, std::vector{"leader"}); } { auto stats = std::get(leader->getStatus().getVariant()).local; @@ -250,10 +251,11 @@ TEST_F(MultiTermTest, resign_leader_append_entries) { ASSERT_TRUE(f2.isReady()); { - auto quorum = f2.get(); - EXPECT_EQ(quorum->index, LogIndex{3}); - EXPECT_EQ(quorum->term, LogTerm{2}); - EXPECT_EQ(quorum->quorum, + auto result = f2.get(); + EXPECT_EQ(result.currentCommitIndex, LogIndex{3}); + 
EXPECT_EQ(result.quorum->index, LogIndex{3}); + EXPECT_EQ(result.quorum->term, LogTerm{2}); + EXPECT_EQ(result.quorum->quorum, (std::vector{"newLeader", "newFollower"})); } } diff --git a/tests/Replication2/ReplicatedLogTest.cpp b/tests/Replication2/ReplicatedLog/ReplicatedLogTest.cpp similarity index 96% rename from tests/Replication2/ReplicatedLogTest.cpp rename to tests/Replication2/ReplicatedLog/ReplicatedLogTest.cpp index f51eb79fdd43..e23c92eafd14 100644 --- a/tests/Replication2/ReplicatedLogTest.cpp +++ b/tests/Replication2/ReplicatedLog/ReplicatedLogTest.cpp @@ -20,10 +20,11 @@ /// @author Tobias Gödderz //////////////////////////////////////////////////////////////////////////////// -#include "Replication2/TestHelper.h" +#include "TestHelper.h" using namespace arangodb; using namespace arangodb::replication2; +using namespace arangodb::replication2::test; TEST(LogIndexTest, compareOperators) { diff --git a/tests/Replication2/RewriteLogTest.cpp b/tests/Replication2/ReplicatedLog/RewriteLogTest.cpp similarity index 97% rename from tests/Replication2/RewriteLogTest.cpp rename to tests/Replication2/ReplicatedLog/RewriteLogTest.cpp index 56d2507f6bc0..db8d4f5785eb 100644 --- a/tests/Replication2/RewriteLogTest.cpp +++ b/tests/Replication2/ReplicatedLog/RewriteLogTest.cpp @@ -22,11 +22,12 @@ #include "Replication2/ReplicatedLog/LogCore.h" #include "Replication2/ReplicatedLog/types.h" -#include "Replication2/TestHelper.h" +#include "TestHelper.h" using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct RewriteLogTest : ReplicatedLogTest {}; @@ -101,7 +102,8 @@ TEST_F(RewriteLogTest, rewrite_old_leader) { } // AppendEntries with prevLogIndex 0 -> success = true // AppendEntries with new commitIndex - EXPECT_EQ(number_of_runs, 2); + // AppendEntries with new lci + EXPECT_EQ(number_of_runs, 3); } { diff --git a/tests/Replication2/RocksDBLogTest.cpp 
b/tests/Replication2/ReplicatedLog/RocksDBLogTest.cpp similarity index 100% rename from tests/Replication2/RocksDBLogTest.cpp rename to tests/Replication2/ReplicatedLog/RocksDBLogTest.cpp diff --git a/tests/Replication2/SimpleInsertTests.cpp b/tests/Replication2/ReplicatedLog/SimpleInsertTests.cpp similarity index 93% rename from tests/Replication2/SimpleInsertTests.cpp rename to tests/Replication2/ReplicatedLog/SimpleInsertTests.cpp index 05f036367b59..fbabfe4d58c2 100644 --- a/tests/Replication2/SimpleInsertTests.cpp +++ b/tests/Replication2/ReplicatedLog/SimpleInsertTests.cpp @@ -32,6 +32,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; TEST_F(ReplicatedLogTest, write_single_entry_to_follower) { auto coreA = makeLogCore(LogId{1}); @@ -169,10 +170,11 @@ TEST_F(ReplicatedLogTest, write_single_entry_to_follower) { { // Expect the quorum to consist of the follower only ASSERT_TRUE(f.isReady()); - auto quorum = f.get(); - EXPECT_EQ(quorum->index, LogIndex{2}); - EXPECT_EQ(quorum->term, LogTerm{1}); - EXPECT_EQ(quorum->quorum, (std::vector{leaderId, followerId})); + auto result = f.get(); + EXPECT_EQ(result.currentCommitIndex, LogIndex{2}); + EXPECT_EQ(result.quorum->index, LogIndex{2}); + EXPECT_EQ(result.quorum->term, LogTerm{1}); + EXPECT_EQ(result.quorum->quorum, (std::vector{leaderId, followerId})); } // Follower should have pending append entries @@ -187,19 +189,23 @@ TEST_F(ReplicatedLogTest, write_single_entry_to_follower) { EXPECT_EQ(status.local.spearHead.index, LogIndex{2}); } + // LCI update + EXPECT_TRUE(follower->hasPendingAppendEntries()); + follower->runAsyncAppendEntries(); EXPECT_FALSE(follower->hasPendingAppendEntries()); } { - // Metric should have registered four appendEntries. + // Metric should have registered six appendEntries. 
// There was one insert, resulting in one appendEntries each to the follower // and the local follower. After the followers responded, the commit index // is updated, and both followers get another appendEntries request. + // Finally, the LCI is updated with another round of requests. auto numAppendEntries = countHistogramEntries(_logMetricsMock->replicatedLogAppendEntriesRttUs); - EXPECT_EQ(numAppendEntries, 4); + EXPECT_EQ(numAppendEntries, 6); auto numFollowerAppendEntries = countHistogramEntries(_logMetricsMock->replicatedLogFollowerAppendEntriesRtUs); - EXPECT_EQ(numFollowerAppendEntries, 4); + EXPECT_EQ(numFollowerAppendEntries, 6); } } @@ -262,7 +268,8 @@ TEST_F(ReplicatedLogTest, wake_up_as_leader_with_persistent_data) { // AppendEntries with prevLogIndex 2 -> success = false // AppendEntries with prevLogIndex 0 -> success = true // AppendEntries with new commitIndex - EXPECT_EQ(number_of_runs, 3); + // AppendEntries with new LCI + EXPECT_EQ(number_of_runs, 4); } { @@ -374,10 +381,11 @@ TEST_F(ReplicatedLogTest, multiple_follower) { // and update of commitIndex on both follower { ASSERT_TRUE(future.isReady()); - auto quorum = future.get(); - EXPECT_EQ(quorum->term, LogTerm{1}); - EXPECT_EQ(quorum->index, LogIndex{2}); - EXPECT_EQ(quorum->quorum, (std::vector{leaderId, followerId_1, followerId_2})); + auto result = future.get(); + EXPECT_EQ(result.currentCommitIndex, LogIndex{2}); + EXPECT_EQ(result.quorum->term, LogTerm{1}); + EXPECT_EQ(result.quorum->index, LogIndex{2}); + EXPECT_EQ(result.quorum->quorum, (std::vector{leaderId, followerId_1, followerId_2})); } EXPECT_TRUE(follower_1->hasPendingAppendEntries()); @@ -401,6 +409,12 @@ TEST_F(ReplicatedLogTest, multiple_follower) { EXPECT_EQ(status.local.spearHead.index, LogIndex{2}); } + // LCI updates + follower_1->runAsyncAppendEntries(); + EXPECT_FALSE(follower_1->hasPendingAppendEntries()); // no lci update yet + follower_2->runAsyncAppendEntries(); + EXPECT_TRUE(follower_2->hasPendingAppendEntries()); + 
follower_1->runAsyncAppendEntries(); EXPECT_FALSE(follower_1->hasPendingAppendEntries()); follower_2->runAsyncAppendEntries(); @@ -483,7 +497,8 @@ TEST_F(ReplicatedLogTest, write_concern_one_immediate_leader_commit_on_startup) } // AppendEntries with prevLogIndex 2 -> success = false, replicated log empty // AppendEntries with prevLogIndex 2 -> success = true, including commit index - EXPECT_EQ(number_of_runs, 2); + // AppendEntries with LCI + EXPECT_EQ(number_of_runs, 3); } { diff --git a/tests/Replication2/ReplicatedLog/TestHelper.cpp b/tests/Replication2/ReplicatedLog/TestHelper.cpp new file mode 100644 index 000000000000..8fba1ec85407 --- /dev/null +++ b/tests/Replication2/ReplicatedLog/TestHelper.cpp @@ -0,0 +1,30 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "TestHelper.h" + +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +#include diff --git a/tests/Replication2/ReplicatedLog/TestHelper.h b/tests/Replication2/ReplicatedLog/TestHelper.h new file mode 100644 index 000000000000..c557c810c837 --- /dev/null +++ b/tests/Replication2/ReplicatedLog/TestHelper.h @@ -0,0 +1,98 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "Replication2/Mocks/ReplicatedLogMetricsMock.h" + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +#include + +#include +#include +#include + +#include "Replication2/Mocks/PersistedLog.h" +#include "Replication2/Mocks/FakeReplicatedLog.h" + +namespace arangodb::replication2::test { + +using namespace replicated_log; + +struct ReplicatedLogTest : ::testing::Test { + + auto makeLogCore(LogId id) -> std::unique_ptr { + auto persisted = makePersistedLog(id); + return std::make_unique(persisted); + } + + auto getPersistedLogById(LogId id) -> std::shared_ptr { + return _persistedLogs.at(id); + } + + auto makePersistedLog(LogId id) -> std::shared_ptr { + auto persisted = std::make_shared(id); + _persistedLogs[id] = persisted; + return persisted; + } + + auto makeReplicatedLog(LogId id) -> std::shared_ptr { + auto core = makeLogCore(id); + return std::make_shared(std::move(core), _logMetricsMock, + LoggerContext(Logger::FIXME)); + } + + auto makeReplicatedLogWithAsyncMockLog(LogId id) -> std::shared_ptr { + auto persisted = std::make_shared(id); + _persistedLogs[id] = persisted; + auto core = std::make_unique(persisted); + return std::make_shared(std::move(core), _logMetricsMock, + LoggerContext(Logger::FIXME)); + } + + auto defaultLogger() { + return LoggerContext(Logger::REPLICATION2); + } + + auto stopAsyncMockLogs() -> void { + for (auto const& it : 
_persistedLogs) { + if (auto log = std::dynamic_pointer_cast(it.second); log != nullptr) { + log->stop(); + } + } + } + + std::unordered_map> _persistedLogs; + std::shared_ptr _logMetricsMock = std::make_shared(); +}; + + +} diff --git a/tests/Replication2/UpdateReplicatedLogTests.cpp b/tests/Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp similarity index 99% rename from tests/Replication2/UpdateReplicatedLogTests.cpp rename to tests/Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp index 89e7e7dbc796..c82a0e0c3bb0 100644 --- a/tests/Replication2/UpdateReplicatedLogTests.cpp +++ b/tests/Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp @@ -31,6 +31,7 @@ using namespace arangodb; using namespace arangodb::replication2; +using namespace arangodb::replication2::test; namespace { diff --git a/tests/Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp b/tests/Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp new file mode 100644 index 000000000000..2b11702f5070 --- /dev/null +++ b/tests/Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp @@ -0,0 +1,208 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +#include "StateMachineTestHelper.h" + +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" + +#include "Basics/voc-errors.h" +#include "Basics/Exceptions.h" +#include "Replication2/ReplicatedState/AbstractStateMachine.h" + +using namespace arangodb; +using namespace arangodb::replication2; + +namespace { +struct MyTestStateMachine : replicated_state::AbstractStateMachine { + explicit MyTestStateMachine(std::shared_ptr log) + : replicated_state::AbstractStateMachine(std::move(log)) {} + + auto add(std::string_view value) -> LogIndex { + auto idx = insert(TestLogEntry(std::string{value})); + waitFor(idx).thenValue([weak = weak_from_this()](auto&& res) mutable { + if (auto self = weak.lock()) { + self->triggerPollEntries(); + } + return Result{TRI_ERROR_NO_ERROR}; + }); + return idx; + } + + auto get() -> std::unordered_set { + std::unique_lock guard(mutex); + return _entries; + } + + protected: + auto installSnapshot(ParticipantId const& id) -> futures::Future override { + TRI_ASSERT(false); + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + } + auto applyEntries(std::unique_ptr ptr) + -> futures::Future override { + std::unique_lock guard(mutex); + while (auto e = ptr->next()) { + _entries.insert(e->payload); + } + + return futures::Future{std::in_place, TRI_ERROR_NO_ERROR}; + } + + std::mutex mutex; + std::unordered_set _entries; +}; + +struct ParticipantBase { + explicit ParticipantBase(std::shared_ptr const& log) + : state(std::make_shared(log)) {} + std::shared_ptr state; +}; + +struct Follower : ParticipantBase { + explicit Follower(std::shared_ptr const& log, + ParticipantId const& p, LogTerm term, ParticipantId const& leader) + : ParticipantBase(log), log(log->becomeFollower(p, term, leader)) {} + + 
std::shared_ptr log; +}; + +struct Leader : ParticipantBase { + explicit Leader(std::shared_ptr const& log, + LogConfig config, ParticipantId id, LogTerm term, + std::vector> const& follower) + : ParticipantBase(log), + log(log->becomeLeader(config, std::move(id), term, follower)) {} + + std::shared_ptr log; +}; + +} + +struct PollStateMachineTest : StateMachineTest {}; + +TEST_F(PollStateMachineTest, check_apply_entries) { + auto A = createReplicatedLog(); + auto B = createReplicatedLog(); + + { + auto follower = std::make_shared(B, "B", LogTerm{1}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{1}, + std::vector>{follower->log}); + + leader->state->add("first"); + auto f = follower->state->triggerPollEntries(); + ASSERT_TRUE(f.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + { + auto set = leader->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + } + + { + auto follower = std::make_shared(B, "B", LogTerm{2}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{2}, + std::vector>{follower->log}); + + auto f1 = leader->state->triggerPollEntries(); + ASSERT_FALSE(f1.isReady()); + auto f2 = follower->state->triggerPollEntries(); + ASSERT_FALSE(f2.isReady()); + + leader->log->triggerAsyncReplication(); + ASSERT_TRUE(f1.isReady()); + ASSERT_TRUE(f2.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + { + auto set = leader->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + } +} + +TEST_F(PollStateMachineTest, insert_multiple) { + auto A = createReplicatedLog(); + auto B = createReplicatedLog(); + + { + auto follower = std::make_shared(B, "B", LogTerm{1}, "A"); + auto 
leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{1}, + std::vector>{follower->log}); + + leader->state->add("first"); + leader->state->add("second"); + leader->state->add("third"); + auto f = follower->state->triggerPollEntries(); + ASSERT_TRUE(f.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set, std::unordered_set({"first"s, "second"s, "third"s})); + } + } + + { + auto follower = std::make_shared(B, "B", LogTerm{2}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{2}, + std::vector>{follower->log}); + + auto f2 = follower->state->triggerPollEntries(); + ASSERT_FALSE(f2.isReady()); + + leader->log->triggerAsyncReplication(); + ASSERT_TRUE(f2.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set, std::unordered_set({"first"s, "second"s, "third"s})); + } + } +} diff --git a/tests/Replication2/ReplicatedState/StateMachineTestHelper.cpp b/tests/Replication2/ReplicatedState/StateMachineTestHelper.cpp new file mode 100644 index 000000000000..e290d368368d --- /dev/null +++ b/tests/Replication2/ReplicatedState/StateMachineTestHelper.cpp @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "StateMachineTestHelper.h" + +#include +#include + +#include +#include "Replication2/Mocks/PersistedLog.h" + +void arangodb::TestLogEntry::toVelocyPack(arangodb::velocypack::Builder& builder) const { + velocypack::ObjectBuilder ob(&builder); + builder.add("payload", velocypack::Value(payload)); +} + +auto arangodb::TestLogEntry::fromVelocyPack(arangodb::velocypack::Slice slice) + -> arangodb::TestLogEntry { + return TestLogEntry(slice.get("payload").copyString()); +} + +#include "Replication2/ReplicatedState/AbstractStateMachine.tpp" + +template struct replicated_state::AbstractStateMachine; + +auto StateMachineTest::createReplicatedLog() + -> std::shared_ptr { + auto persisted = std::make_shared(LogId{0}); + auto core = std::make_unique(persisted); + auto metrics = std::make_shared(); + return std::make_shared(std::move(core), metrics, LoggerContext(Logger::REPLICATION2)); +} diff --git a/tests/Replication2/ReplicatedState/StateMachineTestHelper.h b/tests/Replication2/ReplicatedState/StateMachineTestHelper.h new file mode 100644 index 000000000000..04e4aaac2e03 --- /dev/null +++ b/tests/Replication2/ReplicatedState/StateMachineTestHelper.h @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include + +#include + +#include "Replication2/ReplicatedLog/ReplicatedLog.h" + +namespace arangodb { + +namespace velocypack { +class Builder; +class Slice; +} // namespace velocypack + +struct StateMachineTest : ::testing::Test { + // returns one freshly constructed replicated log; leader/follower roles are assigned by the caller + static auto createReplicatedLog() -> std::shared_ptr; +}; + +struct TestLogEntry { + explicit TestLogEntry(std::string payload) : payload(std::move(payload)) {} + static auto fromVelocyPack(velocypack::Slice slice) -> TestLogEntry; + void toVelocyPack(velocypack::Builder& builder) const; + std::string payload; +}; + +} // namespace arangodb diff --git a/tests/Replication2/Streams/LogMultiplexerTest.cpp b/tests/Replication2/Streams/LogMultiplexerTest.cpp new file mode 100644 index 000000000000..fb6ee822b599 --- /dev/null +++ b/tests/Replication2/Streams/LogMultiplexerTest.cpp @@ -0,0 +1,205 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include +#include + +#include +#include +#include + +#include + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::streams; +using namespace arangodb::replication2::test; + +struct LogMultiplexerTest : LogMultiplexerTestBase {}; + +TEST_F(LogMultiplexerTest, leader_follower_test) { + auto ints = {12, 13, 14, 15, 16}; + auto strings = {"foo", "bar", "baz", "fuz"}; + + auto leaderLog = createReplicatedLog(); + auto followerLog = createReplicatedLog(); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + + auto mux = LogMultiplexer::construct(leader); + auto demux = LogDemultiplexer::construct(follower); + demux->listen(); + + auto leaderStreamA = mux->getStreamBaseById(); + auto leaderStreamB = mux->getStreamBaseById(); + + { + auto iterA = ints.begin(); + auto iterB = strings.begin(); + while (iterA != ints.end() || iterB != strings.end()) { + if (iterA != ints.end()) { + leaderStreamA->insert(*iterA); + ++iterA; + } + if (iterB != strings.end()) { + leaderStreamB->insert(*iterB); + ++iterB; + } + } + } + + auto followerStreamA = demux->getStreamBaseById(); + auto followerStreamB = demux->getStreamBaseById(); + + auto futureA = followerStreamA->waitFor(LogIndex{2}); + auto futureB = followerStreamB->waitFor(LogIndex{1}); + ASSERT_TRUE(futureA.isReady()); + ASSERT_TRUE(futureB.isReady()); + + { + auto iter = followerStreamA->getAllEntriesIterator(); + for (auto x : ints) { + auto entry = iter->next(); + ASSERT_TRUE(entry.has_value()) << "expected value " << x; + auto const& [index, value] = *entry; + EXPECT_EQ(value, x); + } + EXPECT_EQ(iter->next(), std::nullopt); + } + { + auto iter = 
followerStreamB->getAllEntriesIterator(); + for (auto x : strings) { + auto entry = iter->next(); + ASSERT_TRUE(entry.has_value()); + auto const& [index, value] = *entry; + EXPECT_EQ(value, x); + } + EXPECT_EQ(iter->next(), std::nullopt); + } +} + +TEST_F(LogMultiplexerTest, leader_wait_for) { + auto leaderLog = createReplicatedLog(); + auto followerLog = createFakeReplicatedLog(); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + auto mux = LogMultiplexer::construct(leader); + + auto stream = mux->getStreamById(); + + // Write an entry and wait for it + auto idx = stream->insert(12); + auto f = stream->waitFor(idx); + // Future not yet resolved because follower did not answer yet + EXPECT_FALSE(f.isReady()); + + // let follower run + EXPECT_TRUE(follower->hasPendingAppendEntries()); + while (follower->hasPendingAppendEntries()) { + follower->runAsyncAppendEntries(); + } + + // future should be ready + ASSERT_TRUE(f.isReady()); +} + +TEST_F(LogMultiplexerTest, leader_wait_for_multiple) { + auto leaderLog = createReplicatedLog(); + auto followerLog = createFakeReplicatedLog(); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + auto mux = LogMultiplexer::construct(leader); + + auto streamA = mux->getStreamById(); + auto streamB = mux->getStreamById(); + + // Write an entry and wait for it + auto idxA = streamA->insert(12); + auto fA = streamA->waitFor(idxA); + // Future not yet resolved because follower did not answer yet + EXPECT_FALSE(fA.isReady()); + // Follower has pending append entries + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Write another entry + auto idxB = streamB->insert("hello world"); + auto fB = streamB->waitFor(idxB); + // Both futures are not yet resolved because follower 
did not answer yet + EXPECT_FALSE(fB.isReady()); + EXPECT_FALSE(fA.isReady()); + + // Do a single follower run + follower->runAsyncAppendEntries(); + + // future A should be ready and follower has still pending append entries + EXPECT_TRUE(fA.isReady()); + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Now future B should become ready. + while (follower->hasPendingAppendEntries()) { + follower->runAsyncAppendEntries(); + } + EXPECT_TRUE(fB.isReady()); +} + +TEST_F(LogMultiplexerTest, follower_wait_for) { + auto leaderLog = createReplicatedLog(LogId{1}); + auto followerLog = createFakeReplicatedLog(LogId{2}); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + // handle first leader log entry (empty) + leader->triggerAsyncReplication(); + while (follower->hasPendingAppendEntries()) { + follower->runAsyncAppendEntries(); + } + + auto mux = LogMultiplexer::construct(leader); + auto demux = LogDemultiplexer::construct(follower); + demux->listen(); + + auto inStream = mux->getStreamById(); + auto outStream = demux->getStreamById(); + + auto idx = inStream->insert(17); + auto f = outStream->waitFor(idx); + EXPECT_FALSE(f.isReady()); + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Handle append request, entry not yet committed on follower + follower->runAsyncAppendEntries(); + EXPECT_FALSE(f.isReady()); + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Receive commit update + follower->runAsyncAppendEntries(); + EXPECT_TRUE(f.isReady()); +} diff --git a/tests/Replication2/Streams/MultiplexerConcurrencyTest.cpp b/tests/Replication2/Streams/MultiplexerConcurrencyTest.cpp new file mode 100644 index 000000000000..15b39e4631d5 --- /dev/null +++ b/tests/Replication2/Streams/MultiplexerConcurrencyTest.cpp @@ -0,0 +1,175 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER 
+/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include +#include +#include + +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/Streams/LogMultiplexer.h" + +#include "Replication2/Streams/TestLogSpecification.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; + +struct LogMultiplexerConcurrencyTest : LogMultiplexerTestBase { + using Spec = test::MyTestSpecification; + + template + struct StateMachine : std::enable_shared_from_this> { + using ValueType = streams::stream_type_by_id_t; + + explicit StateMachine(std::shared_ptr> stream) + : _stream(std::move(stream)) {} + + void start() { + waitForStream(LogIndex{1}); + } + + void waitForStream(LogIndex next) { + _stream->waitForIterator(next).thenValue([weak = this->weak_from_this()](auto&& iter) { + if (auto self = weak.lock(); self) { + auto [start, stop] = iter->range(); + TRI_ASSERT(start != stop); + while (auto memtry = iter->next()) { + self->_observedLog.emplace(*memtry); + } + self->waitForStream(stop); + } else { + TRI_ASSERT(false); + } + }); 
+ } + + std::map _observedLog; + std::shared_ptr> _stream; + }; + + template + struct StateCombiner; + template + struct StateCombiner> { + std::tuple>...> _states; + + template + explicit StateCombiner(std::shared_ptr const& demux) + : _states(std::make_shared>( + demux->template getStreamById())...) { + ((std::get>>(_states)->start()), ...); + } + }; + + struct FollowerInstance { + explicit FollowerInstance(std::shared_ptr const& follower) + : _follower(follower), + _demux(streams::LogDemultiplexer::construct(follower)), + combiner(_demux) { + _demux->listen(); + } + + std::shared_ptr _follower; + std::shared_ptr> _demux; + StateCombiner combiner; + }; + + struct LeaderInstance { + explicit LeaderInstance(std::shared_ptr const& leader) + : _leader(leader), + _mux(streams::LogMultiplexer::construct(leader)), + combiner(_mux) {} + + std::shared_ptr _leader; + std::shared_ptr> _mux; + StateCombiner combiner; + }; +}; + +TEST_F(LogMultiplexerConcurrencyTest, test) { + auto followerLog = createAsyncReplicatedLog(LogId{1}); + auto leaderLog = createAsyncReplicatedLog(LogId{2}); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto asyncFollower = std::make_shared(follower); + + auto leader = leaderLog->becomeLeader(LogConfig(2, false), "leader", + LogTerm{1}, {asyncFollower}); + + auto followerInstance = std::make_shared(follower); + auto leaderInstance = std::make_shared(leader); + + auto producer = leaderInstance->_mux->getStreamById(); + + constexpr std::size_t num_threads = 8; + constexpr std::size_t num_inserts_per_thread = 10000; + constexpr auto lastIndex = LogIndex{num_threads * num_inserts_per_thread + 1}; + + std::vector threads; + std::generate_n(std::back_inserter(threads), num_threads, [&]{ + return std::thread([&, producer]{ + auto index = LogIndex{0}; + for (std::size_t i = 0; i < num_inserts_per_thread; i++) { + index = producer->insert((int)i); + } + producer->waitFor(index).wait(); + }); + }); + + 
std::for_each(std::begin(threads), std::end(threads), [](std::thread& t) { + t.join(); + }); + asyncFollower->waitFor(lastIndex).wait(); + asyncFollower->stop(); + + auto iterA = follower->waitForIterator(LogIndex{1}).get(); + auto iterB = leader->waitForIterator(LogIndex{1}).get(); + + EXPECT_EQ(iterA->range(), iterB->range()); + while (auto A = iterA->next()) { + auto B = iterB->next(); + ASSERT_TRUE(B.has_value()); + EXPECT_EQ(A->logIndex(), B->logIndex()); + bool equal = basics::VelocyPackHelper::equal(A->logPayload(), B->logPayload(), true); + EXPECT_TRUE(equal) << A->logPayload().toJson() << " " << B->logPayload().toJson(); + } + EXPECT_FALSE(iterB->next().has_value()); + + MyTestSpecification::for_each_descriptor([&](auto p) { + using Descriptor = decltype(p); + auto streamA = leaderInstance->_mux->getStreamByDescriptor(); + auto streamB = followerInstance->_demux->getStreamByDescriptor(); + + auto iterA = streamA->waitForIterator(LogIndex{1}).get(); + auto iterB = streamB->waitForIterator(LogIndex{1}).get(); + + EXPECT_EQ(iterA->range(), iterB->range()); + while (auto A = iterA->next()) { + ASSERT_EQ(A, iterB->next()); + } + EXPECT_FALSE(iterB->next().has_value()); + }); +} diff --git a/tests/Replication2/Streams/TestLogSpecification.cpp b/tests/Replication2/Streams/TestLogSpecification.cpp new file mode 100644 index 000000000000..2facd49cbf46 --- /dev/null +++ b/tests/Replication2/Streams/TestLogSpecification.cpp @@ -0,0 +1,28 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "TestLogSpecification.h" + +#include "Replication2/Streams/LogMultiplexer.tpp" + +template struct arangodb::replication2::streams::LogMultiplexer; +template struct arangodb::replication2::streams::LogDemultiplexer; diff --git a/tests/Replication2/Streams/TestLogSpecification.h b/tests/Replication2/Streams/TestLogSpecification.h new file mode 100644 index 000000000000..a34fe8c4ae9e --- /dev/null +++ b/tests/Replication2/Streams/TestLogSpecification.h @@ -0,0 +1,109 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include + +#include +#include + +#include +#include + +#include +#include +#include + +namespace arangodb::replication2::test { + +struct LogMultiplexerTestBase : ::testing::Test { + static auto createReplicatedLog(LogId id = LogId{0}) + -> std::shared_ptr { + return createReplicatedLogImpl(id); + } + + static auto createAsyncReplicatedLog(LogId id = LogId{0}) + -> std::shared_ptr { + return createReplicatedLogImpl(id); + } + + static auto createFakeReplicatedLog(LogId id = LogId{0}) + -> std::shared_ptr { + return createReplicatedLogImpl(id); + } + + private: + template + static auto createReplicatedLogImpl(LogId id) -> std::shared_ptr { + auto persisted = std::make_shared(id); + auto core = std::make_unique(persisted); + auto metrics = std::make_shared(); + return std::make_shared(std::move(core), metrics, + LoggerContext(Logger::REPLICATION2)); + } +}; + +struct default_deserializer { + template + auto operator()(streams::serializer_tag_t, velocypack::Slice s) -> T { + return s.extract(); + } +}; + +struct default_serializer { + template + void operator()(streams::serializer_tag_t, T const& t, velocypack::Builder& b) { + b.add(velocypack::Value(t)); + } +}; + +inline constexpr auto my_int_stream_id = streams::StreamId{1}; +inline constexpr auto my_string_stream_id = streams::StreamId{8}; +inline constexpr auto my_string2_stream_id = streams::StreamId{9}; + +inline constexpr auto my_int_stream_tag = streams::StreamTag{12}; +inline constexpr auto my_string_stream_tag = streams::StreamTag{55}; +inline constexpr auto my_string2_stream_tag = streams::StreamTag{56}; +inline constexpr auto my_string2_stream_tag2 = streams::StreamTag{58}; + +/* clang-format off */ + +using MyTestSpecification = streams::stream_descriptor_set< + streams::stream_descriptor + >>, + 
streams::stream_descriptor + >>, + streams::stream_descriptor, + streams::tag_descriptor + >> + >; + +/* clang-format on */ + +} // namespace arangodb::replication2::test + +extern template struct arangodb::replication2::streams::LogMultiplexer; +extern template struct arangodb::replication2::streams::LogDemultiplexer; diff --git a/tests/Replication2/TestHelper.h b/tests/Replication2/TestHelper.h deleted file mode 100644 index 48ca40f4a489..000000000000 --- a/tests/Replication2/TestHelper.h +++ /dev/null @@ -1,252 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// DISCLAIMER -/// -/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. 
-/// -/// Copyright holder is ArangoDB GmbH, Cologne, Germany -/// -/// @author Lars Maier -//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include "ReplicatedLogMetricsMock.h" - -#include "Replication2/ReplicatedLog/ILogParticipant.h" -#include "Replication2/ReplicatedLog/InMemoryLog.h" -#include "Replication2/ReplicatedLog/LogCore.h" -#include "Replication2/ReplicatedLog/LogFollower.h" -#include "Replication2/ReplicatedLog/LogLeader.h" -#include "Replication2/ReplicatedLog/LogStatus.h" -#include "Replication2/ReplicatedLog/PersistedLog.h" -#include "Replication2/ReplicatedLog/ReplicatedLog.h" -#include "Replication2/ReplicatedLog/types.h" - -#include - -#include -#include -#include - -namespace arangodb::replication2 { - -using namespace replicated_log; - -struct MockLog : replication2::replicated_log::PersistedLog { - using storeType = std::map; - - explicit MockLog(replication2::LogId id); - MockLog(replication2::LogId id, storeType storage); - - auto insert(replication2::replicated_log::PersistedLogIterator& iter, WriteOptions const&) -> Result override; - auto insertAsync(std::unique_ptr iter, - WriteOptions const&) -> futures::Future override; - auto read(replication2::LogIndex start) - -> std::unique_ptr override; - auto removeFront(replication2::LogIndex stop) -> Result override; - auto removeBack(replication2::LogIndex start) -> Result override; - auto drop() -> Result override; - - void setEntry(replication2::LogIndex idx, replication2::LogTerm term, - replication2::LogPayload payload); - void setEntry(replication2::PersistingLogEntry); - - [[nodiscard]] storeType getStorage() const { return _storage; } - private: - using iteratorType = storeType::iterator; - storeType _storage; -}; - -struct AsyncMockLog : MockLog { - - explicit AsyncMockLog(replication2::LogId id); - - ~AsyncMockLog() noexcept; - - auto insertAsync(std::unique_ptr iter, - WriteOptions const&) -> futures::Future override; - - auto 
stop() noexcept -> void { - if (!_stopping) { - { - std::unique_lock guard(_mutex); - _stopping = true; - _cv.notify_all(); - } - _asyncWorker.join(); - } - } - - private: - struct QueueEntry { - WriteOptions opts; - std::unique_ptr iter; - futures::Promise promise; - }; - - void runWorker(); - - std::mutex _mutex; - std::vector> _queue; - std::condition_variable _cv; - std::atomic _stopping = false; - bool _stopped = false; - // _asyncWorker *must* be initialized last, otherwise starting the thread - // races with initializing the coordination variables. - std::thread _asyncWorker; -}; - -struct DelayedFollowerLog : AbstractFollower { - explicit DelayedFollowerLog(std::shared_ptr follower) - : _follower(std::move(follower)) {} - - DelayedFollowerLog(LoggerContext const& logContext, - std::shared_ptr logMetricsMock, - ParticipantId const& id, std::unique_ptr logCore, - LogTerm term, ParticipantId leaderId) - : DelayedFollowerLog([&] { - auto inMemoryLog = InMemoryLog{logContext, *logCore}; - return std::make_shared(logContext, std::move(logMetricsMock), - id, std::move(logCore), term, - std::move(leaderId), - std::move(inMemoryLog)); - }()) {} - - auto appendEntries(AppendEntriesRequest req) - -> arangodb::futures::Future override { - auto future = _asyncQueue.doUnderLock([&](auto& queue) { - return queue.emplace_back(std::make_shared(std::move(req))) - ->promise.getFuture(); - }); - return std::move(future).thenValue( - [this](auto&& result) mutable { - return _follower->appendEntries(std::forward(result)); - }); - } - - void runAsyncAppendEntries() { - auto asyncQueue = _asyncQueue.doUnderLock([](auto& _queue) { - auto queue = std::move(_queue); - _queue.clear(); - return queue; - }); - - for (auto& p : asyncQueue) { - p->promise.setValue(std::move(p->request)); - } - } - - using WaitForAsyncPromise = futures::Promise; - - struct AsyncRequest { - explicit AsyncRequest(AppendEntriesRequest request) - : request(std::move(request)) {} - AppendEntriesRequest request; 
- WaitForAsyncPromise promise; - }; - [[nodiscard]] auto pendingAppendEntries() const - -> std::deque> { - return _asyncQueue.copy(); - } - [[nodiscard]] auto hasPendingAppendEntries() const -> bool { - return _asyncQueue.doUnderLock( - [](auto const& queue) { return !queue.empty(); }); - } - - auto getParticipantId() const noexcept -> ParticipantId const& override { - return _follower->getParticipantId(); - } - - auto getStatus() const -> LogStatus { - return _follower->getStatus(); - } - - auto resign() && { - return std::move(*_follower).resign(); - } - - auto waitFor(LogIndex index) { - return _follower->waitFor(index); - } - - auto waitForIterator(LogIndex index) { - return _follower->waitForIterator(index); - } - private: - Guarded>> _asyncQueue; - std::shared_ptr _follower; -}; - -struct TestReplicatedLog : ReplicatedLog { - using ReplicatedLog::ReplicatedLog; - auto becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) - -> std::shared_ptr; - auto becomeLeader(ParticipantId const& id, LogTerm term, - std::vector> const& follower, - std::size_t writeConcern) -> std::shared_ptr; - auto becomeLeader(LogConfig config, ParticipantId id, LogTerm term, - std::vector> const& follower) - -> std::shared_ptr; -}; - -struct ReplicatedLogTest : ::testing::Test { - - auto makeLogCore(LogId id) -> std::unique_ptr { - auto persisted = makePersistedLog(id); - return std::make_unique(persisted); - } - - auto getPersistedLogById(LogId id) -> std::shared_ptr { - return _persistedLogs.at(id); - } - - auto makePersistedLog(LogId id) -> std::shared_ptr { - auto persisted = std::make_shared(id); - _persistedLogs[id] = persisted; - return persisted; - } - - auto makeReplicatedLog(LogId id) -> std::shared_ptr { - auto core = makeLogCore(id); - return std::make_shared(std::move(core), _logMetricsMock, - LoggerContext(Logger::FIXME)); - } - - auto makeReplicatedLogWithAsyncMockLog(LogId id) -> std::shared_ptr { - auto persisted = std::make_shared(id); - 
_persistedLogs[id] = persisted; - auto core = std::make_unique(persisted); - return std::make_shared(std::move(core), _logMetricsMock, - LoggerContext(Logger::FIXME)); - } - - auto defaultLogger() { - return LoggerContext(Logger::REPLICATION2); - } - - auto stopAsyncMockLogs() -> void { - for (auto const& it : _persistedLogs) { - if (auto log = std::dynamic_pointer_cast(it.second); log != nullptr) { - log->stop(); - } - } - } - - std::unordered_map> _persistedLogs; - std::shared_ptr _logMetricsMock = std::make_shared(); -}; - - -}