diff --git a/arangod/Agency/FailedServer.cpp b/arangod/Agency/FailedServer.cpp index af47f913cdc9..956c24713ddd 100644 --- a/arangod/Agency/FailedServer.cpp +++ b/arangod/Agency/FailedServer.cpp @@ -29,6 +29,7 @@ #include "Agency/FailedLeader.h" #include "Agency/Job.h" #include "Basics/StaticStrings.h" +#include "Replication2/Version.h" using namespace arangodb::consensus; @@ -159,6 +160,17 @@ bool FailedServer::start(bool& aborts) { for (auto const& database : databases) { // dead code auto cdatabase = current.at(database.first)->children(); + if (auto version = database.second->hasAsString(StaticStrings::ReplicationVersion); version.has_value()) { + if (auto res = replication::parseVersion(*version); res.ok()) { + switch (res.get()) { + case replication::Version::ONE: + break; + case replication::Version::TWO: + continue; // Don't create FailedLeader or failedFollower jobs for new replication + } + } + } + for (auto const& collptr : database.second->children()) { auto const& collection = *(collptr.second); diff --git a/arangod/Agency/Supervision.cpp b/arangod/Agency/Supervision.cpp index b955da5ca64c..5ebe1bb7dcb6 100644 --- a/arangod/Agency/Supervision.cpp +++ b/arangod/Agency/Supervision.cpp @@ -23,9 +23,22 @@ #include "Supervision.h" -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include #include "Agency/ActiveFailoverJob.h" #include "Agency/AddFollower.h" @@ -48,6 +61,22 @@ #include "Replication2/ReplicatedLog/AgencyLogSpecification.h" #include "Replication2/ReplicatedLog/Algorithms.h" #include "StorageEngine/HealthData.h" +#include "Agency/AgencyStrings.h" +#include "Agency/AgentConfiguration.h" +#include "Agency/AgentInterface.h" +#include "Agency/PathComponent.h" +#include "Agency/TimeString.h" +#include "Agency/TransactionBuilder.h" +#include "Basics/Result.h" +#include "Basics/VelocyPackHelper.h" +#include "Basics/debugging.h" 
+#include "Basics/overload.h" +#include "Logger/LogMacros.h" +#include "Logger/Logger.h" +#include "Logger/LoggerStream.h" +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "RestServer/Metrics.h" +#include "RestServer/MetricsFeature.h" using namespace arangodb; using namespace arangodb::consensus; @@ -2282,20 +2311,21 @@ void Supervision::checkReplicatedLogs() { }); auto newTermSpec = checkReplicatedLog(dbName, spec, current, info); - envelope = std::visit( - overload{[&, &dbName = dbName](LogPlanTermSpecification const& newSpec) { - return arangodb::replication2::agency::methods::updateTermSpecificationTrx( - std::move(envelope), dbName, spec.id, newSpec, - spec.currentTerm->term); - }, - [&, &dbName = dbName](LogCurrentSupervisionElection const& newElection) { - return arangodb::replication2::agency::methods::updateElectionResult( - std::move(envelope), dbName, spec.id, newElection); - }, - [&](std::monostate const&) { - return std::move(envelope); // do nothing - }}, - newTermSpec); + envelope = + std::visit(arangodb::overload{ + [&, &dbName = dbName](LogPlanTermSpecification const& newSpec) { + return arangodb::replication2::agency::methods::updateTermSpecificationTrx( + std::move(envelope), dbName, spec.id, newSpec, + spec.currentTerm->term); + }, + [&, &dbName = dbName](LogCurrentSupervisionElection const& newElection) { + return arangodb::replication2::agency::methods::updateElectionResult( + std::move(envelope), dbName, spec.id, newElection); + }, + [&](std::monostate const&) { + return std::move(envelope); // do nothing + }}, + newTermSpec); } } diff --git a/arangod/Agency/Supervision.h b/arangod/Agency/Supervision.h index 35e60197529c..8d34528bb6fb 100644 --- a/arangod/Agency/Supervision.h +++ b/arangod/Agency/Supervision.h @@ -23,17 +23,36 @@ #pragma once +#include +#include +#include +#include +#include +#include +#include + #include "Agency/AgencyCommon.h" #include "Agency/AgentInterface.h" +#include "Agency/Node.h" #include "Agency/Store.h" 
#include "Agency/TimeString.h" #include "Basics/ConditionVariable.h" #include "Basics/Mutex.h" #include "Basics/Thread.h" +#include "Cluster/ClusterTypes.h" #include "RestServer/MetricsFeature.h" -namespace arangodb { -namespace consensus { +class Counter; +namespace arangodb::application_features { +class ApplicationServer; +} // namespace arangodb::application_features +namespace arangodb::consensus { +class AgentInterface; +} // namespace arangodb::consensus +template class Histogram; +template struct log_scale_t; + +namespace arangodb::consensus { class Agent; @@ -328,6 +347,5 @@ class Supervision : public arangodb::Thread { */ query_t removeTransactionBuilder(std::vector const&); -} // namespace consensus } // namespace arangodb diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 6c2b85ed96fe..35c2d8a5d588 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -566,7 +566,7 @@ set(LIB_ARANGO_REPLICATION2_SOURCES Replication2/ReplicatedLog/types.cpp Replication2/Version.cpp RestHandler/RestLogHandler.cpp - ) + Replication2/ReplicatedState/AbstractStateMachine.tpp Replication2/ReplicatedState/AbstractStateMachine.h Replication2/Streams/LogMultiplexer.tpp Replication2/Streams/LogMultiplexer.h Replication2/Streams/StreamSpecification.h Replication2/Streams/Streams.h Replication2/Streams/MultiplexedValues.h Replication2/Streams/StreamInformationBlock.h) set (LIB_ARANGO_METRICS_SOURCES RestServer/Metrics.cpp diff --git a/arangod/Cluster/ClusterCollectionCreationInfo.cpp b/arangod/Cluster/ClusterCollectionCreationInfo.cpp index 580c525ad9fb..8cd16ec2abad 100644 --- a/arangod/Cluster/ClusterCollectionCreationInfo.cpp +++ b/arangod/Cluster/ClusterCollectionCreationInfo.cpp @@ -26,6 +26,7 @@ #include "Basics/StaticStrings.h" #include "Basics/VelocyPackHelper.h" #include "Cluster/ClusterTypes.h" +#include "Replication2/ReplicatedLog/LogCommon.h" #include #include @@ -35,17 +36,20 @@ using namespace arangodb; 
ClusterCollectionCreationInfo::ClusterCollectionCreationInfo( std::string cID, uint64_t shards, uint64_t replicationFactor, - uint64_t writeConcern, bool waitForRep, velocypack::Slice const& slice, - std::string coordinatorId, RebootId rebootId) + uint64_t writeConcern, bool waitForSync, bool waitForRep, + velocypack::Slice const& slice, std::string coordinatorId, RebootId rebootId, + std::optional>> replicatedLogs) : collectionID(std::move(cID)), numberOfShards(shards), replicationFactor(replicationFactor), writeConcern(writeConcern), + waitForSync(waitForSync), waitForReplication(waitForRep), json(slice), name(basics::VelocyPackHelper::getStringValue(json, StaticStrings::DataSourceName, - StaticStrings::Empty)), + StaticStrings::Empty)), state(ClusterCollectionCreationState::INIT), + replicatedLogs(std::move(replicatedLogs)), creator(std::in_place, std::move(coordinatorId), rebootId) { TRI_ASSERT(creator); TRI_ASSERT(creator->rebootId().initialized()); diff --git a/arangod/Cluster/ClusterCollectionCreationInfo.h b/arangod/Cluster/ClusterCollectionCreationInfo.h index 2bd81d65af3b..146a57e774ac 100644 --- a/arangod/Cluster/ClusterCollectionCreationInfo.h +++ b/arangod/Cluster/ClusterCollectionCreationInfo.h @@ -31,24 +31,32 @@ #include #include +namespace arangodb::replication2 { +class LogId; +} + namespace arangodb { enum class ClusterCollectionCreationState { INIT, FAILED, DONE }; struct ClusterCollectionCreationInfo { - ClusterCollectionCreationInfo(std::string cID, uint64_t shards, - uint64_t replicationFactor, uint64_t writeConcern, - bool waitForRep, velocypack::Slice const& slice, - std::string coordinatorId, RebootId rebootId); + ClusterCollectionCreationInfo() = delete; + ClusterCollectionCreationInfo( + std::string cID, uint64_t shards, uint64_t replicationFactor, + uint64_t writeConcern, bool waitForSync, bool waitForRep, + velocypack::Slice const& slice, std::string coordinatorId, RebootId rebootId, + std::optional>> replicatedLogs); std::string const 
collectionID; uint64_t numberOfShards; uint64_t replicationFactor; uint64_t writeConcern; + bool waitForSync; bool waitForReplication; velocypack::Slice const json; std::string name; ClusterCollectionCreationState state; + std::optional>> replicatedLogs; class CreatorInfo : public velocypack::Serializable { public: @@ -56,10 +64,10 @@ struct ClusterCollectionCreationInfo { void toVelocyPack(velocypack::Builder& builder) const override; - virtual ~CreatorInfo() = default; + ~CreatorInfo() override = default; - RebootId rebootId() const noexcept; - std::string const& coordinatorId() const noexcept; + [[nodiscard]] RebootId rebootId() const noexcept; + [[nodiscard]] std::string const& coordinatorId() const noexcept; private: std::string _coordinatorId; @@ -69,10 +77,10 @@ struct ClusterCollectionCreationInfo { std::optional creator; public: - velocypack::Slice isBuildingSlice() const; + [[nodiscard]] velocypack::Slice isBuildingSlice() const; private: - bool needsBuildingFlag() const; + [[nodiscard]] bool needsBuildingFlag() const; private: velocypack::Builder _isBuildingJson; diff --git a/arangod/Cluster/ClusterInfo.cpp b/arangod/Cluster/ClusterInfo.cpp index 5a11cb3e1010..757ae27fdffb 100644 --- a/arangod/Cluster/ClusterInfo.cpp +++ b/arangod/Cluster/ClusterInfo.cpp @@ -78,6 +78,7 @@ #include #include #include +#include #include #include @@ -87,6 +88,8 @@ #include +using namespace arangodb; + namespace arangodb { /// @brief internal helper struct for counting the number of shards etc. 
struct ShardStatistics { @@ -207,23 +210,22 @@ void addToShardStatistics(std::unordered_map const& participants, + std::uint64_t replicationFactor, + std::uint64_t writeConcern, bool waitForSync) { + using namespace arangodb::cluster::paths::aliases; + auto const logPath = plan()->replicatedLogs()->database(dbName)->log(to_string(logId)); + + auto config = replication2::LogConfig(); + config.writeConcern = writeConcern; + // config.replicationFactor = replicationFactor; + config.waitForSync = waitForSync; + + auto firstTerm = std::invoke([&] { + using namespace replication2; + using namespace replication2::agency; + auto spec = LogPlanTermSpecification(); + spec.term = LogTerm{1}; + spec.config = config; + std::transform(participants.cbegin(), participants.cend(), + std::inserter(spec.participants, spec.participants.end()), + [](auto const& it) { + return std::make_pair(it, LogPlanTermSpecification::Participant{}); + }); + return spec; + }); + + auto spec = replication2::agency::LogPlanSpecification(); + spec.id = logId; + spec.currentTerm = firstTerm; + spec.targetConfig = config; + + auto builder = std::make_shared(); + spec.toVelocyPack(*builder); + + return std::make_pair(AgencyPrecondition{logPath, AgencyPrecondition::Type::EMPTY, true}, + AgencyOperation{logPath, arangodb::AgencyValueOperationType::SET, + std::move(builder)}); +} + +arangodb::AgencyPrecondition CreateCollectionOrderPrecondition(std::string const& dbName, + std::string const& collection, + VPackSlice const& info) { arangodb::AgencyPrecondition prec{collectionPath(dbName, collection), arangodb::AgencyPrecondition::Type::VALUE, info}; return prec; } -static inline arangodb::AgencyOperation CreateCollectionSuccess( - std::string const& dbName, std::string const& collection, VPackSlice const& info) { +arangodb::AgencyOperation CreateCollectionSuccess(std::string const& dbName, + std::string const& collection, + VPackSlice const& info) { 
TRI_ASSERT(!info.hasKey(arangodb::StaticStrings::AttrIsBuilding)); return arangodb::AgencyOperation{collectionPath(dbName, collection), arangodb::AgencyValueOperationType::SET, info}; @@ -1412,9 +1455,9 @@ void ClusterInfo::loadPlan() { auto stuff = std::make_shared(); { auto replicatedLogsPaths = cluster::paths::aliases::plan() - ->replicatedLogs() - ->database(databaseName) - ->vec(); + ->replicatedLogs() + ->database(databaseName) + ->vec(); auto logsSlice = query->slice()[0].get(replicatedLogsPaths); if (!logsSlice.isNone()) { @@ -2664,21 +2707,26 @@ Result ClusterInfo::dropDatabaseCoordinator( // drop database /// error code and the errorMsg is set accordingly. One possible error /// is a timeout, a timeout of 0.0 means no timeout. //////////////////////////////////////////////////////////////////////////////// -Result ClusterInfo::createCollectionCoordinator( // create collection - std::string const& databaseName, std::string const& collectionID, uint64_t numberOfShards, - uint64_t replicationFactor, uint64_t writeConcern, bool waitForReplication, - velocypack::Slice const& json, // collection definition - double timeout, // request timeout, - bool isNewDatabase, std::shared_ptr const& colToDistributeShardsLike) { +Result ClusterInfo::createCollectionCoordinator( + std::string const& databaseName, std::string const& collectionID, + uint64_t numberOfShards, uint64_t replicationFactor, uint64_t writeConcern, + bool waitForSync, bool waitForReplication, velocypack::Slice const& json, + double timeout, bool isNewDatabase, + std::shared_ptr const& colToDistributeShardsLike, + replication::Version replicationVersion, + std::optional>> replicatedLogs) { TRI_ASSERT(ServerState::instance()->isCoordinator()); - auto serverState = ServerState::instance(); - std::vector infos{ClusterCollectionCreationInfo{ - collectionID, numberOfShards, replicationFactor, writeConcern, waitForReplication, - json, serverState->getId(), serverState->getRebootId()}}; + auto const* const 
serverState = ServerState::instance(); + std::vector infos{ + ClusterCollectionCreationInfo{collectionID, numberOfShards, replicationFactor, + writeConcern, waitForSync, waitForReplication, json, + serverState->getId(), serverState->getRebootId(), + std::move(replicatedLogs)}}; double const realTimeout = getTimeout(timeout); double const endTime = TRI_microtime() + realTimeout; return createCollectionsCoordinator(databaseName, infos, endTime, - isNewDatabase, colToDistributeShardsLike); + isNewDatabase, colToDistributeShardsLike, + replication::Version::ONE); } /// @brief this method does an atomic check of the preconditions for the @@ -2738,7 +2786,8 @@ Result ClusterInfo::checkCollectionPreconditions(std::string const& databaseName Result ClusterInfo::createCollectionsCoordinator( std::string const& databaseName, std::vector& infos, double endTime, bool isNewDatabase, - std::shared_ptr const& colToDistributeShardsLike) { + std::shared_ptr const& colToDistributeShardsLike, + replication::Version replicationVersion) { TRI_ASSERT(ServerState::instance()->isCoordinator()); using arangodb::velocypack::Slice; @@ -2753,13 +2802,15 @@ Result ClusterInfo::createCollectionsCoordinator( // closure and the main thread executing this function. Note that it can // happen that the callback is called only after we return from this // function! 
- auto dbServerResult = - std::make_shared>>(std::nullopt); - auto nrDone = std::make_shared>(0); - auto errMsg = std::make_shared(); - auto cacheMutex = std::make_shared(); - auto cacheMutexOwner = std::make_shared>(); - auto isCleaned = std::make_shared(false); + struct SharedDataForCallback { + std::atomic> dbServerResult; + std::atomic nrDone{0}; + std::string errMsg; + Mutex cacheMutex; + std::atomic cacheMutexOwner; + bool isCleaned{false}; + }; + auto sharedData = std::make_shared(); AgencyComm ac(_server); std::vector> agencyCallbacks; @@ -2777,8 +2828,8 @@ Result ClusterInfo::createCollectionsCoordinator( // d) info might be deleted, so we cannot use it. // e) If the callback is ongoing during cleanup, the callback will // hold the Mutex and delay the cleanup. - RECURSIVE_MUTEX_LOCKER(*cacheMutex, *cacheMutexOwner); - *isCleaned = true; + RECURSIVE_MUTEX_LOCKER(sharedData->cacheMutex, sharedData->cacheMutexOwner); + sharedData->isCleaned = true; for (auto& cb : agencyCallbacks) { _agencyCallbackRegistry->unregisterCallback(cb); } @@ -2799,7 +2850,7 @@ Result ClusterInfo::createCollectionsCoordinator( if (info.state == ClusterCollectionCreationState::DONE) { // This is possible in Enterprise / Smart Collection situation - (*nrDone)++; + sharedData->nrDone++; } std::map> shardServers; @@ -2816,13 +2867,12 @@ Result ClusterInfo::createCollectionsCoordinator( } // The AgencyCallback will copy the closure will take responsibilty of it. - auto closure = [cacheMutex, cacheMutexOwner, &info, dbServerResult, errMsg, - nrDone, isCleaned, shardServers, this](VPackSlice const& result) { + auto closure = [&info, sharedData, shardServers, this](VPackSlice const& result) { // NOTE: This ordering here is important to cover against a race in cleanup. 
// a) The Guard get's the Mutex, sets isCleaned == true, then removes the callback // b) If the callback is acquired it is saved in a shared_ptr, the Mutex will be acquired first, then it will check if it isCleaned - RECURSIVE_MUTEX_LOCKER(*cacheMutex, *cacheMutexOwner); - if (*isCleaned) { + RECURSIVE_MUTEX_LOCKER(sharedData->cacheMutex, sharedData->cacheMutexOwner); + if (sharedData->isCleaned) { return true; } TRI_ASSERT(!info.name.empty()); @@ -2866,10 +2916,10 @@ Result ClusterInfo::createCollectionsCoordinator( LOG_TOPIC("9ed54", ERR, Logger::CLUSTER) << "Did not find shard in _shardServers: " << p.key.copyString() << ". Maybe the collection is already dropped."; - *errMsg = "Error in creation of collection: " + p.key.copyString() + + sharedData->errMsg = "Error in creation of collection: " + p.key.copyString() + ". Collection already dropped. " + __FILE__ + ":" + std::to_string(__LINE__); - dbServerResult->store(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION, + sharedData->dbServerResult.store(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION, std::memory_order_release); TRI_ASSERT(info.state != ClusterCollectionCreationState::DONE); info.state = ClusterCollectionCreationState::FAILED; @@ -2910,9 +2960,9 @@ Result ClusterInfo::createCollectionsCoordinator( } } if (!tmpError.empty()) { - *errMsg = "Error in creation of collection:" + tmpError + " " + + sharedData->errMsg = "Error in creation of collection:" + tmpError + " " + __FILE__ + std::to_string(__LINE__); - dbServerResult->store(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION, + sharedData->dbServerResult.store(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION, std::memory_order_release); // We cannot get into bad state after a collection was created TRI_ASSERT(info.state != ClusterCollectionCreationState::DONE); @@ -2922,7 +2972,7 @@ Result ClusterInfo::createCollectionsCoordinator( // As soon as all leaders are done we are either FAILED or DONE, this cannot be altered later. 
TRI_ASSERT(info.state != ClusterCollectionCreationState::FAILED); info.state = ClusterCollectionCreationState::DONE; - (*nrDone)++; + sharedData->nrDone++; } } return true; @@ -2986,6 +3036,19 @@ Result ClusterInfo::createCollectionsCoordinator( // additionally ensure that no such collectionID exists yet in Plan/Collections precs.emplace_back(AgencyPrecondition("Plan/Collections/" + databaseName + "/" + info.collectionID, AgencyPrecondition::Type::EMPTY, true)); + + if (replicationVersion == replication::Version::TWO) { + auto const& replicatedLogs = *info.replicatedLogs.value(); + for (auto const& it : replicatedLogs) { + auto const logId = it.second; + // TODO add a check whether shardServers contains it.first + auto [prec, oper] = + createReplicatedLog(databaseName, logId, shardServers.at(it.first), + info.replicationFactor, info.writeConcern, info.waitForSync); + precs.emplace_back(std::move(prec)); + opers.emplace_back(std::move(oper)); + } + } } // We need to make sure our plan is up to date. @@ -2999,10 +3062,15 @@ Result ClusterInfo::createCollectionsCoordinator( if (!isNewDatabase) { Result res = checkCollectionPreconditions(databaseName, infos, planVersion); if (res.fail()) { + auto collections = std::vector{}; + collections.reserve(infos.size()); + std::transform(infos.cbegin(), infos.cend(), std::back_inserter(collections), + [](auto const& info) { return info.name; }); LOG_TOPIC("98762", DEBUG, Logger::CLUSTER) << "Failed createCollectionsCoordinator for " << infos.size() - << " collections in database " << databaseName << " isNewDatabase: " << isNewDatabase - << " first collection name: " << ((infos.size() > 0) ? 
infos[0].name : std::string()); + << " collections in database " << databaseName + << ", isNewDatabase: " << isNewDatabase + << ", collection names: " << StringUtils::join(collections, ", "); return res; } } @@ -3107,7 +3175,7 @@ Result ClusterInfo::createCollectionsCoordinator( // using loadPlan, this is necessary for the callback closure to // see the new planned state for this collection. Otherwise it cannot // recognize completion of the create collection operation properly: - RECURSIVE_MUTEX_LOCKER(*cacheMutex, *cacheMutexOwner); + RECURSIVE_MUTEX_LOCKER(sharedData->cacheMutex, sharedData->cacheMutexOwner); auto res = ac.sendTransactionWithFailover(transaction); // Only if not precondition failed if (!res.successful()) { @@ -3155,7 +3223,7 @@ Result ClusterInfo::createCollectionsCoordinator( << "createCollectionCoordinator, Plan changed, waiting for success..."; do { - auto tmpRes = dbServerResult->load(std::memory_order_acquire); + auto tmpRes = sharedData->dbServerResult.load(std::memory_order_acquire); if (TRI_microtime() > endTime) { for (auto const& info : infos) { LOG_TOPIC("f6b57", ERR, Logger::CLUSTER) @@ -3172,7 +3240,7 @@ Result ClusterInfo::createCollectionsCoordinator( } } - if (nrDone->load(std::memory_order_acquire) == infos.size()) { + if (sharedData->nrDone.load(std::memory_order_acquire) == infos.size()) { // We do not need to lock all condition variables // we are save by cacheMutex cbGuard.fire(); @@ -3265,7 +3333,7 @@ Result ClusterInfo::createCollectionsCoordinator( << "Failed createCollectionsCoordinator for " << infos.size() << " collections in database " << databaseName << " isNewDatabase: " << isNewDatabase << " first collection name: " << infos[0].name << " result: " << *tmpRes; - return {*tmpRes, *errMsg}; + return {*tmpRes, sharedData->errMsg}; } // If we get here we have not tried anything. @@ -3318,19 +3386,19 @@ Result ClusterInfo::createCollectionsCoordinator( /// error code and the errorMsg is set accordingly. 
One possible error /// is a timeout, a timeout of 0.0 means no timeout. //////////////////////////////////////////////////////////////////////////////// -Result ClusterInfo::dropCollectionCoordinator( // drop collection - std::string const& dbName, // database name - std::string const& collectionID, - double timeout // request timeout -) { +Result ClusterInfo::dropCollectionCoordinator(std::string const& dbName, + std::string const& collectionID, + double timeout) { TRI_ASSERT(ServerState::instance()->isCoordinator()); if (dbName.empty() || (dbName[0] > '0' && dbName[0] < '9')) { events::DropCollection(dbName, collectionID, TRI_ERROR_ARANGO_DATABASE_NAME_INVALID); return Result(TRI_ERROR_ARANGO_DATABASE_NAME_INVALID); } - AgencyComm ac(_server); - AgencyCommResult res; + // TODO Drop replicated logs, if this is a replication::Version::TWO vocbase! + // TODO Think/discuss whether it is correct to get the logs to be deleted in + // as an argument, originally taken from ShardingInfo. Or whether it + // should better be read from the AgencyCache, like the shards below. 
// First check that no other collection has a distributeShardsLike // entry pointing to us: @@ -3437,21 +3505,78 @@ Result ClusterInfo::dropCollectionCoordinator( // drop collection return Result(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND); } + if (coll->vocbase().replicationVersion() == replication::Version::TWO) { + if (coll->shardingInfo()->replicatedLogs()->empty()) { + LOG_TOPIC("df403", WARN, Logger::CLUSTER) + << "When dropping collection " << coll->name() << " in database " + << dbName << ": " + << "No replicated logs listed, even though the databases uses " + "replicationVersion 2."; + TRI_ASSERT(false); + } + } + + auto createDropCollectionTrx = + [&dbName = std::as_const(dbName), &collectionID = std::as_const(collectionID), + &coll = std::as_const(coll)](VPackBuffer& buffer) { + namespace paths = cluster::paths::aliases; + auto builder = VPackBuilder(buffer); + auto envelope = agency::envelope::into_builder(builder); + + auto writeTrx = envelope.write() + .inc(paths::plan()->version()->str()) + .remove(paths::plan() + ->collections() + ->database(dbName) + ->collection(collectionID) + ->str()); + + if (coll->vocbase().replicationVersion() == replication::Version::TWO) { + auto const& logs = *coll->shardingInfo()->replicatedLogs(); + auto const& dbPath = paths::plan()->replicatedLogs()->database(dbName); + for (auto const& it : logs) { + auto const& logId = it.second; + writeTrx = std::move(writeTrx).remove(dbPath->log(logId)->str()); + } + } + auto precTrx = std::move(writeTrx).precs().isNotEmpty( + paths::plan()->databases()->database(dbName)->str()); + + if (coll->vocbase().replicationVersion() == replication::Version::TWO) { + auto const& logs = *coll->shardingInfo()->replicatedLogs(); + VPackBuilder replicatedLogs; + { + VPackObjectBuilder guard(&replicatedLogs); + for (auto const& it : logs) { + auto const& [shardId, logId] = it; + replicatedLogs.add(shardId, VPackValue(to_string(logId))); + } + } + precTrx = std::move(precTrx).isEqual(paths::plan() + 
->collections() + ->database(dbName) + ->collection(collectionID) + ->str(), + replicatedLogs.slice()); + } + + auto trx = precTrx.end(); + }; - // Transact to agency - AgencyOperation delPlanCollection("Plan/Collections/" + dbName + "/" + collectionID, - AgencySimpleOperationType::DELETE_OP); - AgencyOperation incrementVersion("Plan/Version", AgencySimpleOperationType::INCREMENT_OP); - AgencyPrecondition precondition = - AgencyPrecondition("Plan/Databases/" + dbName, AgencyPrecondition::Type::EMPTY, false); - AgencyWriteTransaction trans({delPlanCollection, incrementVersion}, precondition); - res = ac.sendTransactionWithFailover(trans); + auto trx = VPackBuffer(); + createDropCollectionTrx(trx); + auto trxCpy = velocypack::SharedSlice(trx); - if (!res.successful()) { - if (res.httpCode() == rest::ResponseCode::PRECONDITION_FAILED) { + using namespace std::chrono_literals; + // TODO Stop using .get(), return a future instead. + auto result = AsyncAgencyComm().sendWriteTransaction(120s, std::move(trx)).get(); + + auto res = result.asResult(); + if (!res.ok()) { + if (result.statusCode() == fuerte::StatusPreconditionFailed) { LOG_TOPIC("279c5", ERR, Logger::CLUSTER) - << "Precondition failed for this agency transaction: " << trans.toJson() - << ", return code: " << res.httpCode(); + << "Precondition failed for this agency transaction: " << trxCpy.toJson() + << ", return code: " << result.statusCode(); } logAgencyDump(); @@ -3461,7 +3586,7 @@ Result ClusterInfo::dropCollectionCoordinator( // drop collection events::DropCollection(dbName, collectionID, TRI_ERROR_CLUSTER_COULD_NOT_DROP_COLLECTION); return Result(TRI_ERROR_CLUSTER_COULD_NOT_DROP_COLLECTION); } - if (VPackSlice resultsSlice = res.slice().get("results"); resultsSlice.length() > 0) { + if (VPackSlice resultsSlice = result.slice().get("results"); resultsSlice.length() > 0) { Result r = waitForPlan(resultsSlice[0].getNumber()).get(); if (r.fail()) { return r; @@ -3479,10 +3604,14 @@ Result 
ClusterInfo::dropCollectionCoordinator( // drop collection if (tmpRes.has_value()) { cbGuard.fire(); // unregister cb before calling ac.removeValues(...) // ...remove the entire directory for the collection - AgencyOperation delCurrentCollection("Current/Collections/" + dbName + "/" + collectionID, + AgencyOperation delCurrentCollection(paths::aliases::current() + ->collections() + ->database(dbName) + ->collection(collectionID) + ->str(), AgencySimpleOperationType::DELETE_OP); AgencyWriteTransaction cx({delCurrentCollection}); - res = ac.sendTransactionWithFailover(cx); + std::ignore = AgencyComm(_server).sendTransactionWithFailover(cx); events::DropCollection(dbName, collectionID, *tmpRes); return Result(*tmpRes); } @@ -3491,7 +3620,7 @@ Result ClusterInfo::dropCollectionCoordinator( // drop collection LOG_TOPIC("76ea6", ERR, Logger::CLUSTER) << "Timeout in _drop collection (" << realTimeout << ")" << ": database: " << dbName << ", collId:" << collectionID - << "\ntransaction sent to agency: " << trans.toJson(); + << "\ntransaction sent to agency: " << trxCpy.toJson(); logAgencyDump(); @@ -5375,7 +5504,7 @@ std::shared_ptr const> ClusterInfo::getResponsibleServer(S } - return std::make_shared>(); + return nullptr; } ////////////////////////////////////////////////////////////////////////////// @@ -5698,7 +5827,7 @@ CollectionID ClusterInfo::getCollectionNameForShard(ShardID const& shardId) { } auto ClusterInfo::getReplicatedLogLeader(DatabaseID const& database, replication2::LogId id) const --> std::optional { + -> std::optional { READ_LOCKER(readLocker, _planProt.lock); if (auto it = _newStuffByDatabase.find(database); it != std::end(_newStuffByDatabase)) { @@ -5715,6 +5844,21 @@ auto ClusterInfo::getReplicatedLogLeader(DatabaseID const& database, replication return std::nullopt; } +auto ClusterInfo::getCollectionGroupById(DatabaseID const& database, + replication2::agency::CollectionGroupId id) + -> std::shared_ptr { + READ_LOCKER(readLocker, _planProt.lock); + 
+ if (auto it = _newStuffByDatabase.find(database); it != std::end(_newStuffByDatabase)) { + if (auto it2 = it->second->collectionGroups.find(id); + it2 != std::end(it->second->collectionGroups)) { + return it2->second; + } + } + + return nullptr; +} + arangodb::Result ClusterInfo::agencyDump(std::shared_ptr body) { AgencyCommResult dump = _agency.dump(); diff --git a/arangod/Cluster/ClusterInfo.h b/arangod/Cluster/ClusterInfo.h index dc9cf4fb0527..e04ccef3f186 100644 --- a/arangod/Cluster/ClusterInfo.h +++ b/arangod/Cluster/ClusterInfo.h @@ -667,14 +667,14 @@ class ClusterInfo final { ////////////////////////////////////////////////////////////////////////////// /// @brief create collection in coordinator ////////////////////////////////////////////////////////////////////////////// - Result createCollectionCoordinator( // create collection - std::string const& databaseName, // database name - std::string const& collectionID, uint64_t numberOfShards, - uint64_t replicationFactor, uint64_t writeConcern, - bool waitForReplication, arangodb::velocypack::Slice const& json, - double timeout, // request timeout - bool isNewDatabase, - std::shared_ptr const& colToDistributeShardsLike); + Result createCollectionCoordinator( + std::string const& databaseName, std::string const& collectionID, + uint64_t numberOfShards, uint64_t replicationFactor, + uint64_t writeConcern, bool waitForSync, bool waitForReplication, + velocypack::Slice const& json, double timeout, bool isNewDatabase, + std::shared_ptr const& colToDistributeShardsLike, + replication::Version replicationVersion, + std::optional>> replicatedLogs); /// @brief this method does an atomic check of the preconditions for the /// collections to be created, using the currently loaded plan. it populates @@ -689,17 +689,15 @@ class ClusterInfo final { /// Note that in contrast to most other methods here, this method does not /// get a timeout parameter, but an endTime parameter!!! 
Result createCollectionsCoordinator(std::string const& databaseName, - std::vector&, + std::vector& infos, double endTime, bool isNewDatabase, - std::shared_ptr const& colToDistributeShardsLike); + std::shared_ptr const& colToDistributeShardsLike, + replication::Version replicationVersion); /// @brief drop collection in coordinator ////////////////////////////////////////////////////////////////////////////// - Result dropCollectionCoordinator( // drop collection - std::string const& databaseName, // database name - std::string const& collectionID, // collection identifier - double timeout // request timeout - ); + Result dropCollectionCoordinator(std::string const& dbName, + std::string const& collectionID, double timeout); ////////////////////////////////////////////////////////////////////////////// /// @brief set collection properties in coordinator @@ -962,6 +960,9 @@ class ClusterInfo final { auto getReplicatedLogLeader(DatabaseID const& database, replication2::LogId) const -> std::optional; + auto getCollectionGroupById(DatabaseID const& database, replication2::agency::CollectionGroupId id) + -> std::shared_ptr; + /** * @brief Lock agency's hot backup with TTL 60 seconds * diff --git a/arangod/Cluster/ClusterMethods.cpp b/arangod/Cluster/ClusterMethods.cpp index 41be023d93e7..4a83fc565499 100644 --- a/arangod/Cluster/ClusterMethods.cpp +++ b/arangod/Cluster/ClusterMethods.cpp @@ -48,6 +48,7 @@ #include "Network/Methods.h" #include "Network/NetworkFeature.h" #include "Network/Utils.h" +#include "Replication2/ReplicatedLog/LogCommon.h" #include "Rest/Version.h" #include "Sharding/ShardingInfo.h" #include "StorageEngine/HotBackupCommon.h" @@ -89,9 +90,10 @@ #include #include +#include #include -#include #include +#include using namespace arangodb; using namespace arangodb::basics; @@ -612,7 +614,7 @@ void ClusterMethods::realNameFromSmartName(std::string&) { } /// fetched from ClusterInfo and with shuffle to mix it up. 
//////////////////////////////////////////////////////////////////////////////// -static std::shared_ptr>> DistributeShardsEvenly( +static std::shared_ptr>> distributeShardsEvenly( ClusterInfo& ci, uint64_t numberOfShards, uint64_t replicationFactor, std::vector& dbServers, bool warnAboutReplicationFactor) { auto shards = @@ -683,11 +685,20 @@ static std::shared_ptr> /// @brief Clone shard distribution from other collection //////////////////////////////////////////////////////////////////////////////// -static std::shared_ptr>> CloneShardDistribution( +static std::shared_ptr>> cloneShardDistribution( ClusterInfo& ci, std::shared_ptr col, std::shared_ptr const& other) { TRI_ASSERT(col); TRI_ASSERT(other); + if (col->vocbase().replicationVersion() == replication::Version::TWO) { + LOG_DEVEL << "TODO: FIXME: " << "distributeShardsLike is not yet implemented for replication 2.0."; + /* + // TODO implement this + ASSERT_OR_THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_NOT_IMPLEMENTED, + "distributeShardsLike is not yet implemented for replication 2.0."); + */ + } if (!other->distributeShardsLike().empty()) { CollectionNameResolver resolver(col->vocbase()); @@ -2575,6 +2586,97 @@ std::vector> ClusterMethods::createCollection } #endif +namespace { +auto initializeShardMap(bool enforceReplicationFactor, + std::shared_ptr const& colToDistributeLike, + std::vector& dbServers, + std::shared_ptr const& col, ClusterInfo& ci) + -> std::shared_ptr>> { + std::vector avoid = col->avoidServers(); + std::string distributeShardsLike = col->distributeShardsLike(); + if (!distributeShardsLike.empty()) { + std::shared_ptr myColToDistributeLike; + + if (colToDistributeLike != nullptr) { + myColToDistributeLike = colToDistributeLike; + } else { + CollectionNameResolver resolver(col->vocbase()); + myColToDistributeLike = resolver.getCollection(distributeShardsLike); + if (myColToDistributeLike == nullptr) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE, + 
"Collection not found: " + distributeShardsLike + + " in database " + col->vocbase().name()); + } + } + + return cloneShardDistribution(ci, col, myColToDistributeLike); + } else { + // system collections should never enforce replicationfactor + // to allow them to come up with 1 dbserver + if (col->system()) { + enforceReplicationFactor = false; + } + + size_t replicationFactor = col->replicationFactor(); + size_t writeConcern = col->writeConcern(); + size_t numberOfShards = col->numberOfShards(); + + // the default behavior however is to bail out and inform the user + // that the requested replicationFactor is not possible right now + if (dbServers.size() < replicationFactor) { + TRI_ASSERT(writeConcern <= replicationFactor); + // => (dbServers.size() < writeConcern) is granted + LOG_TOPIC("9ce2e", DEBUG, Logger::CLUSTER) + << "Do not have enough DBServers for requested replicationFactor," + << " nrDBServers: " << dbServers.size() + << " replicationFactor: " << replicationFactor; + if (enforceReplicationFactor) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS); + } + } + + if (!avoid.empty()) { + // We need to remove all servers that are in the avoid list + if (dbServers.size() - avoid.size() < replicationFactor) { + LOG_TOPIC("03682", DEBUG, Logger::CLUSTER) + << "Do not have enough DBServers for requested " + "replicationFactor," + << " (after considering avoid list)," + << " nrDBServers: " << dbServers.size() << " replicationFactor: " << replicationFactor + << " avoid list size: " << avoid.size(); + // Not enough DBServers left + THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS); + } + dbServers.erase(std::remove_if(dbServers.begin(), dbServers.end(), + [&](const std::string& x) { + return std::find(avoid.begin(), avoid.end(), + x) != avoid.end(); + }), + dbServers.end()); + } + // Constructing a random device can be expensive, and getting random + // numbers from it too, plus they may be scarce on servers. 
So let's + // use it only to initialize one PRNG per thread, once. + static thread_local auto rd = std::random_device{}; + static thread_local auto g = std::mt19937{rd()}; + std::shuffle(dbServers.begin(), dbServers.end(), g); + return distributeShardsEvenly(ci, numberOfShards, replicationFactor, + dbServers, !col->system()); + } // if - distributeShardsLike.empty() +} + +auto initializeReplicatedLogs(LogicalCollection const& col, ShardMap const& shards, + ClusterInfo& ci) -> std::shared_ptr { + auto replicatedLogsMap = std::make_shared(); + for (auto const& it : shards) { + auto const& [key, value] = it; + replicatedLogsMap->try_emplace(key, ci.uniqid()); + } + + return replicatedLogsMap; +} +} // namespace + //////////////////////////////////////////////////////////////////////////////// /// @brief Persist collection in Agency and trigger shard creation process //////////////////////////////////////////////////////////////////////////////// @@ -2599,7 +2701,8 @@ std::vector> ClusterMethods::persistCollectio // all collections have the same database name - ArangoDB does not // support cross-database operations and they cannot be triggered by // users) - auto const dbName = collections[0]->vocbase().name(); + auto const& vocbase = collections[0]->vocbase(); + auto const& dbName = vocbase.name(); ClusterInfo& ci = feature.clusterInfo(); std::vector infos; @@ -2618,77 +2721,9 @@ std::vector> ClusterMethods::persistCollectio // We can only serve on Database at a time with this call. // We have the vocbase context around this calls anyways, so this is safe. 
TRI_ASSERT(col->vocbase().name() == dbName); - std::string distributeShardsLike = col->distributeShardsLike(); - std::vector avoid = col->avoidServers(); - std::shared_ptr>> shards = nullptr; - - if (!distributeShardsLike.empty()) { - std::shared_ptr myColToDistributeLike; - - if (colToDistributeLike != nullptr) { - myColToDistributeLike = colToDistributeLike; - } else { - CollectionNameResolver resolver(col->vocbase()); - myColToDistributeLike = resolver.getCollection(distributeShardsLike); - if (myColToDistributeLike == nullptr) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE, - "Collection not found: " + distributeShardsLike + " in database " + col->vocbase().name()); - } - } - - shards = CloneShardDistribution(ci, col, myColToDistributeLike); - } else { - // system collections should never enforce replicationfactor - // to allow them to come up with 1 dbserver - if (col->system()) { - enforceReplicationFactor = false; - } - - size_t replicationFactor = col->replicationFactor(); - size_t writeConcern = col->writeConcern(); - size_t numberOfShards = col->numberOfShards(); - - // the default behavior however is to bail out and inform the user - // that the requested replicationFactor is not possible right now - if (dbServers.size() < replicationFactor) { - TRI_ASSERT(writeConcern <= replicationFactor); - // => (dbServers.size() < writeConcern) is granted - LOG_TOPIC("9ce2e", DEBUG, Logger::CLUSTER) - << "Do not have enough DBServers for requested replicationFactor," - << " nrDBServers: " << dbServers.size() - << " replicationFactor: " << replicationFactor; - if (enforceReplicationFactor) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS); - } - } - - if (!avoid.empty()) { - // We need to remove all servers that are in the avoid list - if (dbServers.size() - avoid.size() < replicationFactor) { - LOG_TOPIC("03682", DEBUG, Logger::CLUSTER) - << "Do not have enough DBServers for requested " - "replicationFactor," - 
<< " (after considering avoid list)," - << " nrDBServers: " << dbServers.size() - << " replicationFactor: " << replicationFactor - << " avoid list size: " << avoid.size(); - // Not enough DBServers left - THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS); - } - dbServers.erase(std::remove_if(dbServers.begin(), dbServers.end(), - [&](const std::string& x) { - return std::find(avoid.begin(), avoid.end(), - x) != avoid.end(); - }), - dbServers.end()); - } - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(dbServers.begin(), dbServers.end(), g); - shards = DistributeShardsEvenly(ci, numberOfShards, replicationFactor, - dbServers, !col->system()); - } // if - distributeShardsLike.empty() + auto shards = initializeShardMap(enforceReplicationFactor, + colToDistributeLike, dbServers, col, ci); if (shards->empty() && !col->isSmart()) { THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, @@ -2697,6 +2732,17 @@ std::vector> ClusterMethods::persistCollectio col->setShardMap(shards); + auto replicatedLogs = + std::invoke([&]() -> std::optional> { + if (vocbase.replicationVersion() == replication::Version::TWO) { + auto replicatedLogs = initializeReplicatedLogs(*col, *shards, ci); + // col->setReplicatedLogsMap(replicatedLogs); + return replicatedLogs; + } else { + return std::nullopt; + } + }); + std::unordered_set const ignoreKeys{ "allowUserKeys", "cid", "globallyUniqueId", "count", "planId", "version", "objectId"}; @@ -2704,17 +2750,19 @@ std::vector> ClusterMethods::persistCollectio VPackBuilder velocy = col->toVelocyPackIgnore(ignoreKeys, LogicalDataSource::Serialization::List); - auto const serverState = ServerState::instance(); + auto const* const serverState = ServerState::instance(); infos.emplace_back(ClusterCollectionCreationInfo{ std::to_string(col->id().id()), col->numberOfShards(), - col->replicationFactor(), col->writeConcern(), waitForSyncReplication, - velocy.slice(), serverState->getId(), serverState->getRebootId()}); + 
col->replicationFactor(), col->writeConcern(), col->waitForSync(), + waitForSyncReplication, velocy.slice(), serverState->getId(), + serverState->getRebootId(), replicatedLogs}); vpackData.emplace_back(velocy.steal()); - } // for col : collections + } // for col : collections // pass in the *endTime* here, not a timeout! Result res = ci.createCollectionsCoordinator(dbName, infos, endTime, - isNewDatabase, colToDistributeLike); + isNewDatabase, colToDistributeLike, + vocbase.replicationVersion()); if (res.ok()) { // success! exit the loop and go on @@ -2746,8 +2794,6 @@ std::vector> ClusterMethods::persistCollectio } } - //ci.loadPlan(); - // Produce list of shared_ptr wrappers std::vector> usableCollectionPointers; @@ -2769,9 +2815,9 @@ std::vector> ClusterMethods::persistCollectio } } return usableCollectionPointers; -} +} -std::string const apiStr("/_admin/backup/"); +static auto const apiStr = std::string("/_admin/backup/"); arangodb::Result hotBackupList(network::ConnectionPool* pool, std::vector const& dbServers, VPackSlice const idSlice, diff --git a/arangod/Replication2/AgencyMethods.cpp b/arangod/Replication2/AgencyMethods.cpp index 077182ab780b..c0b2742059ef 100644 --- a/arangod/Replication2/AgencyMethods.cpp +++ b/arangod/Replication2/AgencyMethods.cpp @@ -20,13 +20,28 @@ /// @author Lars Maier //////////////////////////////////////////////////////////////////////////////// -#include - #include "AgencyMethods.h" -#include -#include -#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "Agency/AsyncAgencyComm.h" +#include "Agency/TransactionBuilder.h" +#include "Agency/AgencyPaths.h" +#include "Cluster/ClusterTypes.h" +#include "Replication2/ReplicatedLog/AgencyLogSpecification.h" +#include "Replication2/ReplicatedLog/LogCommon.h" + +namespace arangodb { +class Result; +} // namespace arangodb using namespace std::chrono_literals; diff --git a/arangod/Replication2/AgencyMethods.h 
b/arangod/Replication2/AgencyMethods.h index dcc637e439fe..caf033e8c335 100644 --- a/arangod/Replication2/AgencyMethods.h +++ b/arangod/Replication2/AgencyMethods.h @@ -21,13 +21,26 @@ //////////////////////////////////////////////////////////////////////////////// #pragma once -#include - #include #include #include - #include +#include + +#include "Futures/Future.h" + +namespace arangodb { +class Result; +} // namespace arangodb +namespace arangodb::replication2 { +class LogId; +struct LogTerm; +} // namespace arangodb::replication2 +namespace arangodb::replication2::agency { +struct LogCurrentSupervisionElection; +struct LogPlanSpecification; +struct LogPlanTermSpecification; +} // namespace arangodb::replication2::agency namespace arangodb::replication2::agency::methods { diff --git a/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp b/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp index 01401bb5cdd7..47bbd97ca27d 100644 --- a/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp +++ b/arangod/Replication2/ReplicatedLog/AgencyLogSpecification.cpp @@ -57,9 +57,9 @@ auto LogPlanTermSpecification::toVelocyPack(VPackBuilder& builder) const -> void } } -LogPlanTermSpecification::LogPlanTermSpecification(from_velocypack_t, VPackSlice slice) { - term = slice.get(StaticStrings::Term).extract(); - config = LogConfig(slice.get(StaticStrings::Config)); +LogPlanTermSpecification::LogPlanTermSpecification(from_velocypack_t, VPackSlice slice) + : term(slice.get(StaticStrings::Term).extract()), + config(slice.get(StaticStrings::Config)) { for (auto const& [key, value] : VPackObjectIterator(slice.get(StaticStrings::Participants))) { TRI_ASSERT(value.isEmptyObject()); @@ -82,9 +82,9 @@ auto LogPlanSpecification::toVelocyPack(VPackBuilder& builder) const -> void { } } -LogPlanSpecification::LogPlanSpecification(from_velocypack_t, VPackSlice slice) { - id = slice.get(StaticStrings::Id).extract(); - targetConfig = 
LogConfig(slice.get(StaticStrings::TargetConfig)); +LogPlanSpecification::LogPlanSpecification(from_velocypack_t, VPackSlice slice) + : id(slice.get(StaticStrings::Id).extract()), + targetConfig(slice.get(StaticStrings::TargetConfig)) { if (auto term = slice.get(StaticStrings::CurrentTerm); !term.isNone()) { currentTerm = LogPlanTermSpecification{from_velocypack, term}; } @@ -99,7 +99,8 @@ LogPlanTermSpecification::LogPlanTermSpecification(LogTerm term, LogConfig confi participants(std::move(participants)) {} LogPlanSpecification::LogPlanSpecification(LogId id, std::optional term, - LogConfig config) : id(id), currentTerm(std::move(term)), targetConfig(config) {} + LogConfig config) + : id(id), currentTerm(std::move(term)), targetConfig(config) {} LogCurrentLocalState::LogCurrentLocalState(from_velocypack_t, VPackSlice slice) { auto spearheadSlice = slice.get(StaticStrings::Spearhead); @@ -135,10 +136,11 @@ LogCurrentSupervision::LogCurrentSupervision(from_velocypack_t, VPackSlice slice } } -LogCurrentSupervisionElection::LogCurrentSupervisionElection(from_velocypack_t, VPackSlice slice) { - term = slice.get(StaticStrings::Term).extract(); - participantsRequired = slice.get("participantsRequired").getNumericValue(); - participantsAvailable = slice.get("participantsAvailable").getNumericValue(); +LogCurrentSupervisionElection::LogCurrentSupervisionElection(from_velocypack_t, VPackSlice slice) + : term(slice.get(StaticStrings::Term).extract()), + participantsRequired(slice.get("participantsRequired").getNumericValue()), + participantsAvailable( + slice.get("participantsAvailable").getNumericValue()) { for (auto [key, value] : VPackObjectIterator(slice.get("details"))) { detail.emplace(key.copyString(), value.get("code").getNumericValue()); } @@ -172,21 +174,23 @@ auto LogCurrentSupervisionElection::toVelocyPack(VPackBuilder& builder) const -> builder.add("participantsAvailable", VPackValue(participantsAvailable)); { VPackObjectBuilder db(&builder, "details"); - for 
(auto const&[server, error] : detail) { + for (auto const& [server, error] : detail) { builder.add(VPackValue(server)); ::toVelocyPack(error, builder); } } } -auto agency::toVelocyPack(LogCurrentSupervisionElection::ErrorCode ec, VPackBuilder& builder) -> void { +auto agency::toVelocyPack(LogCurrentSupervisionElection::ErrorCode ec, + VPackBuilder& builder) -> void { VPackObjectBuilder ob(&builder); builder.add("code", VPackValue(static_cast(ec))); builder.add("message", VPackValue(to_string(ec))); } -auto agency::to_string(LogCurrentSupervisionElection::ErrorCode ec) noexcept -> std::string_view { - switch(ec) { +auto agency::to_string(LogCurrentSupervisionElection::ErrorCode ec) noexcept + -> std::string_view { + switch (ec) { case LogCurrentSupervisionElection::ErrorCode::OK: return "the server is ok"; case LogCurrentSupervisionElection::ErrorCode::SERVER_NOT_GOOD: diff --git a/arangod/Replication2/ReplicatedLog/Algorithms.h b/arangod/Replication2/ReplicatedLog/Algorithms.h index 2a35bedf9488..966fc1d2360f 100644 --- a/arangod/Replication2/ReplicatedLog/Algorithms.h +++ b/arangod/Replication2/ReplicatedLog/Algorithms.h @@ -27,8 +27,7 @@ #include #include "Cluster/ClusterTypes.h" -#include "InMemoryLog.h" -#include "ReplicatedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" #include "Replication2/ReplicatedLog/AgencyLogSpecification.h" namespace arangodb::replication2::algorithms { diff --git a/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp b/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp index c0fbe53129b5..5125505fab69 100644 --- a/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp +++ b/arangod/Replication2/ReplicatedLog/ILogParticipant.cpp @@ -28,6 +28,7 @@ #include "RestServer/Metrics.h" #include +#include using namespace arangodb; using namespace arangodb::replication2; @@ -64,3 +65,23 @@ auto replicated_log::ILogParticipant::waitForIterator(LogIndex index) auto replicated_log::ILogParticipant::getTerm() const noexcept -> 
std::optional { return getStatus().getCurrentTerm(); } + +auto replicated_log::LogUnconfiguredParticipant::release(LogIndex doneWithIdx) -> Result { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +replicated_log::WaitForResult::WaitForResult(LogIndex index, + std::shared_ptr quorum) + : currentCommitIndex(index), quorum(std::move(quorum)) {} + +void replicated_log::WaitForResult::toVelocyPack(velocypack::Builder& builder) const { + VPackObjectBuilder ob(&builder); + builder.add(StaticStrings::CommitIndex, VPackValue(currentCommitIndex)); + builder.add(VPackValue("quorum")); + quorum->toVelocyPack(builder); +} + +replicated_log::WaitForResult::WaitForResult(velocypack::Slice s) { + currentCommitIndex = s.get(StaticStrings::CommitIndex).extract(); + quorum = std::make_shared(s.get("quorum")); +} diff --git a/arangod/Replication2/ReplicatedLog/ILogParticipant.h b/arangod/Replication2/ReplicatedLog/ILogParticipant.h index 4ee236312557..e60bb6af3946 100644 --- a/arangod/Replication2/ReplicatedLog/ILogParticipant.h +++ b/arangod/Replication2/ReplicatedLog/ILogParticipant.h @@ -33,11 +33,28 @@ #include #include +namespace arangodb { +class Result; +} + namespace arangodb::replication2::replicated_log { struct LogCore; struct LogStatus; +struct WaitForResult { + /// @brief contains the _current_ commit index. (Not the index waited for) + LogIndex currentCommitIndex; + /// @brief Quorum information + std::shared_ptr quorum; + + WaitForResult(LogIndex index, std::shared_ptr quorum); + WaitForResult() = default; + WaitForResult(velocypack::Slice); + + void toVelocyPack(velocypack::Builder&) const; +}; + /** * @brief Interface for a log participant: That is, usually either a leader or a * follower (LogLeader and LogFollower). 
Can also be a LogUnconfiguredParticipant, @@ -51,21 +68,23 @@ struct ILogParticipant { [[nodiscard]] virtual auto resign() && -> std::tuple, DeferredAction> = 0; - using WaitForPromise = futures::Promise>; - using WaitForFuture = futures::Future>; - using WaitForIteratorFuture = futures::Future>; + using WaitForPromise = futures::Promise; + using WaitForFuture = futures::Future; + using WaitForIteratorFuture = futures::Future>; using WaitForQueue = std::multimap; [[nodiscard]] virtual auto waitFor(LogIndex index) -> WaitForFuture = 0; [[nodiscard]] virtual auto waitForIterator(LogIndex index) -> WaitForIteratorFuture; [[nodiscard]] virtual auto getTerm() const noexcept -> std::optional; + + [[nodiscard]] virtual auto release(LogIndex doneWithIdx) -> Result = 0; }; /** * @brief Unconfigured log participant, i.e. currently neither a leader nor * follower. Holds a LogCore, does nothing else. */ -struct LogUnconfiguredParticipant +struct LogUnconfiguredParticipant final : std::enable_shared_from_this, ILogParticipant { ~LogUnconfiguredParticipant() override; @@ -76,6 +95,7 @@ struct LogUnconfiguredParticipant auto resign() && -> std::tuple, DeferredAction> override; [[nodiscard]] auto waitFor(LogIndex) -> WaitForFuture override; + [[nodiscard]] auto release(LogIndex doneWithIdx) -> Result override; private: std::unique_ptr _logCore; std::shared_ptr const _logMetrics; diff --git a/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp b/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp index 766189e691ba..a33516b13e77 100644 --- a/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp +++ b/arangod/Replication2/ReplicatedLog/InMemoryLog.cpp @@ -51,15 +51,10 @@ using namespace arangodb; using namespace arangodb::replication2; auto replicated_log::InMemoryLog::getLastIndex() const noexcept -> LogIndex { - auto const result = LogIndex{_log.size()}; - // log empty => result == 0 - TRI_ASSERT(!_log.empty() || result == LogIndex(0)); - // !log empty => result index == last entry - 
TRI_ASSERT(_log.empty() || result == _log.back().entry().logIndex()); - return result; + return getLastTermIndexPair().index; } -auto replicated_log::InMemoryLog::getLastTermIndexPair() const noexcept -> TermIndexPair{ +auto replicated_log::InMemoryLog::getLastTermIndexPair() const noexcept -> TermIndexPair { if (_log.empty()) { return {}; } @@ -67,36 +62,35 @@ auto replicated_log::InMemoryLog::getLastTermIndexPair() const noexcept -> TermI } auto replicated_log::InMemoryLog::getLastTerm() const noexcept -> LogTerm { - if (_log.empty()) { - return LogTerm{0}; - } - return _log.back().entry().logTerm(); + return getLastTermIndexPair().term; } auto replicated_log::InMemoryLog::getNextIndex() const noexcept -> LogIndex { - return getLastIndex() + 1; + return _first + _log.size(); } auto replicated_log::InMemoryLog::getEntryByIndex(LogIndex const idx) const noexcept -> std::optional { - if (_log.size() < idx.value || idx.value == 0) { + if (_first + _log.size() <= idx || idx < _first) { return std::nullopt; } - auto const& e = _log.at(idx.value - 1); + auto const& e = _log.at(idx.value - _first.value); TRI_ASSERT(e.entry().logIndex() == idx); return e; } auto replicated_log::InMemoryLog::slice(LogIndex from, LogIndex to) const -> log_type { - from = LogIndex{std::max(from.value, 1)}; + from = std::max(from, _first); + to = std::max(to, _first); TRI_ASSERT(from <= to); - auto res = _log.take(to.value - 1).drop(from.value - 1); - TRI_ASSERT(res.size() == to.value - from.value); + auto res = _log.take(to.value - _first.value).drop(from.value - _first.value); + TRI_ASSERT(res.size() <= to.value - from.value); return res; } -auto replicated_log::InMemoryLog::getFirstIndexOfTerm(LogTerm term) const noexcept -> std::optional { +auto replicated_log::InMemoryLog::getFirstIndexOfTerm(LogTerm term) const noexcept + -> std::optional { auto it = std::lower_bound(_log.begin(), _log.end(), term, [](auto const& entry, auto const& term) { return term > entry.entry().logTerm(); @@ 
-125,17 +119,24 @@ auto replicated_log::InMemoryLog::getLastIndexOfTerm(LogTerm term) const noexcep } } -replicated_log::InMemoryLog::InMemoryLog(LoggerContext const& logContext, - replicated_log::LogCore const& logCore) { +replicated_log::InMemoryLog::InMemoryLog(replicated_log::LogCore const& logCore) { auto iter = logCore.read(LogIndex{0}); auto log = _log.transient(); while (auto entry = iter->next()) { log.push_back(InMemoryLogEntry(std::move(entry).value())); } _log = std::move(log).persistent(); + _first =_log.empty() ? LogIndex{1} : _log.front().entry().logIndex(); } -replicated_log::InMemoryLog::InMemoryLog(log_type log) : _log(std::move(log)) {} +replicated_log::InMemoryLog::InMemoryLog(log_type log) + : _log(std::move(log)), + _first(_log.empty() ? LogIndex{1} : _log.front().entry().logIndex()) {} + +replicated_log::InMemoryLog::InMemoryLog(log_type log, LogIndex first) + : _log(std::move(log)), _first(first) { + TRI_ASSERT(_log.empty() || first == _log.front().entry().logIndex()); +} #if (_MSC_VER >= 1) // suppress false positive warning: @@ -144,7 +145,8 @@ replicated_log::InMemoryLog::InMemoryLog(log_type log) : _log(std::move(log)) {} #pragma warning(disable : 4297) #endif replicated_log::InMemoryLog::InMemoryLog(replicated_log::InMemoryLog&& other) noexcept try - : _log(std::move(other._log)) { + : _log(std::move(other._log)), _first(other._first) { + other._first = LogIndex{1}; // Note that immer::flex_vector is currently not nothrow move-assignable, // though it probably does not throw any exceptions. However, we *need* this // to be noexcept, otherwise we cannot keep the persistent and in-memory state @@ -186,6 +188,8 @@ auto replicated_log::InMemoryLog::operator=(replicated_log::InMemoryLog&& other) // The try/catch is *only* for logging, but *must* terminate (e.g. by // rethrowing) the process if an exception is caught. 
_log = std::move(other._log); + _first = other._first; + other._first = LogIndex{1}; return *this; } catch (std::exception const& ex) { LOG_TOPIC("bf5c5", FATAL, Logger::REPLICATION2) @@ -206,21 +210,22 @@ auto replicated_log::InMemoryLog::getIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr { // if we want to have read from log entry 1 onwards, we have to drop // no entries, because log entry 0 does not exist. - auto log = _log.drop(fromIdx.saturatedDecrement().value); + auto log = _log.drop(fromIdx.saturatedDecrement(_first.value).value); return std::make_unique(std::move(log)); } -auto replicated_log::InMemoryLog::getInternalIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr { +auto replicated_log::InMemoryLog::getInternalIteratorFrom(LogIndex fromIdx) const + -> std::unique_ptr { // if we want to have read from log entry 1 onwards, we have to drop // no entries, because log entry 0 does not exist. - auto log = _log.drop(fromIdx.saturatedDecrement().value); + auto log = _log.drop(fromIdx.saturatedDecrement(_first.value).value); return std::make_unique(std::move(log)); } auto replicated_log::InMemoryLog::getIteratorRange(LogIndex fromIdx, LogIndex toIdx) const - -> std::unique_ptr { - auto log = _log.take(toIdx.saturatedDecrement().value) - .drop(fromIdx.saturatedDecrement().value); + -> std::unique_ptr { + auto log = _log.take(toIdx.saturatedDecrement(_first.value).value) + .drop(fromIdx.saturatedDecrement(_first.value).value); return std::make_unique(std::move(log)); } @@ -242,23 +247,21 @@ auto replicated_log::InMemoryLog::append(LoggerContext const& logContext, log_type entries) const -> InMemoryLog { auto transient = _log.transient(); transient.append(std::move(entries).transient()); - return InMemoryLog{std::move(transient).persistent()}; + return InMemoryLog{std::move(transient).persistent(), _first}; } -auto replicated_log::InMemoryLog::append( - LoggerContext const& logContext, - ::immer::flex_vector const& entries) const - -> InMemoryLog { 
+auto replicated_log::InMemoryLog::append(LoggerContext const& logContext, + log_type_persisted const& entries) const -> InMemoryLog { auto transient = _log.transient(); for (auto const& entry : entries) { transient.push_back(InMemoryLogEntry(entry)); } - return InMemoryLog{std::move(transient).persistent()}; + return InMemoryLog{std::move(transient).persistent(), _first}; } auto replicated_log::InMemoryLog::takeSnapshotUpToAndIncluding(LogIndex until) const -> InMemoryLog { - return InMemoryLog(_log.take(until.value)); + return InMemoryLog{_log.take(until.value), _first}; } auto replicated_log::InMemoryLog::copyFlexVector() const -> log_type { @@ -289,7 +292,7 @@ auto replicated_log::InMemoryLog::getFirstEntry() const noexcept return _log.front(); } -auto replicated_log::InMemoryLog::dump(replicated_log::InMemoryLog::log_type log) +auto replicated_log::InMemoryLog::dump(replicated_log::InMemoryLog::log_type const& log) -> std::string { auto builder = velocypack::Builder(); auto stream = std::stringstream(); @@ -310,4 +313,10 @@ auto replicated_log::InMemoryLog::dump(replicated_log::InMemoryLog::log_type log return stream.str(); } -auto replicated_log::InMemoryLog::dump() -> std::string { return dump(_log); } +auto replicated_log::InMemoryLog::dump() const -> std::string { + return dump(_log); +} + +auto replicated_log::InMemoryLog::getIndexRange() const noexcept -> LogRange { + return LogRange(_first, _first + _log.size()); +} diff --git a/arangod/Replication2/ReplicatedLog/InMemoryLog.h b/arangod/Replication2/ReplicatedLog/InMemoryLog.h index 9a7f6512a9d5..07cf30684805 100644 --- a/arangod/Replication2/ReplicatedLog/InMemoryLog.h +++ b/arangod/Replication2/ReplicatedLog/InMemoryLog.h @@ -56,15 +56,18 @@ struct PersistedLogIterator; */ struct InMemoryLog { public: - using log_type = - ::immer::flex_vector; + template + using log_type_t = ::immer::flex_vector; + using log_type = log_type_t; + using log_type_persisted = log_type_t; private: log_type _log{}; + 
LogIndex _first{0}; public: InMemoryLog() = delete; - InMemoryLog(LoggerContext const& logContext, replicated_log::LogCore const& logCore); + InMemoryLog(replicated_log::LogCore const& logCore); InMemoryLog(InMemoryLog&& other) noexcept; InMemoryLog(InMemoryLog const&) = default; @@ -89,6 +92,8 @@ struct InMemoryLog { [[nodiscard]] auto getLastIndexOfTerm(LogTerm term) const noexcept -> std::optional; + [[nodiscard]] auto getIndexRange() const noexcept -> LogRange; + // @brief Unconditionally accesses the last element [[nodiscard]] auto back() const noexcept -> decltype(_log)::const_reference; [[nodiscard]] auto empty() const noexcept -> bool; @@ -98,25 +103,25 @@ struct InMemoryLog { [[nodiscard]] auto append(LoggerContext const& logContext, log_type entries) const -> InMemoryLog; [[nodiscard]] auto append(LoggerContext const& logContext, - ::immer::flex_vector const& entries) const - -> InMemoryLog; + log_type_persisted const& entries) const -> InMemoryLog; [[nodiscard]] auto getIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr; [[nodiscard]] auto getInternalIteratorFrom(LogIndex fromIdx) const -> std::unique_ptr; // get an iterator for range [from, to). 
[[nodiscard]] auto getIteratorRange(LogIndex fromIdx, LogIndex toIdx) const - -> std::unique_ptr; + -> std::unique_ptr; [[nodiscard]] auto takeSnapshotUpToAndIncluding(LogIndex until) const -> InMemoryLog; [[nodiscard]] auto copyFlexVector() const -> log_type; // helpful for debugging - [[nodiscard]] static auto dump(log_type log) -> std::string; - [[nodiscard]] auto dump() -> std::string; + [[nodiscard]] static auto dump(log_type const& log) -> std::string; + [[nodiscard]] auto dump() const -> std::string; protected: explicit InMemoryLog(log_type log); + explicit InMemoryLog(log_type log, LogIndex first); }; } // namespace arangodb::replication2::replicated_log diff --git a/arangod/Replication2/ReplicatedLog/LogCommon.cpp b/arangod/Replication2/ReplicatedLog/LogCommon.cpp index 7f1b17eb1176..6b0e09a66088 100644 --- a/arangod/Replication2/ReplicatedLog/LogCommon.cpp +++ b/arangod/Replication2/ReplicatedLog/LogCommon.cpp @@ -304,3 +304,74 @@ auto replication2::operator==(LogConfig const& left, LogConfig const& right) noe auto replication2::operator!=(const LogConfig& left, const LogConfig& right) noexcept -> bool { return !(left == right); } + +LogRange::LogRange(LogIndex from, LogIndex to) noexcept : from(from), to(to) { + TRI_ASSERT(from <= to); +} + +auto LogRange::empty() const noexcept -> bool { return from == to; } + +auto LogRange::count() const noexcept -> std::size_t { + return to.value - from.value; +} + +auto LogRange::contains(LogIndex idx) const noexcept -> bool { + return from <= idx && idx < to; +} + +auto replication2::operator<<(std::ostream& os, LogRange const& r) -> std::ostream& { + return os << "[" << r.from << ", " << r.to << ")"; +} + +auto replication2::intersect(LogRange a, LogRange b) noexcept -> LogRange { + auto max_from = std::max(a.from, b.from); + auto min_to = std::min(a.to, b.to); + if (max_from > min_to) { + return {LogIndex{0}, LogIndex{0}}; + } else { + return {max_from, min_to}; + } +} + +auto LogRange::end() const noexcept -> 
LogRange::Iterator { + return Iterator{to}; +} +auto LogRange::begin() const noexcept -> LogRange::Iterator { + return Iterator{from}; +} + +auto LogRange::Iterator::operator++() noexcept -> LogRange::Iterator& { + current = current + 1; + return *this; +} + +auto LogRange::Iterator::operator++(int) noexcept -> LogRange::Iterator { + auto idx = current; + current = current + 1; + return Iterator(idx); +} + +auto LogRange::Iterator::operator*() const noexcept -> LogIndex { + return current; +} +auto LogRange::Iterator::operator->() const noexcept -> LogIndex const* { + return &current; +} + +auto replication2::operator==(LogRange a, LogRange b) noexcept -> bool { + return a.from == b.from && a.to == b.to; +} + +auto replication2::operator!=(LogRange a, LogRange b) noexcept -> bool { + return !(a == b); +} + +auto replication2::operator==(LogRange::Iterator const& a, + LogRange::Iterator const& b) noexcept -> bool { + return a.current == b.current; +} + +auto replication2::operator!=(LogRange::Iterator const& a, + LogRange::Iterator const& b) noexcept -> bool { + return !(a == b); +} diff --git a/arangod/Replication2/ReplicatedLog/LogCommon.h b/arangod/Replication2/ReplicatedLog/LogCommon.h index 8f9fabc492af..db9b63feb6a6 100644 --- a/arangod/Replication2/ReplicatedLog/LogCommon.h +++ b/arangod/Replication2/ReplicatedLog/LogCommon.h @@ -132,6 +132,48 @@ struct TermIndexPair : implement_compare { auto operator<=(TermIndexPair, TermIndexPair) noexcept -> bool; auto operator<<(std::ostream&, TermIndexPair) -> std::ostream&; +struct LogRange { + LogIndex from; + LogIndex to; + + LogRange(LogIndex from, LogIndex to) noexcept; + + [[nodiscard]] auto empty() const noexcept -> bool; + [[nodiscard]] auto count() const noexcept -> std::size_t; + [[nodiscard]] auto contains(LogIndex idx) const noexcept -> bool; + + friend auto operator<<(std::ostream& os, LogRange const& r) -> std::ostream&; + friend auto intersect(LogRange a, LogRange b) noexcept -> LogRange; + + struct Iterator {
+ auto operator++() noexcept -> Iterator&; + auto operator++(int) noexcept -> Iterator; + auto operator*() const noexcept -> LogIndex; + auto operator->() const noexcept -> LogIndex const*; + friend auto operator==(Iterator const& a, Iterator const& b) noexcept -> bool; + friend auto operator!=(Iterator const& a, Iterator const& b) noexcept -> bool; + + private: + friend LogRange; + explicit Iterator(LogIndex idx) : current(idx) {} + LogIndex current; + }; + + friend auto operator==(LogRange, LogRange) noexcept -> bool; + friend auto operator!=(LogRange, LogRange) noexcept -> bool; + + [[nodiscard]] auto begin() const noexcept -> Iterator; + [[nodiscard]] auto end() const noexcept -> Iterator; +}; + +auto operator<<(std::ostream& os, LogRange const& r) -> std::ostream&; +auto intersect(LogRange a, LogRange b) noexcept -> LogRange; +auto operator==(LogRange, LogRange) noexcept -> bool; +auto operator!=(LogRange, LogRange) noexcept -> bool; + +auto operator==(LogRange::Iterator const& a, LogRange::Iterator const& b) noexcept -> bool; +auto operator!=(LogRange::Iterator const& a, LogRange::Iterator const& b) noexcept -> bool; + struct LogPayload { explicit LogPayload(velocypack::UInt8Buffer dummy); @@ -240,13 +282,26 @@ class LogId : public arangodb::basics::Identifier { auto to_string(LogId logId) -> std::string; -struct LogIterator { - virtual ~LogIterator() = default; +template +struct TypedLogIterator { + virtual ~TypedLogIterator() = default; // The returned view is guaranteed to stay valid until a successive next() // call (only). - virtual auto next() -> std::optional = 0; + virtual auto next() -> std::optional = 0; }; +template +struct TypedLogRangeIterator : TypedLogIterator { + // returns the index interval [from, to) + // Note that this does not imply that all indexes in the range [from, to) + // are returned. Hence (to - from) is only an upper bound on the number of + // entries returned. 
+ virtual auto range() const noexcept -> std::pair = 0; +}; + +using LogIterator = TypedLogIterator; +using LogRangeIterator = TypedLogRangeIterator; + struct LogConfig { std::size_t writeConcern = 1; bool waitForSync = false; diff --git a/arangod/Replication2/ReplicatedLog/LogFollower.cpp b/arangod/Replication2/ReplicatedLog/LogFollower.cpp index 609ef32d7b73..c1c5709aa11e 100644 --- a/arangod/Replication2/ReplicatedLog/LogFollower.cpp +++ b/arangod/Replication2/ReplicatedLog/LogFollower.cpp @@ -24,10 +24,10 @@ #include "Replication2/ReplicatedLog/Algorithms.h" #include "Replication2/ReplicatedLog/LogContextKeys.h" -#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" #include "Replication2/ReplicatedLog/LogStatus.h" #include "Replication2/ReplicatedLog/NetworkMessages.h" #include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" #include "Replication2/ReplicatedLog/ReplicatedLogMetrics.h" #include "RestServer/Metrics.h" @@ -95,8 +95,7 @@ auto LogFollower::appendEntriesPreFlightChecks(GuardedFollowerData const& data, // It is always allowed to replace the log entirely if (req.prevLogEntry.index > LogIndex{0}) { - if (auto conflict = - algorithms::detectConflict(data._inMemoryLog, req.prevLogEntry); + if (auto conflict = algorithms::detectConflict(data._inMemoryLog, req.prevLogEntry); conflict.has_value()) { auto [reason, next] = *conflict; @@ -115,7 +114,6 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) auto self = _guardedFollowerData.getLockedGuard(); - { // Preflight checks - does the leader, log and other stuff match? // This code block should not modify the local state, only check values. @@ -133,7 +131,8 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) // as a copy, then modify the log on disk. This is an atomic operation. If // it fails, we forget the new state. Otherwise we replace the old in memory // state with the new value. 
- auto newInMemoryLog = self->_inMemoryLog.takeSnapshotUpToAndIncluding(req.prevLogEntry.index); + auto newInMemoryLog = + self->_inMemoryLog.takeSnapshotUpToAndIncluding(req.prevLogEntry.index); if (self->_inMemoryLog.getLastIndex() != req.prevLogEntry.index) { auto res = self->_logCore->removeBack(req.prevLogEntry.index + 1); @@ -149,30 +148,16 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) self->_inMemoryLog = std::move(newInMemoryLog); } - struct WaitForQueueResolve { - using QueueGuard = Guarded::mutex_guard_type; - - WaitForQueueResolve(QueueGuard guard, LogIndex commitIndex) noexcept - : _guard(std::move(guard)), - begin(_guard->begin()), - end(_guard->upper_bound(commitIndex)) {} - - QueueGuard _guard; - WaitForQueue::iterator begin; - WaitForQueue::iterator end; - }; - // Allocations auto newInMemoryLog = self->_inMemoryLog.append(_loggerContext, req.entries); auto iter = std::make_unique(req.entries); - auto toBeResolvedPtr = std::make_unique>(); + auto toBeResolved = std::make_unique(); auto* core = self->_logCore.get(); static_assert(std::is_nothrow_move_constructible_v); auto commitToMemoryAndResolve = [selfGuard = std::move(self), req = std::move(req), - newInMemoryLog = std::move(newInMemoryLog), - toBeResolvedPtr = std::move(toBeResolvedPtr)]( + newInMemoryLog = std::move(newInMemoryLog), toBeResolved = std::move(toBeResolved)]( futures::Try&& tryRes) mutable -> std::pair { // We have to release the guard after this lambda is finished. 
// Otherwise it would be released when the lambda is destroyed, which @@ -203,25 +188,52 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) << req.prevLogEntry.index << ", leader commit index = " << req.leaderCommit; } + auto const generateToBeResolved = [&] { + try { + auto waitForQueue = self->_waitForQueue.getLockedGuard(); + + auto const end = waitForQueue->upper_bound(self->_commitIndex); + for (auto it = waitForQueue->begin(); it != end;) { + LOG_CTX("37d9c", TRACE, self->_follower._loggerContext) + << "resolving promise for index " << it->first; + toBeResolved->insert(waitForQueue->extract(it++)); + } + return DeferredAction([commitIndex = self->_commitIndex, + toBeResolved = std::move(toBeResolved)]() noexcept { + for (auto& it : *toBeResolved) { + if (!it.second.isFulfilled()) { + // This only throws if promise was fulfilled earlier. + it.second.setValue(WaitForResult{commitIndex, std::shared_ptr{}}); + } + } + }); + } catch (std::exception const& e) { + // If those promises are not fulfilled we can not continue. + // Note that the move constructor of std::multi_map is not noexcept. + LOG_CTX("e7a4d", FATAL, self->_follower._loggerContext) + << "failed to fulfill replication promises due to exception; " + "system " + "can not continue. message: " + << e.what(); + FATAL_ERROR_EXIT(); + } catch (...) { + // If those promises are not fulfilled we can not continue. + // Note that the move constructor of std::multi_map is not noexcept. 
+ LOG_CTX("c0bbb", FATAL, self->_follower._loggerContext) + << "failed to fulfill replication promises due to exception; " + "system " + "can not continue"; + FATAL_ERROR_EXIT(); + } + }; + auto action = std::invoke([&]() noexcept -> DeferredAction { if (self->_commitIndex < req.leaderCommit && !self->_inMemoryLog.empty()) { self->_commitIndex = std::min(req.leaderCommit, self->_inMemoryLog.back().entry().logIndex()); LOG_CTX("1641d", TRACE, self->_follower._loggerContext) << "increment commit index: " << self->_commitIndex; - - auto toBeResolved = std::optional{std::in_place, self->_waitForQueue.getLockedGuard(), self->_commitIndex}; - static_assert(std::is_nothrow_move_assignable_v>); - *toBeResolvedPtr = std::move(toBeResolved); - return DeferredAction([toBeResolved = std::move(toBeResolvedPtr)]() noexcept { - auto& resolve = toBeResolved->value(); - for (auto it = resolve.begin; it != resolve.end; it = resolve._guard->erase(it)) { - if (!it->second.isFulfilled()) { - // This only throws if promise was fulfilled earlier. - it->second.setValue(std::shared_ptr{}); - } - } - }); + return generateToBeResolved(); } return {}; @@ -240,17 +252,19 @@ auto replicated_log::LogFollower::appendEntries(AppendEntriesRequest req) .then(std::move(commitToMemoryAndResolve)) .then([measureTime = std::move(measureTimeGuard)](auto&& res) mutable { measureTime.fire(); - auto&& [result, toBeResolved] = res.get(); - // It is okay to fire here, because commitToMemoryAndResolve has released - // the guard already. - toBeResolved.fire(); + auto&& [result, action] = res.get(); + // It is okay to fire here, because commitToMemoryAndResolve has + // released the guard already. 
+ action.fire(); return std::move(result); }); } replicated_log::LogFollower::GuardedFollowerData::GuardedFollowerData( LogFollower const& self, std::unique_ptr logCore, InMemoryLog inMemoryLog) - : _follower(self), _inMemoryLog(std::move(inMemoryLog)), _logCore(std::move(logCore)) {} + : _follower(self), + _inMemoryLog(std::move(inMemoryLog)), + _logCore(std::move(logCore)) {} auto replicated_log::LogFollower::getStatus() const -> LogStatus { return _guardedFollowerData.doUnderLock([this](auto const& followerData) { @@ -320,8 +334,8 @@ auto replicated_log::LogFollower::waitFor(LogIndex idx) -> replicated_log::ILogParticipant::WaitForFuture { auto self = _guardedFollowerData.getLockedGuard(); if (self->_commitIndex >= idx) { - return futures::Future>{ - std::in_place, std::make_shared(idx, _currentTerm)}; + return futures::Future{std::in_place, self->_commitIndex, + std::make_shared(idx, _currentTerm)}; } // emplace might throw a std::bad_alloc but the remainder is noexcept // so either you inserted it and or nothing happens @@ -335,17 +349,19 @@ auto replicated_log::LogFollower::waitFor(LogIndex idx) auto replicated_log::LogFollower::waitForIterator(LogIndex index) -> replicated_log::ILogParticipant::WaitForIteratorFuture { if (index == LogIndex{0}) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, "invalid parameter; log index 0 is invalid"); + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, + "invalid parameter; log index 0 is invalid"); } - return waitFor(index).thenValue([this, self = shared_from_this(), index](auto&& quorum) -> WaitForIteratorFuture { + return waitFor(index).thenValue([this, self = shared_from_this(), + index](auto&& quorum) -> WaitForIteratorFuture { auto [fromIndex, iter] = _guardedFollowerData.doUnderLock( - [&](GuardedFollowerData& followerData) -> std::pair> { + [&](GuardedFollowerData& followerData) -> std::pair> { TRI_ASSERT(index <= followerData._commitIndex); /* * This code here ensures that if only private log 
entries are present - * we do not reply with an empty iterator but instead wait for the next - * entry containing payload. + * we do not reply with an empty iterator but instead wait for the + * next entry containing payload. */ auto actualIndex = index; @@ -377,7 +393,7 @@ auto replicated_log::LogFollower::waitForIterator(LogIndex index) } auto replicated_log::LogFollower::getLogIterator(LogIndex firstIndex) const --> std::unique_ptr { + -> std::unique_ptr { return _guardedFollowerData.doUnderLock( [&](GuardedFollowerData const& data) -> std::unique_ptr { auto const endIdx = data._inMemoryLog.getLastTermIndexPair().index + 1; @@ -387,7 +403,7 @@ auto replicated_log::LogFollower::getLogIterator(LogIndex firstIndex) const } auto replicated_log::LogFollower::getCommittedLogIterator(LogIndex firstIndex) const --> std::unique_ptr { + -> std::unique_ptr { return _guardedFollowerData.doUnderLock( [&](GuardedFollowerData const& data) -> std::unique_ptr { return data.getCommittedLogIterator(firstIndex); @@ -395,7 +411,7 @@ auto replicated_log::LogFollower::getCommittedLogIterator(LogIndex firstIndex) c } auto replicated_log::LogFollower::GuardedFollowerData::getCommittedLogIterator(LogIndex firstIndex) const --> std::unique_ptr { + -> std::unique_ptr { auto const endIdx = _inMemoryLog.getNextIndex(); TRI_ASSERT(firstIndex < endIdx); // return an iterator for the range [firstIndex, _commitIndex + 1) @@ -406,6 +422,9 @@ replicated_log::LogFollower::~LogFollower() { _logMetrics->replicatedLogFollowerNumber->fetch_sub(1); } +auto LogFollower::release(LogIndex doneWithIdx) -> Result { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} auto replicated_log::LogFollower::GuardedFollowerData::getLocalStatistics() const noexcept -> LogStatistics { diff --git a/arangod/Replication2/ReplicatedLog/LogFollower.h b/arangod/Replication2/ReplicatedLog/LogFollower.h index 14c285da2528..96b7bcf62f5a 100644 --- a/arangod/Replication2/ReplicatedLog/LogFollower.h +++ 
b/arangod/Replication2/ReplicatedLog/LogFollower.h @@ -43,9 +43,9 @@ namespace arangodb::replication2::replicated_log { /** * @brief Follower instance of a replicated log. */ -class LogFollower : public ILogParticipant, - public AbstractFollower, - public std::enable_shared_from_this { +class LogFollower final : public ILogParticipant, + public AbstractFollower, + public std::enable_shared_from_this { public: ~LogFollower() override; LogFollower(LoggerContext const&, std::shared_ptr logMetrics, @@ -53,7 +53,8 @@ class LogFollower : public ILogParticipant, std::optional leaderId, InMemoryLog inMemoryLog); // follower only - [[nodiscard]] auto appendEntries(AppendEntriesRequest) -> futures::Future override; + [[nodiscard]] auto appendEntries(AppendEntriesRequest) + -> futures::Future override; [[nodiscard]] auto getStatus() const -> LogStatus override; [[nodiscard]] auto resign() && -> std::tuple, DeferredAction> override; @@ -61,8 +62,12 @@ class LogFollower : public ILogParticipant, [[nodiscard]] auto waitFor(LogIndex) -> WaitForFuture override; [[nodiscard]] auto waitForIterator(LogIndex index) -> WaitForIteratorFuture override; [[nodiscard]] auto getParticipantId() const noexcept -> ParticipantId const& override; - [[nodiscard]] auto getLogIterator(LogIndex firstIndex) const -> std::unique_ptr; - [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const -> std::unique_ptr; + [[nodiscard]] auto getLogIterator(LogIndex firstIndex) const + -> std::unique_ptr; + [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const + -> std::unique_ptr; + + [[nodiscard]] auto release(LogIndex doneWithIdx) -> Result override; private: struct GuardedFollowerData { @@ -72,7 +77,7 @@ class LogFollower : public ILogParticipant, [[nodiscard]] auto getLocalStatistics() const noexcept -> LogStatistics; [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const - -> std::unique_ptr; + -> std::unique_ptr; LogFollower const& _follower; InMemoryLog 
_inMemoryLog; @@ -88,8 +93,8 @@ class LogFollower : public ILogParticipant, LogTerm const _currentTerm; // We use the unshackled mutex because guards are captured by futures. - // When using a std::mutex we would have to release the mutex in the same thread. - // Using the UnshackledMutex this is no longer required. + // When using a std::mutex we would have to release the mutex in the same + // thread. Using the UnshackledMutex this is no longer required. Guarded _guardedFollowerData; [[nodiscard]] auto appendEntriesPreFlightChecks(GuardedFollowerData const&, diff --git a/arangod/Replication2/ReplicatedLog/LogLeader.cpp b/arangod/Replication2/ReplicatedLog/LogLeader.cpp index 0fc0ada8daf2..3f287c9a00e4 100644 --- a/arangod/Replication2/ReplicatedLog/LogLeader.cpp +++ b/arangod/Replication2/ReplicatedLog/LogLeader.cpp @@ -22,16 +22,6 @@ #include "LogLeader.h" -#include "Replication2/ReplicatedLog/InMemoryLog.h" -#include "Replication2/ReplicatedLog/LogContextKeys.h" -#include "Replication2/ReplicatedLog/LogCore.h" -#include "Replication2/ReplicatedLog/LogStatus.h" -#include "Replication2/ReplicatedLog/PersistedLog.h" -#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" -#include "Replication2/ReplicatedLog/ReplicatedLogMetrics.h" -#include "RestServer/Metrics.h" -#include "Scheduler/SchedulerFeature.h" - #include #include #include @@ -45,12 +35,35 @@ #include #include #include - #include -#include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogContextKeys.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLogIterator.h" +#include "Replication2/ReplicatedLog/ReplicatedLogMetrics.h" +#include "RestServer/Metrics.h" +#include 
"Scheduler/SchedulerFeature.h" +#include "Basics/ErrorCode.h" +#include "Futures/Promise-inl.h" +#include "Futures/Promise.h" +#include "Futures/Unit.h" +#include "Replication2/DeferredExecution.h" +#include "Scheduler/SupervisedScheduler.h" +#include "immer/detail/iterator_facade.hpp" +#include "immer/detail/rbts/rrbtree_iterator.hpp" #if (_MSC_VER >= 1) // suppress warnings: @@ -179,7 +192,7 @@ void replicated_log::LogLeader::handleResolvedPromiseSet( for (auto& promise : resolvedPromises._set) { TRI_ASSERT(promise.second.valid()); - promise.second.setValue(resolvedPromises._quorum); + promise.second.setValue(resolvedPromises.result); } } @@ -211,10 +224,13 @@ void replicated_log::LogLeader::executeAppendEntriesRequests( << "last acked index = " << follower->lastAckedEntry << ", current index = " << lastAvailableIndex << ", last acked commit index = " << follower->lastAckedCommitIndex - << ", current commit index = " << self._commitIndex; + << ", current commit index = " << self._commitIndex + << ", last acked lci = " << follower->lastAckedLCI + << ", current lci = " << self._largestCommonIndex; // We can only get here if there is some new information for this follower TRI_ASSERT(follower->lastAckedEntry.index != lastAvailableIndex.index || - self._commitIndex != follower->lastAckedCommitIndex); + self._commitIndex != follower->lastAckedCommitIndex || + self._largestCommonIndex != follower->lastAckedLCI); return self.createAppendEntriesRequest(*follower, lastAvailableIndex); }); @@ -229,6 +245,7 @@ void replicated_log::LogLeader::executeAppendEntriesRequests( follower->_impl->appendEntries(std::move(request)) .thenFinal([weakParentLog = it->_parentLog, weakFollower = it->_follower, lastIndex = lastIndex, currentCommitIndex = request.leaderCommit, + currentLCI = request.largestCommonIndex, currentTerm = logLeader->_currentTerm, messageId = messageId, startTime, logMetrics = logMetrics]( futures::Try&& res) noexcept { @@ -250,8 +267,8 @@ void 
replicated_log::LogLeader::executeAppendEntriesRequests( if (!guarded->_didResign) { // Is throwing the right thing to do here? - No, we are in a finally return guarded->handleAppendEntriesResponse( - *follower, lastIndex, currentCommitIndex, currentTerm, - std::move(res), endTime - startTime, messageId); + *follower, lastIndex, currentCommitIndex, currentLCI, + currentTerm, std::move(res), endTime - startTime, messageId); } else { LOG_CTX("da116", DEBUG, follower->logContext) << "received response from follower but leader " @@ -302,7 +319,7 @@ auto replicated_log::LogLeader::construct( std::move(id), term, std::move(inMemoryLog)) {} }; - auto log = InMemoryLog{logContext, *logCore}; + auto log = InMemoryLog{*logCore}; auto const lastIndex = log.getLastTermIndexPair(); if (lastIndex.term != term) { // Immediately append an empty log entry in the new term. This is necessary @@ -450,7 +467,7 @@ auto replicated_log::LogLeader::waitFor(LogIndex index) -> WaitForFuture { return promise.getFuture(); } if (leaderData._commitIndex >= index) { - return futures::Future>{std::in_place, + return futures::Future{std::in_place, leaderData._commitIndex, leaderData._lastQuorum}; } auto it = leaderData._waitForQueue.emplace(index, WaitForPromise{}); @@ -478,7 +495,7 @@ auto replicated_log::LogLeader::triggerAsyncReplication() -> void { auto replicated_log::LogLeader::GuardedLeaderData::updateCommitIndexLeader( LogIndex newIndex, std::shared_ptr quorum) -> ResolvedPromiseSet { LOG_CTX("a9a7e", TRACE, _self._logContext) - << "updating commit index to " << newIndex << "with quorum " << quorum->quorum; + << "updating commit index to " << newIndex << " with quorum " << quorum->quorum; auto oldIndex = _commitIndex; TRI_ASSERT(_commitIndex < newIndex) @@ -494,7 +511,8 @@ auto replicated_log::LogLeader::GuardedLeaderData::updateCommitIndexLeader( << "resolving promise for index " << it->first; toBeResolved.insert(_waitForQueue.extract(it++)); } - return 
ResolvedPromiseSet{std::move(toBeResolved), std::move(quorum), + return ResolvedPromiseSet{std::move(toBeResolved), + WaitForResult(newIndex, std::move(quorum)), _inMemoryLog.slice(oldIndex, newIndex + 1)}; } catch (std::exception const& e) { // If those promises are not fulfilled we can not continue. @@ -536,10 +554,13 @@ auto replicated_log::LogLeader::GuardedLeaderData::prepareAppendEntry(FollowerIn << "last acked index = " << follower.lastAckedEntry << ", current index = " << lastAvailableIndex << ", last acked commit index = " << follower.lastAckedCommitIndex - << ", current commit index = " << _commitIndex; + << ", current commit index = " << _commitIndex + << ", last acked lci = " << follower.lastAckedLCI + << ", current lci = " << _largestCommonIndex; if (follower.lastAckedEntry.index == lastAvailableIndex.index && - _commitIndex == follower.lastAckedCommitIndex) { - LOG_CTX("74b71", TRACE, _self._logContext) << "up to date"; + _commitIndex == follower.lastAckedCommitIndex && + _largestCommonIndex == follower.lastAckedLCI) { + LOG_CTX("74b71", TRACE, follower.logContext) << "up to date"; return std::nullopt; // nothing to replicate } @@ -571,6 +592,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::createAppendEntriesRequest( AppendEntriesRequest req; req.leaderCommit = _commitIndex; + req.largestCommonIndex = _largestCommonIndex; req.leaderTerm = _self._currentTerm; req.leaderId = _self._id; req.waitForSync = _self._config.waitForSync; @@ -605,14 +627,15 @@ auto replicated_log::LogLeader::GuardedLeaderData::createAppendEntriesRequest( << "creating append entries request with " << req.entries.size() << " entries , prevLogEntry.term = " << req.prevLogEntry.term << ", prevLogEntry.index = " << req.prevLogEntry.index - << ", leaderCommit = " << req.leaderCommit; + << ", leaderCommit = " << req.leaderCommit + << ", lci = " << req.largestCommonIndex << ", msg-id = " << req.messageId; return std::make_pair(std::move(req), lastIndex); } auto 
replicated_log::LogLeader::GuardedLeaderData::handleAppendEntriesResponse( FollowerInfo& follower, TermIndexPair lastIndex, LogIndex currentCommitIndex, - LogTerm currentTerm, futures::Try&& res, + LogIndex currentLCI, LogTerm currentTerm, futures::Try&& res, std::chrono::steady_clock::duration latency, MessageId messageId) -> std::pair>, ResolvedPromiseSet> { if (currentTerm != _self._currentTerm) { @@ -645,6 +668,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::handleAppendEntriesResponse( follower.numErrorsSinceLastAnswer = 0; follower.lastAckedEntry = lastIndex; follower.lastAckedCommitIndex = currentCommitIndex; + follower.lastAckedLCI = currentLCI; toBeResolved = checkCommitIndex(); } else { TRI_ASSERT(response.reason != AppendEntriesErrorReason::NONE); @@ -704,7 +728,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::getInternalLogIterator(LogInd } auto replicated_log::LogLeader::GuardedLeaderData::getCommittedLogIterator(LogIndex firstIndex) const - -> std::unique_ptr { + -> std::unique_ptr { auto const endIdx = _inMemoryLog.getNextIndex(); TRI_ASSERT(firstIndex < endIdx); // return an iterator for the range [firstIndex, _commitIndex + 1) @@ -714,6 +738,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::getCommittedLogIterator(LogIn auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> ResolvedPromiseSet { auto const quorum_size = _self._config.writeConcern; + auto newLargestCommonIndex = _commitIndex; std::vector> indexes; indexes.reserve(_follower.size()); for (auto const& follower : _follower) { @@ -741,6 +766,8 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve << lastAckedEntry.index << ") is of term " << lastAckedEntry.term << ", but we're in term " << _self._currentTerm << "."; } + + newLargestCommonIndex = std::min(follower.lastAckedCommitIndex, newLargestCommonIndex); } LOG_CTX("a2d04", TRACE, _self._logContext) << "checking commit index on set " << indexes; @@ -751,6 +778,13 @@ 
auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve return {}; } + if (newLargestCommonIndex != _largestCommonIndex) { + LOG_CTX("851bb", TRACE, _self._logContext) + << "largest common index went from " << _largestCommonIndex << " to " + << newLargestCommonIndex; + _largestCommonIndex = newLargestCommonIndex; + } + auto nth = indexes.begin(); std::advance(nth, quorum_size - 1); @@ -772,7 +806,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve auto const quorum_data = std::make_shared(commitIndex, _self._currentTerm, std::move(quorum)); - return updateCommitIndexLeader(commitIndex, std::move(quorum_data)); + return updateCommitIndexLeader(commitIndex, quorum_data); } return {}; } @@ -780,8 +814,7 @@ auto replicated_log::LogLeader::GuardedLeaderData::checkCommitIndex() -> Resolve auto replicated_log::LogLeader::GuardedLeaderData::getLocalStatistics() const -> LogStatistics { auto result = LogStatistics{}; result.commitIndex = _commitIndex; - result.spearHead.index = _inMemoryLog.getLastIndex(); - result.spearHead.term = _inMemoryLog.getLastTerm(); + result.spearHead = _inMemoryLog.getLastTermIndexPair(); return result; } @@ -810,7 +843,7 @@ auto replicated_log::LogLeader::waitForIterator(LogIndex index) return waitFor(index).thenValue([this, self = shared_from_this(), index](auto&& quorum) -> WaitForIteratorFuture { auto [actualIndex, iter] = _guardedLeaderData.doUnderLock( - [&](GuardedLeaderData& leaderData) -> std::pair> { + [&](GuardedLeaderData& leaderData) -> std::pair> { TRI_ASSERT(index <= leaderData._commitIndex); /* @@ -859,6 +892,14 @@ auto replicated_log::LogLeader::construct( term, logContext, std::move(logMetrics)); } +auto replicated_log::LogLeader::release(LogIndex doneWithIdx) -> Result { + return Result(); +} + +auto replicated_log::LogLeader::copyInMemoryLog() const -> replicated_log::InMemoryLog { + return _guardedLeaderData.getLockedGuard()->_inMemoryLog; +} + 
replicated_log::LogLeader::LocalFollower::LocalFollower( replicated_log::LogLeader& self, LoggerContext logContext, std::unique_ptr logCore, [[maybe_unused]] TermIndexPair lastIndex) diff --git a/arangod/Replication2/ReplicatedLog/LogLeader.h b/arangod/Replication2/ReplicatedLog/LogLeader.h index 88d9a6cd428c..e2f6c8928188 100644 --- a/arangod/Replication2/ReplicatedLog/LogLeader.h +++ b/arangod/Replication2/ReplicatedLog/LogLeader.h @@ -22,17 +22,8 @@ #pragma once -#include "Replication2/ReplicatedLog/ILogParticipant.h" -#include "Replication2/ReplicatedLog/InMemoryLog.h" -#include "Replication2/ReplicatedLog/LogCommon.h" -#include "Replication2/ReplicatedLog/NetworkMessages.h" -#include "Replication2/ReplicatedLog/types.h" - -#include "Replication2/LoggerContext.h" - #include #include - #include #include #include @@ -40,6 +31,23 @@ #include #include #include +#include +#include + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/ReplicatedLog/NetworkMessages.h" +#include "Replication2/ReplicatedLog/types.h" +#include "Replication2/LoggerContext.h" +#include "Basics/Result.h" +#include "Futures/Future.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogStatus.h" + +namespace arangodb { +struct DeferredAction; +} // namespace arangodb #if (_MSC_VER >= 1) // suppress warnings: @@ -62,9 +70,10 @@ class Try; namespace arangodb::replication2::replicated_log { struct LogCore; struct ReplicatedLogMetrics; -} +} // namespace arangodb::replication2::replicated_log namespace arangodb::replication2::replicated_log { +struct PersistedLogIterator; /** * @brief Leader instance of a replicated log. 
@@ -108,7 +117,8 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar [[nodiscard]] auto getReplicatedLogSnapshot() const -> InMemoryLog::log_type; - [[nodiscard]] auto readReplicatedEntryByIndex(LogIndex idx) const -> std::optional; + [[nodiscard]] auto readReplicatedEntryByIndex(LogIndex idx) const + -> std::optional; // Triggers sending of appendEntries requests to all followers. This continues // until all participants are perfectly in sync, and will then stop. @@ -122,6 +132,10 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar [[nodiscard]] auto getParticipantId() const noexcept -> ParticipantId const&; + [[nodiscard]] auto release(LogIndex doneWithIdx) -> Result override; + + [[nodiscard]] auto copyInMemoryLog() const -> InMemoryLog; + protected: // Use the named constructor construct() to create a leader! LogLeader(LoggerContext logContext, std::shared_ptr logMetrics, @@ -129,6 +143,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar private: struct GuardedLeaderData; + using Guard = MutexGuard>; using ConstGuard = MutexGuard>; @@ -140,6 +155,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar std::shared_ptr _impl; TermIndexPair lastAckedEntry = TermIndexPair{LogTerm{0}, LogIndex{0}}; LogIndex lastAckedCommitIndex = LogIndex{0}; + LogIndex lastAckedLCI = LogIndex{0}; MessageId lastSentMessageId{0}; std::size_t numErrorsSinceLastAnswer = 0; AppendEntriesErrorReason lastErrorReason = AppendEntriesErrorReason::NONE; @@ -187,7 +203,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar struct ResolvedPromiseSet { WaitForQueue _set; - std::shared_ptr _quorum; + WaitForResult result; ::immer::flex_vector _commitedLogEntries; }; @@ -208,7 +224,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar [[nodiscard]] auto handleAppendEntriesResponse( FollowerInfo& follower, TermIndexPair lastIndex, LogIndex currentCommitIndex, - LogTerm 
currentTerm, futures::Try&& res, + LogIndex currentLCI, LogTerm currentTerm, futures::Try&& res, std::chrono::steady_clock::duration latency, MessageId messageId) -> std::pair>, ResolvedPromiseSet>; @@ -222,7 +238,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar -> std::unique_ptr; [[nodiscard]] auto getCommittedLogIterator(LogIndex firstIndex) const - -> std::unique_ptr; + -> std::unique_ptr; [[nodiscard]] auto getLocalStatistics() const -> LogStatistics; @@ -236,6 +252,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar WaitForQueue _waitForQueue{}; std::shared_ptr _lastQuorum{}; LogIndex _commitIndex{0}; + LogIndex _largestCommonIndex{0}; bool _didResign{false}; }; @@ -262,7 +279,7 @@ class LogLeader : public std::enable_shared_from_this, public ILogPar std::vector> requests, std::shared_ptr const& logMetrics); static void handleResolvedPromiseSet(ResolvedPromiseSet set, - std::shared_ptr const& logMetrics); + std::shared_ptr const& logMetrics); auto tryHardToClearQueue() noexcept -> void; }; diff --git a/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp b/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp index abfba203ec70..28d50762b537 100644 --- a/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp +++ b/arangod/Replication2/ReplicatedLog/NetworkMessages.cpp @@ -60,6 +60,7 @@ AppendEntriesRequest::AppendEntriesRequest(AppendEntriesRequest&& other) noexcep leaderId(std::move(other.leaderId)), prevLogEntry(other.prevLogEntry), leaderCommit(other.leaderCommit), + largestCommonIndex(other.largestCommonIndex), messageId(other.messageId), entries(std::move(other.entries)), waitForSync(other.waitForSync) { @@ -108,6 +109,7 @@ auto AppendEntriesRequest::operator=(replicated_log::AppendEntriesRequest&& othe leaderId = std::move(other.leaderId); prevLogEntry = other.prevLogEntry; leaderCommit = other.leaderCommit; + largestCommonIndex = other.largestCommonIndex; messageId = other.messageId; waitForSync = 
other.waitForSync; entries = std::move(other.entries); @@ -203,27 +205,27 @@ auto replicated_log::AppendEntriesResult::withConflict(LogTerm term, replicated_log::MessageId id, TermIndexPair conflict) noexcept -> replicated_log::AppendEntriesResult { - return AppendEntriesResult(term, id, conflict); + return {term, id, conflict}; } auto replicated_log::AppendEntriesResult::withRejection(LogTerm term, MessageId id, AppendEntriesErrorReason reason) noexcept -> AppendEntriesResult { - return AppendEntriesResult(term, TRI_ERROR_REPLICATION_REPLICATED_LOG_APPEND_ENTRIES_REJECTED, - reason, id); + return {term, TRI_ERROR_REPLICATION_REPLICATED_LOG_APPEND_ENTRIES_REJECTED, + reason, id}; } auto replicated_log::AppendEntriesResult::withPersistenceError(LogTerm term, replicated_log::MessageId id, Result const& res) noexcept -> replicated_log::AppendEntriesResult { - return AppendEntriesResult(term, res.errorNumber(), - AppendEntriesErrorReason::PERSISTENCE_FAILURE, id); + return {term, res.errorNumber(), + AppendEntriesErrorReason::PERSISTENCE_FAILURE, id}; } auto replicated_log::AppendEntriesResult::withOk(LogTerm term, replicated_log::MessageId id) noexcept -> replicated_log::AppendEntriesResult { - return AppendEntriesResult(term, id); + return {term, id}; } auto replicated_log::AppendEntriesResult::isSuccess() const noexcept -> bool { @@ -238,6 +240,7 @@ void replicated_log::AppendEntriesRequest::toVelocyPack(velocypack::Builder& bui builder.add(VPackValue("prevLogEntry")); prevLogEntry.toVelocyPack(builder); builder.add("leaderCommit", VPackValue(leaderCommit.value)); + builder.add("largestCommonIndex", VPackValue(largestCommonIndex.value)); builder.add("messageId", VPackValue(messageId)); builder.add("waitForSync", VPackValue(waitForSync)); builder.add("entries", VPackValue(VPackValueType::Array)); @@ -254,6 +257,7 @@ auto replicated_log::AppendEntriesRequest::fromVelocyPack(velocypack::Slice slic auto leaderId = ParticipantId{slice.get("leaderId").copyString()}; auto 
prevLogEntry = TermIndexPair::fromVelocyPack(slice.get("prevLogEntry")); auto leaderCommit = slice.get("leaderCommit").extract(); + auto largestCommonIndex = slice.get("largestCommonIndex").extract(); auto messageId = slice.get("messageId").extract(); auto waitForSync = slice.get("waitForSync").extract(); auto entries = std::invoke([&] { @@ -266,19 +270,20 @@ auto replicated_log::AppendEntriesRequest::fromVelocyPack(velocypack::Slice slic return std::move(transientEntries).persistent(); }); - return AppendEntriesRequest{leaderTerm, leaderId, prevLogEntry, - leaderCommit, messageId, waitForSync, - std::move(entries)}; + return AppendEntriesRequest{leaderTerm, leaderId, prevLogEntry, + leaderCommit, largestCommonIndex, messageId, + waitForSync, std::move(entries)}; } replicated_log::AppendEntriesRequest::AppendEntriesRequest( LogTerm leaderTerm, ParticipantId leaderId, TermIndexPair prevLogEntry, - LogIndex leaderCommit, replicated_log::MessageId messageId, - bool waitForSync, EntryContainer entries) + LogIndex leaderCommit, LogIndex largestCommonIndex, + replicated_log::MessageId messageId, bool waitForSync, EntryContainer entries) : leaderTerm(leaderTerm), leaderId(std::move(leaderId)), prevLogEntry(prevLogEntry), leaderCommit(leaderCommit), + largestCommonIndex(largestCommonIndex), messageId(messageId), entries(std::move(entries)), waitForSync(waitForSync) {} diff --git a/arangod/Replication2/ReplicatedLog/NetworkMessages.h b/arangod/Replication2/ReplicatedLog/NetworkMessages.h index 413b780118d8..3fac1e727b43 100644 --- a/arangod/Replication2/ReplicatedLog/NetworkMessages.h +++ b/arangod/Replication2/ReplicatedLog/NetworkMessages.h @@ -99,6 +99,7 @@ struct AppendEntriesRequest { ParticipantId leaderId; TermIndexPair prevLogEntry; LogIndex leaderCommit; + LogIndex largestCommonIndex; MessageId messageId; EntryContainer entries{}; bool waitForSync = false; @@ -106,7 +107,8 @@ struct AppendEntriesRequest { AppendEntriesRequest() = default; 
AppendEntriesRequest(LogTerm leaderTerm, ParticipantId leaderId, TermIndexPair prevLogEntry, LogIndex leaderCommit, - MessageId messageId, bool waitForSync, EntryContainer entries); + LogIndex largestCommonIndex, MessageId messageId, + bool waitForSync, EntryContainer entries); ~AppendEntriesRequest() noexcept = default; AppendEntriesRequest(AppendEntriesRequest&& other) noexcept; diff --git a/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp b/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp index d1790d8bcd95..dab70378c674 100644 --- a/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp +++ b/arangod/Replication2/ReplicatedLog/ReplicatedLog.cpp @@ -93,7 +93,7 @@ auto replicated_log::ReplicatedLog::becomeFollower(ParticipantId id, LogTerm ter LOG_CTX("1ed24", DEBUG, _logContext) << "becoming follower in term " << term << " with leader " << leaderId.value_or(""); - auto log = InMemoryLog{_logContext, *logCore}; + auto log = InMemoryLog{*logCore}; auto follower = std::make_shared(_logContext, _metrics, std::move(id), std::move(logCore), term, std::move(leaderId), log); diff --git a/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h b/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h index 04924d8df23c..c50cb35cd25d 100644 --- a/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h +++ b/arangod/Replication2/ReplicatedLog/ReplicatedLogIterator.h @@ -42,7 +42,7 @@ namespace arangodb::replication2::replicated_log { -class ReplicatedLogIterator : public LogIterator { +class ReplicatedLogIterator : public LogRangeIterator { public: using log_type = ::immer::flex_vector; @@ -63,6 +63,14 @@ class ReplicatedLogIterator : public LogIterator { return std::nullopt; } + auto range() const noexcept -> std::pair override { + if (_container.empty()) { + return {LogIndex{0}, LogIndex{0}}; + } else { + return {_container.front().entry().logIndex(), _container.back().entry().logIndex() + 1}; + } + } + private: log_type _container; 
log_type::const_iterator _begin; diff --git a/arangod/Replication2/ReplicatedState/AbstractStateMachine.h b/arangod/Replication2/ReplicatedState/AbstractStateMachine.h new file mode 100644 index 000000000000..b749fc68eaf2 --- /dev/null +++ b/arangod/Replication2/ReplicatedState/AbstractStateMachine.h @@ -0,0 +1,75 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include + +#include "Basics/Result.h" +#include "Basics/Guarded.h" +#include "Basics/UnshackledMutex.h" +#include "Futures/Future.h" + +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/ReplicatedLog/types.h" + +namespace arangodb::replication2 { +namespace replicated_log { +struct ReplicatedLog; +} + +namespace replicated_state { + +template +struct AbstractStateMachine : std::enable_shared_from_this> { + // TODO Maybe we can create a non-templated base class for functions that do not + // require the template parameter. (waitFor, pollEntries, ...) 
+ using LogIterator = TypedLogIterator; + using LogRangeIterator = TypedLogRangeIterator; + + virtual ~AbstractStateMachine() = default; + + explicit AbstractStateMachine(std::shared_ptr log); + auto triggerPollEntries() -> futures::Future; + + protected: + virtual auto installSnapshot(ParticipantId const&) -> futures::Future = 0; + virtual auto applyEntries(std::unique_ptr) + -> futures::Future = 0; + + void releaseIndex(LogIndex); + auto getEntry(LogIndex) -> std::optional; + auto getIterator(LogIndex first) -> LogIterator; + auto insert(T const&) -> LogIndex; + auto waitFor(LogIndex) -> futures::Future; + + private: + struct GuardedData { + bool pollOnGoing{false}; + LogIndex nextIndex{1}; + }; + + Guarded _guardedData; + std::shared_ptr const log; +}; + +} // namespace replicated_state +} // namespace arangodb::replication2 diff --git a/arangod/Replication2/ReplicatedState/AbstractStateMachine.tpp b/arangod/Replication2/ReplicatedState/AbstractStateMachine.tpp new file mode 100644 index 000000000000..c8fdf99e803c --- /dev/null +++ b/arangod/Replication2/ReplicatedState/AbstractStateMachine.tpp @@ -0,0 +1,131 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include +#include +#include "AbstractStateMachine.h" +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" + +using namespace arangodb; +using namespace arangodb::replication2; + +template +auto replicated_state::AbstractStateMachine::getIterator(LogIndex first) -> LogIterator { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +template +auto replicated_state::AbstractStateMachine::getEntry(LogIndex) + -> std::optional { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +template +auto replicated_state::AbstractStateMachine::insert(T const& v) -> LogIndex { + velocypack::UInt8Buffer payload; + { + velocypack::Builder builder(payload); + v.toVelocyPack(builder); + } + return log->getLeader()->insert(LogPayload(std::move(payload))); +} + +template +auto replication2::replicated_state::AbstractStateMachine::waitFor(LogIndex idx) + -> futures::Future { + return log->getParticipant()->waitFor(idx); +} + +template +void replicated_state::AbstractStateMachine::releaseIndex(LogIndex) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); +} + +namespace { +template +struct DeserializeLogIterator : TypedLogRangeIterator { + explicit DeserializeLogIterator(std::unique_ptr base) + : base(std::move(base)) {} + + auto next() -> std::optional override { + if (auto entry = base->next(); entry.has_value()) { + return T::fromVelocyPack(entry->logPayload()); + } + + return std::nullopt; + } + + auto range() const noexcept -> std::pair override { + return base->range(); + } + + std::unique_ptr base; +}; +} // namespace + +template +auto replicated_state::AbstractStateMachine::triggerPollEntries() + -> futures::Future { + auto nextIndex = + _guardedData.template doUnderLock([&](GuardedData& guard) -> std::optional { + if 
(guard.pollOnGoing) { + return std::nullopt; + } + + guard.pollOnGoing = true; + return guard.nextIndex; + }); + + if (nextIndex.has_value()) { + return log->getParticipant() + ->waitForIterator(*nextIndex) + .thenValue([weak = this->weak_from_this()]( + std::unique_ptr res) { + if (auto self = weak.lock()) { + auto [from, to] = res->range(); // [from, to) + TRI_ASSERT(from != to); + + auto iter = std::make_unique>(std::move(res)); + return self->applyEntries(std::move(iter)).thenValue([self, to = to](Result&& result) { + auto guard = self->_guardedData.getLockedGuard(); + guard->pollOnGoing = false; + TRI_ASSERT(to > guard->nextIndex); + guard->nextIndex = to; + return std::move(result); + }); + } + + return futures::Future{TRI_ERROR_NO_ERROR}; + }); + } + + return futures::Future{TRI_ERROR_NO_ERROR}; +} + +template +replicated_state::AbstractStateMachine::AbstractStateMachine( + std::shared_ptr log) + : log(std::move(log)) {} diff --git a/arangod/Replication2/Streams/LogMultiplexer.h b/arangod/Replication2/Streams/LogMultiplexer.h new file mode 100644 index 000000000000..1f9c309695c3 --- /dev/null +++ b/arangod/Replication2/Streams/LogMultiplexer.h @@ -0,0 +1,81 @@ +#pragma once +#include + +#include + +#include +#include +#include + +#include +#include + +namespace arangodb::replication2::replicated_log { +class LogFollower; +class LogLeader; +} // namespace arangodb::replication2::replicated_log + +namespace arangodb::replication2::streams { + +/** + * Common stream dispatcher class for Multiplexer and Demultiplexer. You can + * obtain a stream given its id using getStreamById. Alternatively, you can + * static_cast the pointer to StreamBase for the given stream. 
+ * @tparam Self + * @tparam Spec + * @tparam StreamType + */ +template typename StreamType> +struct LogMultiplexerStreamDispatcher : std::enable_shared_from_this, + StreamDispatcherBase { + template > + auto getStreamBaseById() + -> std::shared_ptr> { + return getStreamByDescriptor(); + } + + template + auto getStreamById() -> std::shared_ptr>> { + return getStreamByDescriptor>(); + } + + template + auto getStreamByDescriptor() + -> std::shared_ptr> { + return std::static_pointer_cast>( + this->shared_from_this()); + } +}; + +/** + * Demultiplexer class. Use ::construct to create an instance. + * @tparam Spec Log specification + */ +template +struct LogDemultiplexer + : LogMultiplexerStreamDispatcher, Spec, Stream> { + virtual auto digestIterator(LogRangeIterator& iter) -> void = 0; + virtual auto listen() -> void = 0; + + static auto construct(std::shared_ptr) + -> std::shared_ptr; + + protected: + LogDemultiplexer() = default; +}; + +/** + * Multiplexer class. Use ::construct to create an instance. + * @tparam Spec Log specification + */ +template +struct LogMultiplexer + : LogMultiplexerStreamDispatcher, Spec, ProducerStream> { + static auto construct(std::shared_ptr leader) + -> std::shared_ptr; + + protected: + LogMultiplexer() = default; +}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/LogMultiplexer.tpp b/arangod/Replication2/Streams/LogMultiplexer.tpp new file mode 100644 index 000000000000..3e26a09d4414 --- /dev/null +++ b/arangod/Replication2/Streams/LogMultiplexer.tpp @@ -0,0 +1,342 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "LogMultiplexer.h" + +#include +#include + +#include +#include +#include + +#include +#include + +namespace arangodb::replication2::streams { + +namespace { +template +auto allUnresolved(std::pair& q) { + return std::all_of(std::begin(q.first), std::end(q.first), + [&](auto const& pair) { return !pair.second.isFulfilled(); }); +} +template > +auto resolvePromiseSet(std::pair& q) { + TRI_ASSERT(allUnresolved(q)); + std::for_each(std::begin(q.first), std::end(q.first), [&](auto& pair) { + TRI_ASSERT(!pair.second.isFulfilled()); + if (!pair.second.isFulfilled()) { + pair.second.setValue(q.second); + } + }); +} + +template +auto resolvePromiseSets(stream_descriptor_set, + std::index_sequence, std::tuple& pairs) { + (resolvePromiseSet(std::get(pairs)), ...); +} + +template +auto resolvePromiseSets(stream_descriptor_set, std::tuple& pairs) { + resolvePromiseSets(stream_descriptor_set{}, + std::index_sequence_for{}, pairs); +} +} // namespace + +template typename StreamInterface, typename Interface> +struct LogMultiplexerImplementationBase { + explicit LogMultiplexerImplementationBase(std::shared_ptr const& interface) + : _guardedData(static_cast(*this)), _interface(interface) {} + + template , + typename E = StreamEntryView> 
+ auto waitForIteratorInternal(LogIndex first) + -> futures::Future>> { + return waitForInternal(first).thenValue( + [that = shared_from_self(), first](auto&&) { + return that->_guardedData.doUnderLock([&](MultiplexerData& self) { + auto& block = std::get>(self._blocks); + return block.getIteratorRange(first, self._firstUncommittedIndex); + }); + }); + } + + template , + typename W = typename Stream::WaitForResult> + auto waitForInternal(LogIndex index) -> futures::Future { + return _guardedData.doUnderLock([&](MultiplexerData& self) { + if (self._firstUncommittedIndex > index) { + return futures::Future{std::in_place}; + } + auto& block = std::get>(self._blocks); + return block.registerWaitFor(index); + }); + } + + template + auto releaseInternal(LogIndex index) -> void { + // update the release index for the given stream + // then compute the minimum and forward it to the + // actual log implementation + auto globalReleaseIndex = _guardedData.doUnderLock( + [&](MultiplexerData& self) -> std::optional { + { + auto& block = self.template getBlockForDescriptor(); + auto newIndex = std::max(block._releaseIndex, index); + if (newIndex == block._releaseIndex) { + return std::nullopt; + } + TRI_ASSERT(newIndex > block._releaseIndex); + block._releaseIndex = newIndex; + } + + return self.minReleaseIndex(); + }); + + if (globalReleaseIndex) { + // TODO handle return value + std::ignore = _interface->release(*globalReleaseIndex); + } + } + + template , + typename E = StreamEntryView> + auto getIteratorInternal() -> std::unique_ptr> { + return _guardedData.template doUnderLock([](MultiplexerData& self) { + auto& block = self.template getBlockForDescriptor(); + return block.getIterator(); + }); + } + + protected: + template + struct MultiplexerData; + template + struct MultiplexerData> { + std::tuple...> _blocks; + LogIndex _firstUncommittedIndex{1}; + LogIndex _lastIndex; + bool _pendingWaitFor{false}; + + Derived& _self; + + explicit MultiplexerData(Derived& self) : 
_self(self) {} + void digestIterator(LogRangeIterator& iter) { + while (auto memtry = iter.next()) { + auto muxedValue = + MultiplexedValues::fromVelocyPack(memtry->logPayload()); + std::visit( + [&](auto&& value) { + using ValueTag = std::decay_t; + using Descriptor = typename ValueTag::DescriptorType; + std::get>(_blocks).appendEntry( + memtry->logIndex(), std::move(value.value)); + }, + std::move(muxedValue.variant())); + } + } + + auto getWaitForResolveSetAll(LogIndex commitIndex) { + return std::make_tuple(std::make_pair( + getBlockForDescriptor().getWaitForResolveSet(commitIndex), + typename StreamInformationBlock::WaitForResult{})...); + } + + // returns a LogIndex to wait for (if necessary) + auto checkWaitFor() -> std::optional { + if (!_pendingWaitFor && _lastIndex >= _firstUncommittedIndex) { + // we have to trigger a waitFor operation + // and wait for the next index + _pendingWaitFor = true; + return _firstUncommittedIndex; + } + return std::nullopt; + } + + auto minReleaseIndex() -> LogIndex { + return std::min({getBlockForDescriptor()._releaseIndex...}); + } + + template + auto getBlockForDescriptor() -> StreamInformationBlock& { + return std::get>(_blocks); + } + }; + + auto shared_from_self() -> std::shared_ptr { + return std::static_pointer_cast(static_cast(*this).shared_from_this()); + } + + Guarded, basics::UnshackledMutex> _guardedData{}; + std::shared_ptr const _interface; +}; + +template +struct LogDemultiplexerImplementation + : LogDemultiplexer, // implement the actual class + ProxyStreamDispatcher, Spec, Stream>, // use a proxy stream dispatcher + LogMultiplexerImplementationBase, Spec, Stream, Interface> { + explicit LogDemultiplexerImplementation(std::shared_ptr interface) + : LogMultiplexerImplementationBase( + std::move(interface)) {} + + auto digestIterator(LogRangeIterator& iter) -> void override { + this->_guardedData.getLockedGuard()->digestIterator(iter); + } + + auto listen() -> void override { + auto nextIndex = + 
this->_guardedData.doUnderLock([](auto& self) -> std::optional { + if (!self._pendingWaitFor) { + self._pendingWaitFor = true; + return self._firstUncommittedIndex; + } + return std::nullopt; + }); + if (nextIndex.has_value()) { + triggerWaitFor(*nextIndex); + } + } + + private: + void triggerWaitFor(LogIndex waitForIndex) { + this->_interface->waitForIterator(waitForIndex) + .thenValue([weak = this->weak_from_this()](std::unique_ptr&& iter) { + if (auto locked = weak.lock(); locked) { + auto that = std::static_pointer_cast(locked); + auto [nextIndex, promiseSets] = that->_guardedData.doUnderLock([&](auto& self) { + self._firstUncommittedIndex = iter->range().second; + self.digestIterator(*iter); + return std::make_tuple(self._firstUncommittedIndex, + self.getWaitForResolveSetAll( + self._firstUncommittedIndex.saturatedDecrement())); + }); + + that->triggerWaitFor(nextIndex); + resolvePromiseSets(Spec{}, promiseSets); + } + }); + } +}; + +template +struct LogMultiplexerImplementation + : LogMultiplexer, + ProxyStreamDispatcher, Spec, ProducerStream>, + LogMultiplexerImplementationBase, Spec, ProducerStream, Interface> { + using SelfClass = LogMultiplexerImplementation; + + explicit LogMultiplexerImplementation(std::shared_ptr interface) + : LogMultiplexerImplementationBase, Spec, ProducerStream, Interface>( + std::move(interface)) {} + + template > + auto insertInternal(T const& t) -> LogIndex { + auto serialized = std::invoke([&] { + velocypack::UInt8Buffer buffer; + velocypack::Builder builder(buffer); + MultiplexedValues::toVelocyPack(t, builder); + return buffer; + }); + + // we have to lock before we insert, otherwise we could mess up the order + // of log entries for this stream + auto [index, waitForIndex] = this->_guardedData.doUnderLock([&](auto& self) { + // First write to replicated log + auto insertIndex = this->_interface->insert(LogPayload(std::move(serialized))); + TRI_ASSERT(insertIndex > self._lastIndex); + self._lastIndex = insertIndex; + + // Now 
we insert the value T into the StreamsLog, + // but it is not yet visible because of the commitIndex + auto& block = self.template getBlockForDescriptor(); + block.appendEntry(insertIndex, t); + return std::make_pair(insertIndex, self.checkWaitFor()); + }); + + if (waitForIndex.has_value()) { + triggerWaitForIndex(*waitForIndex); + } + return index; + } + + private: + void triggerWaitForIndex(LogIndex waitForIndex) { + auto f = this->_interface->waitFor(waitForIndex); + std::move(f).thenValue([weak = this->weak_from_this()]( + replicated_log::WaitForResult&& result) noexcept { + // First lock the shared pointer + if (auto locked = weak.lock(); locked) { + auto that = std::static_pointer_cast(locked); + // now acquire the mutex + auto [resolveSets, nextIndex] = that->_guardedData.doUnderLock([&](auto& self) { + self._pendingWaitFor = false; + + // find out what the commit index is + self._firstUncommittedIndex = result.currentCommitIndex + 1; + return std::make_pair(self.getWaitForResolveSetAll(result.currentCommitIndex), + self.checkWaitFor()); + }); + + resolvePromiseSets(Spec{}, resolveSets); + if (nextIndex.has_value()) { + that->triggerWaitForIndex(*nextIndex); + } + } + }); + } +}; + +template +auto LogDemultiplexer::construct(std::shared_ptr interface) + -> std::shared_ptr { + return std::make_shared>( + std::move(interface)); +} + +template +auto LogMultiplexer::construct(std::shared_ptr leader) + -> std::shared_ptr { + return std::make_shared>( + std::move(leader)); +} + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/MultiplexedValues.h b/arangod/Replication2/Streams/MultiplexedValues.h new file mode 100644 index 000000000000..b977e966b482 --- /dev/null +++ b/arangod/Replication2/Streams/MultiplexedValues.h @@ -0,0 +1,101 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the 
Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +namespace arangodb::replication2::streams { + +template > +struct DescriptorValueTag { + using DescriptorType = Descriptor; + explicit DescriptorValueTag(Type value) : value(std::move(value)) {} + Type value; +}; + +template +struct MultiplexedVariant { + using VariantType = std::variant...>; + + [[nodiscard]] auto variant() & -> VariantType& { return _value; } + [[nodiscard]] auto variant() && -> VariantType&& { return std::move(_value); } + [[nodiscard]] auto variant() const& -> VariantType& { return _value; } + + template + explicit MultiplexedVariant(std::in_place_t, Args&&... args) + : _value(std::forward(args)...) 
{} + + private: + VariantType _value; +}; + +struct MultiplexedValues { + template > + static void toVelocyPack(Type const& v, velocypack::Builder& builder) { + using PrimaryTag = stream_descriptor_primary_tag_t; + using Serializer = typename PrimaryTag::serializer; + velocypack::ArrayBuilder ab(&builder); + builder.add(velocypack::Value(PrimaryTag::tag)); + static_assert( + std::is_invocable_r_v, + std::add_lvalue_reference_t>, + std::add_lvalue_reference_t>); + std::invoke(Serializer{}, serializer_tag, v, builder); + } + + template + static auto fromVelocyPack(velocypack::Slice slice) + -> MultiplexedVariant { + TRI_ASSERT(slice.isArray()); + auto [tag, valueSlice] = slice.unpackTuple(); + return FromVelocyPackHelper, Descriptors...>::extract(tag, valueSlice); + } + + private: + template + struct FromVelocyPackHelper { + static auto extract(StreamTag tag, velocypack::Slice slice) -> ValueType { + return extractTags(stream_descriptor_tags_t{}, tag, slice); + } + + template + static auto extractTags(tag_descriptor_set, StreamTag tag, + velocypack::Slice slice) -> ValueType { + if (Tag::tag == tag) { + return extractValue(slice); + } else if constexpr (sizeof...(Tags) > 0) { + return extractTags(tag_descriptor_set{}, tag, slice); + } else if constexpr (sizeof...(Other) > 0) { + return FromVelocyPackHelper::extract(tag, slice); + } else { + std::abort(); + } + } + + template > + static auto extractValue(velocypack::Slice slice) -> ValueType { + static_assert(std::is_invocable_r_v, velocypack::Slice>); + auto value = std::invoke(Deserializer{}, serializer_tag, slice); + return ValueType(std::in_place, std::in_place_type>, + std::move(value)); + } + }; +}; +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/StreamInformationBlock.h b/arangod/Replication2/Streams/StreamInformationBlock.h new file mode 100644 index 000000000000..417ff6a0f9a5 --- /dev/null +++ b/arangod/Replication2/Streams/StreamInformationBlock.h @@ -0,0 +1,65 @@ 
+//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include + +#include +#include + +#include "Replication2/ReplicatedLog/LogCommon.h" +#include "Replication2/Streams/Streams.h" + +namespace arangodb::replication2::streams { + +template +struct StreamInformationBlock; +template +struct StreamInformationBlock> { + using StreamType = streams::Stream; + using EntryType = StreamEntry; + using Iterator = TypedLogRangeIterator>; + + using ContainerType = ::immer::flex_vector; + using TransientType = typename ContainerType::transient_type; + using LogVariantType = std::variant; + + using WaitForResult = typename StreamType::WaitForResult; + using WaitForPromise = futures::Promise; + using WaitForQueue = std::multimap; + + LogIndex _releaseIndex{0}; + LogVariantType _container; + WaitForQueue _waitForQueue; + + auto appendEntry(LogIndex index, Type t); + auto getWaitForResolveSet(LogIndex commitIndex) -> WaitForQueue; + auto registerWaitFor(LogIndex index) -> futures::Future; + auto getIterator() -> std::unique_ptr; + auto getIteratorRange(LogIndex start, LogIndex stop) -> std::unique_ptr; + + private: + auto 
getTransientContainer() -> TransientType&; + auto getPersistentContainer() -> ContainerType&; +}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/StreamInformationBlock.tpp b/arangod/Replication2/Streams/StreamInformationBlock.tpp new file mode 100644 index 000000000000..88ea4b4415fe --- /dev/null +++ b/arangod/Replication2/Streams/StreamInformationBlock.tpp @@ -0,0 +1,138 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include "Replication2/Streams/StreamInformationBlock.h" +#include "Replication2/Streams/Streams.h" + +namespace arangodb::replication2::streams { + +template +auto StreamInformationBlock>::getTransientContainer() + -> TransientType& { + if (!std::holds_alternative(_container)) { + _container = std::get(_container).transient(); + } + return std::get(_container); +} + +template +auto StreamInformationBlock>::getPersistentContainer() + -> ContainerType& { + if (!std::holds_alternative(_container)) { + _container = std::get(_container).persistent(); + } + return std::get(_container); +} + +template +auto StreamInformationBlock>::appendEntry(LogIndex index, + Type t) { + getTransientContainer().push_back(EntryType{index, std::move(t)}); +} + +template +auto StreamInformationBlock>::getWaitForResolveSet(LogIndex commitIndex) + -> std::multimap> { + WaitForQueue toBeResolved; + auto const end = _waitForQueue.upper_bound(commitIndex); + for (auto it = _waitForQueue.begin(); it != end;) { + toBeResolved.insert(_waitForQueue.extract(it++)); + } + return toBeResolved; +} + +template +auto StreamInformationBlock>::registerWaitFor(LogIndex index) + -> futures::Future { + return _waitForQueue.emplace(index, futures::Promise{})->second.getFuture(); +} + +template +auto StreamInformationBlock>::getIterator() + -> std::unique_ptr { + auto log = getPersistentContainer(); + + struct Iterator : TypedLogRangeIterator> { + ContainerType log; + typename ContainerType::iterator current; + + auto next() -> std::optional> override { + if (current != std::end(log)) { + auto view = std::make_pair(current->first, std::cref(current->second)); + ++current; + return view; + } + return std::nullopt; + } + + [[nodiscard]] auto range() const noexcept -> std::pair override { + abort(); + 
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); + } + + explicit Iterator(ContainerType log) + : log(std::move(log)), current(this->log.begin()) {} + }; + + return std::make_unique(std::move(log)); +} + +template +auto StreamInformationBlock>::getIteratorRange(LogIndex start, LogIndex stop) + -> std::unique_ptr { + TRI_ASSERT(stop >= start); + + auto const log = getPersistentContainer(); + + using ContainerIterator = typename ContainerType::iterator; + + struct Iterator : TypedLogRangeIterator> { + ContainerType _log; + ContainerIterator current; + LogIndex start, stop; + + auto next() -> std::optional> override { + if (current != std::end(_log) && current->first < stop) { + auto view = std::make_pair(current->first, std::cref(current->second)); + ++current; + return view; + } + return std::nullopt; + } + [[nodiscard]] auto range() const noexcept -> std::pair override { + return {start, stop}; + } + + explicit Iterator(ContainerType log, LogIndex start, LogIndex stop) + : _log(std::move(log)), + current(std::lower_bound(std::begin(_log), std::end(_log), start, + [](StreamEntry const& left, LogIndex index) { + return left.first < index; + })), + start(start), + stop(stop) {} + }; + return std::make_unique(std::move(log), start, stop); +} + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/StreamSpecification.h b/arangod/Replication2/Streams/StreamSpecification.h new file mode 100644 index 000000000000..4021c9e14a13 --- /dev/null +++ b/arangod/Replication2/Streams/StreamSpecification.h @@ -0,0 +1,193 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include +#include + +#include +#include + +namespace arangodb::replication2::streams { + +using StreamId = std::uint64_t; +using StreamTag = std::uint64_t; + +template +struct serializer_tag_t {}; +template +inline constexpr auto serializer_tag = serializer_tag_t{}; + +template +struct tag_descriptor { + using deserializer = D; + using serializer = S; + static constexpr auto tag = Tag; +}; + +template +struct is_tag_descriptor : std::false_type {}; +template +struct is_tag_descriptor> : std::true_type {}; +template +inline constexpr bool is_tag_descriptor_v = is_tag_descriptor::value; + +template +struct tag_descriptor_set { + static_assert((is_tag_descriptor_v && ...)); +}; + +template +struct tag_descriptor_set_primary; +template +struct tag_descriptor_set_primary> { + using type = D; +}; +template +using tag_descriptor_set_primary_t = tag_descriptor_set_primary; + +template +struct stream_descriptor; +template +struct stream_descriptor> { + static constexpr auto id = StreamId; + using tags = tag_descriptor_set; + using type = Type; + + // Check that all deserializers are invocable with (serializer_tag{}, + // slice) and return Type. + static_assert((std::is_invocable_r_v, velocypack::Slice> && + ...)); + + // Check that all serializers are invocable with (serializer_tag{}, T + // const&, Builder) and return void. 
+ static_assert((std::is_invocable_r_v, + std::add_lvalue_reference_t>, + std::add_lvalue_reference_t> && + ...)); +}; + +template +struct is_stream_descriptor : std::false_type {}; +template +struct is_stream_descriptor> : std::true_type { +}; +template +inline constexpr auto is_stream_descriptor_v = is_stream_descriptor::value; + +template +struct stream_descriptor_set { + static_assert((is_stream_descriptor_v && ...)); + + static constexpr auto length = sizeof...(Descriptors); + + template + static void for_each_descriptor(F&& f, Args&&... args) { + (std::invoke(std::forward(f), Descriptors{}, std::forward(args)...), ...); + } +}; + +template +struct is_stream_descriptor_set : std::false_type {}; +template +struct is_stream_descriptor_set> + : std::true_type {}; +template +inline constexpr auto is_stream_descriptor_set_v = is_stream_descriptor_set::value; + +template +struct stream_descriptor_type { + static_assert(is_stream_descriptor_v); + using type = typename T::type; +}; +template +using stream_descriptor_type_t = typename stream_descriptor_type::type; +template +struct stream_descriptor_id { + static inline constexpr auto value = T::id; +}; +template +inline constexpr auto stream_descriptor_id_v = stream_descriptor_id::value; +template +struct stream_descriptor_tags { + using type = typename T::tags; +}; +template +using stream_descriptor_tags_t = typename stream_descriptor_tags::type; + +template +using stream_descriptor_primary_tag_t = + typename tag_descriptor_set_primary_t>::type; + +namespace detail { +template +struct stream_descriptor_by_id_impl; +template +struct stream_descriptor_by_id_impl + : std::conditional, D, + typename stream_descriptor_by_id_impl::type> {}; +template +struct stream_descriptor_by_id_impl { + // static_assert(StreamId == stream_descriptor_id_v); + using type = D; +}; + +} // namespace detail + +template +struct stream_descriptor_by_id; +template +struct stream_descriptor_by_id> { + static_assert(((stream_descriptor_id_v == 
StreamId) || ...)); + using type = typename detail::stream_descriptor_by_id_impl::type; +}; +template +using stream_descriptor_by_id_t = typename stream_descriptor_by_id::type; + +template +using stream_type_by_id_t = + stream_descriptor_type_t>; + +namespace detail { +template +struct stream_index_by_id_impl; +template +struct stream_index_by_id_impl + : std::conditional_t, std::integral_constant, + stream_index_by_id_impl> {}; +template +struct stream_index_by_id_impl + : std::integral_constant {}; +} // namespace detail + +template +struct stream_index_by_id; +template +struct stream_index_by_id> { + static_assert(((stream_descriptor_id_v == StreamId) || ...)); + static inline constexpr std::size_t value = + detail::stream_index_by_id_impl<0, StreamId, Ds...>::value; +}; + +template +inline constexpr auto stream_index_by_id_v = stream_index_by_id::value; +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/Streams.h b/arangod/Replication2/Streams/Streams.h new file mode 100644 index 000000000000..6654faf0ca4d --- /dev/null +++ b/arangod/Replication2/Streams/Streams.h @@ -0,0 +1,103 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include + +namespace arangodb::replication2::streams { + +/** + * Object returned by a stream iterator. Allows read-only access + * to the stored object. The view does not own the value and remains + * valid until the iterator is destroyed or next() is called. + * @tparam T Object Type + */ +template +using StreamEntryView = std::pair; +template +using StreamEntry = std::pair; + +/** + * Consumer interface for a multiplexed object stream. Provides methods for + * interaction with the replicated logs stream. + * @tparam T Object Type + */ +template +struct Stream { + virtual ~Stream() = default; + + struct WaitForResult {}; + virtual auto waitFor(LogIndex) -> futures::Future = 0; + + using Iterator = TypedLogRangeIterator>; + virtual auto waitForIterator(LogIndex) + -> futures::Future> = 0; + + virtual auto release(LogIndex) -> void = 0; +}; + +/** + * Producing interface for a multiplexed object stream. Besides the Stream + * methods it additionally provides an insert method. + * @tparam T Object Type + */ +template +struct ProducerStream : Stream { + virtual auto insert(T const&) -> LogIndex = 0; +}; + +/** + * StreamGenericBase is the base for all Stream implementations. In general + * users don't need to access this object directly. It provides more information + * about the stream. + * @tparam Descriptor The associated stream descriptor. + * @tparam StreamType Either Stream or ProducerStream. 
+ * @tparam Type Object Type, default is extracted from Descriptor + */ +template typename StreamType, typename Type = stream_descriptor_type_t> +struct StreamGenericBase : StreamType { + static_assert(is_stream_descriptor_v, + "Descriptor is not a valid stream descriptor"); + + using Iterator = typename StreamType::Iterator; + virtual auto getAllEntriesIterator() -> std::unique_ptr = 0; +}; + +template +using StreamBase = StreamGenericBase; +template +using ProducerStreamBase = StreamGenericBase; + +template typename> +struct StreamDispatcherBase; + +/** + * This class declares the general interface for an entity that provides a given + * set of streams. It has the StreamBases as virtual base classes. + * @tparam Streams + * @tparam StreamType Either Stream or ProducerStream + */ +template typename StreamType> +struct StreamDispatcherBase, StreamType> + : virtual StreamGenericBase... {}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/Replication2/Streams/Streams.tpp b/arangod/Replication2/Streams/Streams.tpp new file mode 100644 index 000000000000..2d9b8e007a5b --- /dev/null +++ b/arangod/Replication2/Streams/Streams.tpp @@ -0,0 +1,99 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "Replication2/Streams/Streams.h" + +namespace arangodb::replication2::streams { + +/** + * This is the implementation of the stream interfaces. They are just proxy + * objects that static_cast the this pointer to the respective implementor and + * forward the call, annotated with the stream descriptor. + * @tparam Implementation Implementor Top Class + * @tparam Descriptor Stream Descriptor + * @tparam StreamInterface Stream or ProducerStream + */ +template typename StreamInterface> +struct StreamGenericImplementationBase + : virtual StreamGenericBase { + static_assert(is_stream_descriptor_v); + + using ValueType = stream_descriptor_type_t; + using Iterator = TypedLogRangeIterator>; + using WaitForResult = typename StreamInterface::WaitForResult; + + auto waitForIterator(LogIndex index) -> futures::Future> final { + return implementation().template waitForIteratorInternal(index); + } + auto waitFor(LogIndex index) -> futures::Future final { + return implementation().template waitForInternal(index); + } + auto release(LogIndex index) -> void final { + return implementation().template releaseInternal(index); + } + auto getAllEntriesIterator() -> std::unique_ptr final { + return implementation().template getIteratorInternal(); + } + + private: + auto implementation() -> Implementation& { return static_cast(*this); } +}; + +/** + * Wrapper around StreamGenericImplementationBase that adds more methods + * depending on the StreamInterface. Is specialized for ProducerStream. 
+ * @tparam Implementation Implementor Top Class + * @tparam Descriptor Stream Descriptor + * @tparam StreamInterface Stream or ProducerStream + */ +template typename StreamInterface> +struct StreamGenericImplementation + : StreamGenericImplementationBase {}; +template +struct StreamGenericImplementation + : StreamGenericImplementationBase { + using ValueType = stream_descriptor_type_t; + auto insert(ValueType const& t) -> LogIndex override { + return static_cast(this)->template insertInternal(t); + } +}; + +template +using StreamImplementation = StreamGenericImplementation; +template +using ProducerStreamImplementation = + StreamGenericImplementation; + +template typename> +struct ProxyStreamDispatcher; + +/** + * Class that implements all streams as virtual base classes. + * @tparam Implementation + * @tparam Streams + * @tparam StreamInterface + */ +template typename StreamInterface> +struct ProxyStreamDispatcher, StreamInterface> + : StreamGenericImplementation... {}; + +} // namespace arangodb::replication2::streams diff --git a/arangod/RestHandler/RestLogHandler.cpp b/arangod/RestHandler/RestLogHandler.cpp index 9558beb215b5..c95d97c620c4 100644 --- a/arangod/RestHandler/RestLogHandler.cpp +++ b/arangod/RestHandler/RestLogHandler.cpp @@ -83,7 +83,7 @@ struct arangodb::ReplicatedLogMethods { } virtual auto insert(LogId, LogPayload) const - -> futures::Future>> { + -> futures::Future> { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } @@ -97,22 +97,27 @@ namespace { auto sendInsertRequest(network::ConnectionPool *pool, std::string const& server, std::string const& database, LogId id, LogPayload payload) - -> futures::Future>> { + -> futures::Future> { auto path = basics::StringUtils::joinT("/", "_api/log", id, "insert"); network::RequestOptions opts; opts.database = database; - return network::sendRequest(pool, "server:" + server, fuerte::RestVerb::Post, path, - payload.dummy, opts) + return network::sendRequest(pool, "server:" + server, 
fuerte::RestVerb::Post, + path, payload.dummy, opts) .thenValue([](network::Response&& resp) { if (resp.fail() || !fuerte::statusIsSuccess(resp.statusCode())) { THROW_ARANGO_EXCEPTION(resp.combinedResult()); } auto result = resp.slice().get("result"); - auto quorum = std::make_shared(result.get("quorum")); + auto waitResult = result.get("result"); + + auto quorum = std::make_shared( + waitResult.get("quorum")); + auto commitIndex = waitResult.get("commitIndex").extract(); auto index = result.get("index").extract(); - return std::make_pair(index, std::move(quorum)); + return std::make_pair(index, replicated_log::WaitForResult(commitIndex, + std::move(quorum))); }); } @@ -203,7 +208,7 @@ struct ReplicatedLogMethodsCoord final : ReplicatedLogMethods { } auto insert(LogId id, LogPayload payload) const - -> futures::Future>> override { + -> futures::Future> override { return sendInsertRequest(pool, getLogLeader(id), vocbase.name(), id, std::move(payload)); } @@ -301,11 +306,11 @@ struct ReplicatedLogMethodsDBServ final : ReplicatedLogMethods { } auto insert(LogId logId, LogPayload payload) const - -> futures::Future>> override { + -> futures::Future> override { auto log = vocbase.getReplicatedLogLeaderById(logId); auto idx = log->insert(std::move(payload)); return log->waitFor(idx).thenValue( - [idx](auto&& quorum) { return std::make_pair(idx, std::move(quorum)); }); + [idx](auto&& result) { return std::make_pair(idx, std::move(result)); }); } explicit ReplicatedLogMethodsDBServ(TRI_vocbase_t& vocbase) @@ -393,13 +398,13 @@ RestStatus RestLogHandler::handlePostRequest(ReplicatedLogMethods const& methods if (auto& verb = suffixes[1]; verb == "insert") { return waitForFuture( - methods.insert(logId, LogPayload::createFromSlice(body)).thenValue([this](auto&& quorum) { + methods.insert(logId, LogPayload::createFromSlice(body)).thenValue([this](auto&& waitForResult) { VPackBuilder response; { VPackObjectBuilder result(&response); - response.add("index", 
VPackValue(quorum.first)); - response.add(VPackValue("quorum")); - quorum.second->toVelocyPack(response); + response.add("index", VPackValue(waitForResult.first)); + response.add(VPackValue("result")); + waitForResult.second.toVelocyPack(response); } generateOk(rest::ResponseCode::ACCEPTED, response.slice()); })); diff --git a/arangod/Sharding/ShardingInfo.cpp b/arangod/Sharding/ShardingInfo.cpp index 57ee06712ef2..78b73c20969f 100644 --- a/arangod/Sharding/ShardingInfo.cpp +++ b/arangod/Sharding/ShardingInfo.cpp @@ -22,6 +22,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "ShardingInfo.h" +#include "Agency/AgencyPaths.h" #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/Exceptions.h" #include "Basics/StaticStrings.h" @@ -30,6 +31,7 @@ #include "Cluster/ClusterFeature.h" #include "Cluster/ServerState.h" #include "Logger/LogMacros.h" +#include "Replication2/ReplicatedLog/LogCommon.h" #include "Sharding/ShardingFeature.h" #include "Sharding/ShardingStrategyDefault.h" #include "Utils/CollectionNameResolver.h" @@ -43,12 +45,12 @@ using namespace arangodb; ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* collection) : _collection(collection), _numberOfShards(basics::VelocyPackHelper::getNumericValue(info, StaticStrings::NumberOfShards, - 1)), + 1)), _replicationFactor(1), _writeConcern(1), _distributeShardsLike(basics::VelocyPackHelper::getStringValue(info, StaticStrings::DistributeShardsLike, "")), - _shardIds(new ShardMap()) { + _shardIds(std::make_shared()) { bool const isSmart = basics::VelocyPackHelper::getBooleanValue(info, StaticStrings::IsSmart, false); @@ -74,8 +76,7 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } VPackSlice distributeShardsLike = info.get(StaticStrings::DistributeShardsLike); - if (!distributeShardsLike.isNone() && - !distributeShardsLike.isString() && + if (!distributeShardsLike.isNone() && 
!distributeShardsLike.isString() && !distributeShardsLike.isNull()) { THROW_ARANGO_EXCEPTION_MESSAGE( TRI_ERROR_BAD_PARAMETER, @@ -103,7 +104,7 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } } - bool isASatellite = false; + bool isASatellite = false; auto replicationFactorSlice = info.get(StaticStrings::ReplicationFactor); if (!replicationFactorSlice.isNone()) { bool isError = true; @@ -130,8 +131,9 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } if (isSmart && isASatellite) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, - "'isSmart' and replicationFactor 'satellite' cannot be combined"); + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_BAD_PARAMETER, + "'isSmart' and replicationFactor 'satellite' cannot be combined"); } #endif if (isError) { @@ -142,7 +144,7 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* if (!isASatellite) { auto writeConcernSlice = info.get(StaticStrings::WriteConcern); - if (writeConcernSlice.isNone()) { // minReplicationFactor is deprecated in 3.6 + if (writeConcernSlice.isNone()) { // minReplicationFactor is deprecated in 3.6 writeConcernSlice = info.get(StaticStrings::MinReplicationFactor); } if (!writeConcernSlice.isNone()) { @@ -166,7 +168,7 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } } } - + // replicationFactor == 0 -> SatelliteCollection if (shardKeysSlice.isNone() || _replicationFactor == 0) { // Use default. 
@@ -188,10 +190,10 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } } // system attributes are not allowed (except _key, _from and _to) - if (stripped == StaticStrings::IdString || - stripped == StaticStrings::RevString) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, - "_id or _rev cannot be used as shard keys"); + if (stripped == StaticStrings::IdString || stripped == StaticStrings::RevString) { + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_BAD_PARAMETER, + "_id or _rev cannot be used as shard keys"); } if (!stripped.empty()) { @@ -199,7 +201,7 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } } } - if (_shardKeys.empty()) { + if (_shardKeys.empty()) { // Compatibility. Old configs might store empty shard-keys locally. // This is translated to ["_key"]. In cluster-case this always was // forbidden. @@ -212,8 +214,7 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* if (_shardKeys.empty() || _shardKeys.size() > 8) { THROW_ARANGO_EXCEPTION_MESSAGE( - TRI_ERROR_BAD_PARAMETER, - "invalid number of shard keys for collection"); + TRI_ERROR_BAD_PARAMETER, "invalid number of shard keys for collection"); } auto shardsSlice = info.get("shards"); @@ -231,6 +232,49 @@ ShardingInfo::ShardingInfo(arangodb::velocypack::Slice info, LogicalCollection* } } + if (_collection->vocbase().replicationVersion() == replication::Version::TWO) { + auto const& path = cluster::paths::root() + ->arango() + ->plan() + ->collections() + ->database(_collection->vocbase().name()) + ->collection(_collection->name()) + ->replicatedLogs(); + auto const& replicatedLogsKey = path->component(); + auto replicatedLogsSlice = info.get(replicatedLogsKey); + if (replicatedLogsSlice.isObject()) { + TRI_ASSERT(replicatedLogsSlice.isObject() == + (_collection->vocbase().replicationVersion() == replication::Version::TWO)); + try { + auto replicatedLogs = std::make_shared(); + for (auto const& 
logSlice : VPackObjectIterator(replicatedLogsSlice)) { + auto shardId = logSlice.key.stringView(); + auto logId = logSlice.value.stringView(); + auto success = + replicatedLogs + ->emplace(shardId, replication2::LogId::fromString(logId).value()) + .second; + TRI_ASSERT(success); + } + _replicatedLogs = std::move(replicatedLogs); + } catch (std::exception const& ex) { + using basics::StringUtils::concatT; + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_INTERNAL, + concatT("When creating ShardingInfo of collection ", + _collection->vocbase().name(), "/", _collection->name(), + ": " + "Invalid agency entry at ", + *path, ", exception while reading was: ", ex.what(), ".")); + } + } else { + LOG_TOPIC("77879", INFO, Logger::MAINTENANCE) + << "Empty replicatedLogsSlice for " << _collection->vocbase().name() + << "/" << _collection->name() + << ", even though the database uses replicationVersion 2."; + } + } + // set the sharding strategy if (!ServerState::instance()->isRunningInCluster()) { // shortcut, so we do not need to set up the whole application server for @@ -251,7 +295,10 @@ ShardingInfo::ShardingInfo(ShardingInfo const& other, LogicalCollection* collect _distributeShardsLike(other.distributeShardsLike()), _avoidServers(other.avoidServers()), _shardKeys(other.shardKeys()), - _shardIds(new ShardMap()), + _shardIds(std::make_shared()), + _replicatedLogs(collection->vocbase().replicationVersion() == replication::Version::TWO + ? 
decltype(_replicatedLogs){other.replicatedLogs()} + : std::nullopt), _shardingStrategy() { TRI_ASSERT(_collection != nullptr); @@ -297,6 +344,17 @@ void ShardingInfo::toVelocyPack(VPackBuilder& result, bool translateCids) const result.close(); // shards + if (_collection->vocbase().replicationVersion() == replication::Version::TWO) { + result.add(VPackValue(StaticStrings::ReplicatedLogs)); + result.openObject(); + { + for (auto const& it : *replicatedLogs()) { + result.add(it.first, VPackValue(to_string(it.second))); + } + } + result.close(); + } + if (isSatellite()) { result.add(StaticStrings::ReplicationFactor, VPackValue(StaticStrings::Satellite)); } else { @@ -411,7 +469,8 @@ void ShardingInfo::writeConcern(size_t writeConcern) { _writeConcern = writeConcern; } -void ShardingInfo::setWriteConcernAndReplicationFactor(size_t writeConcern, size_t replicationFactor) { +void ShardingInfo::setWriteConcernAndReplicationFactor(size_t writeConcern, + size_t replicationFactor) { if (writeConcern > replicationFactor) { THROW_ARANGO_EXCEPTION_MESSAGE( TRI_ERROR_BAD_PARAMETER, @@ -474,8 +533,12 @@ std::shared_ptr ShardingInfo::shardIds(std::unordered_set return result; } -void ShardingInfo::setShardMap(std::shared_ptr const& map) { - _shardIds = map; +void ShardingInfo::setShardMap(std::shared_ptr map) noexcept { + _shardIds = std::move(map); +} + +void ShardingInfo::setReplicatedLogsMap(std::shared_ptr map) noexcept { + _replicatedLogs = std::move(map); } ErrorCode ShardingInfo::getResponsibleShard(arangodb::velocypack::Slice slice, @@ -486,9 +549,9 @@ ErrorCode ShardingInfo::getResponsibleShard(arangodb::velocypack::Slice slice, usesDefaultShardKeys, key); } -Result ShardingInfo::validateShardsAndReplicationFactor(arangodb::velocypack::Slice slice, - application_features::ApplicationServer const& server, - bool enforceReplicationFactor) { +Result ShardingInfo::validateShardsAndReplicationFactor( + arangodb::velocypack::Slice slice, + 
application_features::ApplicationServer const& server, bool enforceReplicationFactor) { if (slice.isObject()) { auto& cl = server.getFeature(); @@ -496,22 +559,26 @@ Result ShardingInfo::validateShardsAndReplicationFactor(arangodb::velocypack::Sl if (numberOfShardsSlice.isNumber()) { uint32_t const maxNumberOfShards = cl.maxNumberOfShards(); uint32_t numberOfShards = numberOfShardsSlice.getNumber(); - if (maxNumberOfShards > 0 && - numberOfShards > maxNumberOfShards) { - return Result(TRI_ERROR_CLUSTER_TOO_MANY_SHARDS, - std::string("too many shards. maximum number of shards is ") + std::to_string(maxNumberOfShards)); + if (maxNumberOfShards > 0 && numberOfShards > maxNumberOfShards) { + return Result(TRI_ERROR_CLUSTER_TOO_MANY_SHARDS, + std::string( + "too many shards. maximum number of shards is ") + + std::to_string(maxNumberOfShards)); } - TRI_ASSERT((cl.forceOneShard() && numberOfShards <= 1) || !cl.forceOneShard()); + TRI_ASSERT((cl.forceOneShard() && numberOfShards <= 1) || !cl.forceOneShard()); } - + auto writeConcernSlice = slice.get(StaticStrings::WriteConcern); auto minReplicationFactorSlice = slice.get(StaticStrings::MinReplicationFactor); - + if (writeConcernSlice.isNumber() && minReplicationFactorSlice.isNumber()) { // both attributes set. 
now check if they have different values - if (basics::VelocyPackHelper::compare(writeConcernSlice, minReplicationFactorSlice, false) != 0) { - return Result(TRI_ERROR_BAD_PARAMETER, "got ambiguous values for writeConcern and minReplicationFactor"); + if (basics::VelocyPackHelper::compare(writeConcernSlice, + minReplicationFactorSlice, false) != 0) { + return Result( + TRI_ERROR_BAD_PARAMETER, + "got ambiguous values for writeConcern and minReplicationFactor"); } } @@ -522,12 +589,13 @@ Result ShardingInfo::validateShardsAndReplicationFactor(arangodb::velocypack::Sl if (replicationFactorSlice.isNumber()) { int64_t replicationFactorProbe = replicationFactorSlice.getNumber(); if (replicationFactorProbe == 0) { - // TODO: Which configuration for satellites are valid regarding minRepl and writeConcern - // valid for creating a SatelliteCollection + // TODO: Which configuration for satellites are valid regarding + // minRepl and writeConcern valid for creating a SatelliteCollection return Result(); } if (replicationFactorProbe < 0) { - return Result(TRI_ERROR_BAD_PARAMETER, "invalid value for replicationFactor"); + return Result(TRI_ERROR_BAD_PARAMETER, + "invalid value for replicationFactor"); } uint32_t const minReplicationFactor = cl.minReplicationFactor(); @@ -535,19 +603,23 @@ Result ShardingInfo::validateShardsAndReplicationFactor(arangodb::velocypack::Sl uint32_t replicationFactor = replicationFactorSlice.getNumber(); // make sure the replicationFactor value is between the configured min and max values - if (replicationFactor > maxReplicationFactor && - maxReplicationFactor > 0) { + if (replicationFactor > maxReplicationFactor && maxReplicationFactor > 0) { return Result(TRI_ERROR_BAD_PARAMETER, - std::string("replicationFactor must not be higher than maximum allowed replicationFactor (") + std::to_string(maxReplicationFactor) + ")"); - } else if (replicationFactor < minReplicationFactor && - minReplicationFactor > 0) { + std::string( + "replicationFactor must not 
be higher than " + "maximum allowed replicationFactor (") + + std::to_string(maxReplicationFactor) + ")"); + } else if (replicationFactor < minReplicationFactor && minReplicationFactor > 0) { return Result(TRI_ERROR_BAD_PARAMETER, - std::string("replicationFactor must not be lower than minimum allowed replicationFactor (") + std::to_string(minReplicationFactor) + ")"); + std::string( + "replicationFactor must not be lower than " + "minimum allowed replicationFactor (") + + std::to_string(minReplicationFactor) + ")"); } - + // make sure we have enough servers available for the replication factor if (ServerState::instance()->isCoordinator() && - replicationFactor > cl.clusterInfo().getCurrentDBServers().size()) { + replicationFactor > cl.clusterInfo().getCurrentDBServers().size()) { return Result(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS); } } @@ -561,10 +633,12 @@ Result ShardingInfo::validateShardsAndReplicationFactor(arangodb::velocypack::Sl if (writeConcernSlice.isNumber()) { int64_t writeConcern = writeConcernSlice.getNumber(); if (writeConcern <= 0) { - return Result(TRI_ERROR_BAD_PARAMETER, "invalid value for writeConcern"); + return Result(TRI_ERROR_BAD_PARAMETER, + "invalid value for writeConcern"); } if (ServerState::instance()->isCoordinator() && - static_cast(writeConcern) > cl.clusterInfo().getCurrentDBServers().size()) { + static_cast(writeConcern) > + cl.clusterInfo().getCurrentDBServers().size()) { return Result(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS); } } @@ -586,3 +660,24 @@ void ShardingInfo::sortShardNamesNumerically(std::vector& list) { return l < r; }); } + +auto ShardingInfo::replicatedLogs() const -> std::shared_ptr { + if (_replicatedLogs.has_value()) { + return *_replicatedLogs; + } + + using basics::StringUtils::concatT; + auto detail = + _collection->vocbase().replicationVersion() == replication::Version::TWO + ? "Thus the access is expected, but non-existence is not." 
+ : "Thus the non-existence is expected, but the access is not."; + + basics::abortOrThrow( + TRI_ERROR_INTERNAL, + concatT("Access to ShardingInfo::replicatedLogs of collection ", + _collection->name(), + " failed: Does not exist. The containing database is ", + _collection->vocbase().name(), " and uses replication version ", + versionToString(_collection->vocbase().replicationVersion()), + ". ", detail), ADB_HERE); +} diff --git a/arangod/Sharding/ShardingInfo.h b/arangod/Sharding/ShardingInfo.h index c50d95b0ef48..890fb82797ab 100644 --- a/arangod/Sharding/ShardingInfo.h +++ b/arangod/Sharding/ShardingInfo.h @@ -27,21 +27,31 @@ #include #include + +#include +#include #include #include -#include namespace arangodb { class LogicalCollection; class ShardingStrategy; +} // namespace arangodb -namespace application_features { +namespace arangodb::application_features { class ApplicationServer; } +namespace arangodb { + +namespace replication2 { +class LogId; +} + typedef std::string ServerID; // ID of a server typedef std::string ShardID; // ID of a shard typedef std::unordered_map> ShardMap; +using ReplicatedLogsMap = std::unordered_map; class ShardingInfo { public: @@ -93,12 +103,17 @@ class ShardingInfo { std::shared_ptr shardIds() const; + // Must only be called if the replication version of the collection's vocbase + // is 2, will throw otherwise. 
+ auto replicatedLogs() const -> std::shared_ptr; + // return a sorted vector of ShardIDs std::shared_ptr> shardListAsShardID() const; // return a filtered list of the collection's shards std::shared_ptr shardIds(std::unordered_set const& includedShards) const; - void setShardMap(std::shared_ptr const& map); + void setShardMap(std::shared_ptr map) noexcept; + void setReplicatedLogsMap(std::shared_ptr map) noexcept; ErrorCode getResponsibleShard(arangodb::velocypack::Slice slice, bool docComplete, ShardID& shardID, bool& usesDefaultShardKeys, @@ -138,6 +153,8 @@ class ShardingInfo { // @brief current shard ids std::shared_ptr _shardIds; + std::optional> _replicatedLogs; + // @brief vector of shard keys in use. this is immutable after initial setup std::unique_ptr _shardingStrategy; }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 64441a092482..3a6ff6d74621 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -291,25 +291,35 @@ set(ARANGODB_TESTS_SOURCES ${ADDITIONAL_TEST_SOURCES}) set(ARANGODB_REPLICATION2_TEST_SOURCES - Replication2/AppendEntriesBatchTest.cpp - Replication2/ChangeStreamTests.cpp - Replication2/CheckLogsTest.cpp - Replication2/ConcurrencyTests.cpp - Replication2/DetectConflictTest.cpp - Replication2/FollowerAppendEntries.cpp - Replication2/FollowerWaitFor.cpp - Replication2/LeaderAppendEntriesTest.cpp - Replication2/LogReclaimTest.cpp - Replication2/MaintenanceTests.cpp - Replication2/MultiTermTest.cpp - Replication2/ReplicatedLogMetricsMock.cpp - Replication2/ReplicatedLogTest.cpp - Replication2/RewriteLogTest.cpp - Replication2/RocksDBLogTest.cpp - Replication2/SimpleInsertTests.cpp - Replication2/TestHelper.cpp - Replication2/UpdateReplicatedLogTests.cpp -) + Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp + Replication2/ReplicatedLog/ChangeStreamTests.cpp + Replication2/ReplicatedLog/CheckLogsTest.cpp + Replication2/ReplicatedLog/ConcurrencyTests.cpp + Replication2/ReplicatedLog/DetectConflictTest.cpp + 
Replication2/ReplicatedLog/FollowerAppendEntries.cpp + Replication2/ReplicatedLog/FollowerWaitFor.cpp + Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp + Replication2/ReplicatedLog/LogReclaimTest.cpp + Replication2/ReplicatedLog/MaintenanceTests.cpp + Replication2/ReplicatedLog/MultiTermTest.cpp + Replication2/Mocks/ReplicatedLogMetricsMock.cpp + Replication2/ReplicatedLog/ReplicatedLogTest.cpp + Replication2/ReplicatedLog/RewriteLogTest.cpp + Replication2/ReplicatedLog/RocksDBLogTest.cpp + Replication2/ReplicatedLog/SimpleInsertTests.cpp + Replication2/ReplicatedLog/TestHelper.cpp + Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp + Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp + Replication2/ReplicatedState/StateMachineTestHelper.cpp + Replication2/ReplicatedState/StateMachineTestHelper.h + Replication2/Mocks/PersistedLog.cpp + Replication2/Mocks/PersistedLog.h + Replication2/ReplicatedLog/InMemoryLogTest.cpp + Replication2/Streams/LogMultiplexerTest.cpp + Replication2/Mocks/FakeReplicatedLog.cpp + Replication2/Mocks/FakeReplicatedLog.h + Replication2/Streams/TestLogSpecification.cpp + Replication2/Streams/TestLogSpecification.h Replication2/Streams/MultiplexerConcurrencyTest.cpp Replication2/Mocks/AsyncFollower.cpp Replication2/Mocks/AsyncFollower.h) if (LINUX) # add "-fno-var-tracking" to the compiler flags diff --git a/tests/IResearch/IResearchFeature-test.cpp b/tests/IResearch/IResearchFeature-test.cpp index ba3200c66b97..9e7f72aae603 100644 --- a/tests/IResearch/IResearchFeature-test.cpp +++ b/tests/IResearch/IResearchFeature-test.cpp @@ -2273,9 +2273,10 @@ TEST_F(IResearchFeatureTestCoordinator, test_upgrade0_1) { agencyCreateDatabase(vocbase->name()); ASSERT_TRUE( - ci.createCollectionCoordinator( - vocbase->name(), collectionId, 0, 1, 1, false, collectionJson->slice(), 0.0, false, nullptr) - .ok()); + ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, + false, collectionJson->slice(), 0.0, false, 
nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok()); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_FALSE(!logicalCollection); EXPECT_TRUE( diff --git a/tests/IResearch/IResearchLinkCoordinator-test.cpp b/tests/IResearch/IResearchLinkCoordinator-test.cpp index 8c149d6ccb97..5fd5f900e6ed 100644 --- a/tests/IResearch/IResearchLinkCoordinator-test.cpp +++ b/tests/IResearch/IResearchLinkCoordinator-test.cpp @@ -120,8 +120,11 @@ TEST_F(IResearchLinkCoordinatorTest, test_create_drop) { auto collectionJson = arangodb::velocypack::Parser::fromJson( "{ \"id\": \"1\", \"name\": \"testCollection\", \"replicationFactor\":1, \"shards\":{} }"); - EXPECT_TRUE(ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr).ok()); + EXPECT_TRUE( + ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok()); logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((nullptr != logicalCollection)); diff --git a/tests/IResearch/IResearchViewCoordinator-test.cpp b/tests/IResearch/IResearchViewCoordinator-test.cpp index ce862aa1db90..d0a25ca3eeeb 100644 --- a/tests/IResearch/IResearchViewCoordinator-test.cpp +++ b/tests/IResearch/IResearchViewCoordinator-test.cpp @@ -165,20 +165,25 @@ TEST_F(IResearchViewCoordinatorTest, visit_collections) { auto json = arangodb::velocypack::Parser::fromJson( "{ \"name\": \"testView\", \"type\": \"arangosearch\", \"id\": \"1\" }"); - ASSERT_TRUE(ci.createCollectionCoordinator(vocbase->name(), collectionId0, 0, 1, 1, false, - collectionJson0->slice(), 0.0, false, nullptr).ok()); + ASSERT_TRUE( + ci.createCollectionCoordinator(vocbase->name(), collectionId0, 0, 1, 1, false, + false, collectionJson0->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + 
.ok()); auto logicalCollection0 = ci.getCollection(vocbase->name(), collectionId0); ASSERT_TRUE((false == !logicalCollection0)); ASSERT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId1, 0, 1, 1, false, - collectionJson1->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson1->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection1 = ci.getCollection(vocbase->name(), collectionId1); ASSERT_TRUE((false == !logicalCollection1)); ASSERT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId2, 0, 1, 1, false, - collectionJson2->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson2->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection2 = ci.getCollection(vocbase->name(), collectionId2); ASSERT_TRUE((false == !logicalCollection2)); @@ -352,8 +357,9 @@ TEST_F(IResearchViewCoordinatorTest, test_defaults) { auto viewId = "testView"; EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -385,8 +391,9 @@ TEST_F(IResearchViewCoordinatorTest, test_defaults) { auto viewId = "testView"; EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -433,8 +440,9 
@@ TEST_F(IResearchViewCoordinatorTest, test_defaults) { auto viewId = "testView"; EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -621,8 +629,9 @@ TEST_F(IResearchViewCoordinatorTest, test_create_link_in_background) { auto viewId = std::to_string(42); ASSERT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_NE(nullptr, logicalCollection); ASSERT_TRUE(( @@ -723,8 +732,9 @@ TEST_F(IResearchViewCoordinatorTest, test_drop_with_link) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); EXPECT_TRUE(( @@ -1833,8 +1843,11 @@ TEST_F(IResearchViewCoordinatorTest, test_properties_internal_request_explicit_v "{ \"id\": \"100\", \"planId\": \"100\", \"name\": \"testCollection\", " "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); - EXPECT_TRUE(ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr).ok()); + EXPECT_TRUE( + ci.createCollectionCoordinator(vocbase->name(), 
collectionId, 0, 1, 1, false, + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok()); logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_NE(nullptr, logicalCollection); @@ -2524,8 +2537,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_partial_remove) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection1 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection1); @@ -2540,8 +2554,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_partial_remove) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection2 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection2); @@ -2556,8 +2571,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_partial_remove) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection3 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection3); @@ -3190,8 +3206,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_partial_add) { "\"replicationFactor\": 1, \"type\": 1, 
\"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection1 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection1); @@ -3206,8 +3223,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_partial_add) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection2 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection2); @@ -3222,8 +3240,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_partial_add) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection3 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection3); @@ -3904,8 +3923,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_replace) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection1 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection1); 
@@ -3920,8 +3940,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_replace) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection2 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection2); @@ -3936,8 +3957,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_replace) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection3 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection3); @@ -4555,8 +4577,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_clear) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection1 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection1); @@ -4571,8 +4594,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_clear) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + 
.ok())); logicalCollection2 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection2); @@ -4587,8 +4611,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_links_clear) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection3 = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE(nullptr != logicalCollection3); @@ -5055,8 +5080,9 @@ TEST_F(IResearchViewCoordinatorTest, test_drop_link) { "\"replicationFactor\": 1, \"type\": 1, \"shards\":{} }"); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((nullptr != logicalCollection)); @@ -5404,8 +5430,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -5461,8 +5488,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, 
collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -5595,8 +5623,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -5656,8 +5685,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -5794,8 +5824,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId0, 0, 1, 1, false, - collection0Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection0Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection0 = ci.getCollection(vocbase->name(), collectionId0); ASSERT_TRUE((false == !logicalCollection0)); auto dropLogicalCollection0 = std::shared_ptr( @@ -5803,8 +5834,9 @@ TEST_F(IResearchViewCoordinatorTest, 
test_update_overwrite) { ci->dropCollectionCoordinator(vocbase->name(), collectionId0, 0); }); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId1, 0, 1, 1, false, - collection1Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection1Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection1 = ci.getCollection(vocbase->name(), collectionId1); ASSERT_TRUE((false == !logicalCollection1)); auto dropLogicalCollection1 = std::shared_ptr( @@ -5949,8 +5981,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId0, 0, 1, 1, false, - collection0Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection0Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection0 = ci.getCollection(vocbase->name(), collectionId0); ASSERT_TRUE((false == !logicalCollection0)); auto dropLogicalCollection0 = std::shared_ptr( @@ -5958,8 +5991,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_overwrite) { ci->dropCollectionCoordinator(vocbase->name(), collectionId0, 0); }); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId1, 0, 1, 1, false, - collection1Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection1Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection1 = ci.getCollection(vocbase->name(), collectionId1); ASSERT_TRUE((false == !logicalCollection1)); auto dropLogicalCollection1 = std::shared_ptr( @@ -6161,8 +6195,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, 
nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -6217,8 +6252,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -6351,8 +6387,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -6412,8 +6449,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId, 0, 1, 1, false, - collectionJson->slice(), 0.0, false, nullptr) - .ok())); + false, collectionJson->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection = ci.getCollection(vocbase->name(), collectionId); ASSERT_TRUE((false == !logicalCollection)); auto dropLogicalCollection = std::shared_ptr( @@ -6546,8 +6584,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { auto viewId = std::to_string(42); 
EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId0, 0, 1, 1, false, - collection0Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection0Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection0 = ci.getCollection(vocbase->name(), collectionId0); ASSERT_TRUE((false == !logicalCollection0)); auto dropLogicalCollection0 = std::shared_ptr( @@ -6555,8 +6594,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { ci->dropCollectionCoordinator(vocbase->name(), collectionId0, 0); }); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId1, 0, 1, 1, false, - collection1Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection1Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection1 = ci.getCollection(vocbase->name(), collectionId1); ASSERT_TRUE((false == !logicalCollection1)); auto dropLogicalCollection1 = std::shared_ptr( @@ -6709,8 +6749,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { auto viewId = std::to_string(42); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId0, 0, 1, 1, false, - collection0Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection0Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, std::nullopt) + .ok())); auto logicalCollection0 = ci.getCollection(vocbase->name(), collectionId0); ASSERT_TRUE((false == !logicalCollection0)); auto dropLogicalCollection0 = std::shared_ptr( @@ -6718,8 +6759,9 @@ TEST_F(IResearchViewCoordinatorTest, test_update_partial) { ci->dropCollectionCoordinator(vocbase->name(), collectionId0, 0); }); EXPECT_TRUE((ci.createCollectionCoordinator(vocbase->name(), collectionId1, 0, 1, 1, false, - collection1Json->slice(), 0.0, false, nullptr) - .ok())); + false, collection1Json->slice(), 0.0, false, nullptr, + arangodb::replication::Version::ONE, 
std::nullopt) + .ok())); auto logicalCollection1 = ci.getCollection(vocbase->name(), collectionId1); ASSERT_TRUE((false == !logicalCollection1)); auto dropLogicalCollection1 = std::shared_ptr( diff --git a/tests/Replication2/Mocks/AsyncFollower.cpp b/tests/Replication2/Mocks/AsyncFollower.cpp new file mode 100644 index 000000000000..0177044dabc5 --- /dev/null +++ b/tests/Replication2/Mocks/AsyncFollower.cpp @@ -0,0 +1,106 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// +#include "AsyncFollower.h" + +#include + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogStatus.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; + +auto AsyncFollower::getStatus() const -> LogStatus { + return _follower->getStatus(); +} + +auto AsyncFollower::resign() && -> std::tuple, DeferredAction> { + return std::move(*_follower).resign(); +} + +auto AsyncFollower::waitFor(arangodb::replication2::LogIndex index) -> WaitForFuture { + return _follower->waitFor(index); +} + +auto AsyncFollower::release(arangodb::replication2::LogIndex doneWithIdx) -> Result { + return _follower->release(doneWithIdx); +} + +auto AsyncFollower::getParticipantId() const noexcept -> ParticipantId const& { + return _follower->getParticipantId(); +} + +auto AsyncFollower::appendEntries(AppendEntriesRequest request) + -> futures::Future { + std::unique_lock guard(_mutex); + _cv.notify_all(); + return _requests.emplace_back(std::move(request)).promise.getFuture(); +} + +AsyncFollower::AsyncFollower(std::shared_ptr follower) + : _follower(std::move(follower)), _asyncWorker([this] { this->runWorker(); }) {} + +AsyncFollower::~AsyncFollower() noexcept { + if (!_stopping) { + stop(); + } +} + +void AsyncFollower::runWorker() { + while (true) { + std::vector requests; + { + std::unique_lock guard(_mutex); + if (_stopping) { + break; + } + if (!_requests.empty()) { + std::swap(requests, _requests); + } else { + _cv.wait(guard); + } + } + + for (auto& req : requests) { + _follower->appendEntries(req.request).thenFinal([promise = std::move(req.promise)](auto&& res) mutable { + 
promise.setValue(std::forward(res)); + }); + } + } +} + +void AsyncFollower::stop() noexcept { + { + std::unique_lock guard(_mutex); + _stopping = true; + _cv.notify_all(); + } + + TRI_ASSERT(_asyncWorker.joinable()); + _asyncWorker.join(); +} + +AsyncFollower::AsyncRequest::AsyncRequest(AppendEntriesRequest request) + : request(std::move(request)) {} diff --git a/tests/Replication2/Mocks/AsyncFollower.h b/tests/Replication2/Mocks/AsyncFollower.h new file mode 100644 index 000000000000..1e922f045fa6 --- /dev/null +++ b/tests/Replication2/Mocks/AsyncFollower.h @@ -0,0 +1,64 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include + +#include "Replication2/ReplicatedLog/LogFollower.h" + +namespace arangodb::replication2::test { + +struct AsyncFollower : replicated_log::ILogParticipant, replicated_log::AbstractFollower { + explicit AsyncFollower(std::shared_ptr follower); + ~AsyncFollower() noexcept override; + [[nodiscard]] auto getStatus() const -> replicated_log::LogStatus override; + auto resign() && -> std::tuple, DeferredAction> override; + auto waitFor(LogIndex index) -> WaitForFuture override; + auto release(LogIndex doneWithIdx) -> Result override; + [[nodiscard]] auto getParticipantId() const noexcept -> ParticipantId const& override; + auto appendEntries(replicated_log::AppendEntriesRequest request) + -> futures::Future override; + + void stop() noexcept; + private: + void runWorker(); + + struct AsyncRequest { + AsyncRequest(replicated_log::AppendEntriesRequest request); + replicated_log::AppendEntriesRequest request; + futures::Promise promise; + }; + + std::mutex _mutex; + std::condition_variable _cv; + std::vector _requests; + std::shared_ptr const _follower; + bool _stopping{false}; + + std::thread _asyncWorker; +}; + +} // namespace arangodb::replication2::test diff --git a/tests/Replication2/Mocks/FakeReplicatedLog.cpp b/tests/Replication2/Mocks/FakeReplicatedLog.cpp new file mode 100644 index 000000000000..db19b27c7e96 --- /dev/null +++ b/tests/Replication2/Mocks/FakeReplicatedLog.cpp @@ -0,0 +1,45 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "FakeReplicatedLog.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; + + +auto TestReplicatedLog::becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) +-> std::shared_ptr { + auto ptr = ReplicatedLog::becomeFollower(id, term, std::move(leaderId)); + return std::make_shared(ptr); +} + +auto TestReplicatedLog::becomeLeader(ParticipantId const& id, LogTerm term, + std::vector> const& follower, + std::size_t writeConcern) + -> std::shared_ptr { + LogConfig config; + config.writeConcern = writeConcern; + config.waitForSync = false; + return becomeLeader(config, id, term, follower); +} diff --git a/tests/Replication2/Mocks/FakeReplicatedLog.h b/tests/Replication2/Mocks/FakeReplicatedLog.h new file mode 100644 index 000000000000..942485ed2262 --- /dev/null +++ b/tests/Replication2/Mocks/FakeReplicatedLog.h @@ -0,0 +1,131 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include + +#include "Replication2/Mocks/ReplicatedLogMetricsMock.h" +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +namespace arangodb::replication2::test { + +struct DelayedFollowerLog : replicated_log::AbstractFollower, replicated_log::ILogParticipant { + explicit DelayedFollowerLog(std::shared_ptr follower) + : _follower(std::move(follower)) {} + + DelayedFollowerLog(LoggerContext const& logContext, + std::shared_ptr logMetricsMock, + ParticipantId const& id, std::unique_ptr logCore, + LogTerm term, ParticipantId leaderId) + : DelayedFollowerLog([&] { + auto inMemoryLog = replicated_log::InMemoryLog{*logCore}; + return std::make_shared( + logContext, std::move(logMetricsMock), id, std::move(logCore), + term, std::move(leaderId), std::move(inMemoryLog)); + }()) {} + + auto appendEntries(replicated_log::AppendEntriesRequest req) + -> arangodb::futures::Future override { + auto future = _asyncQueue.doUnderLock([&](auto& queue) { + 
return queue.emplace_back(std::make_shared(std::move(req))) + ->promise.getFuture(); + }); + return std::move(future).thenValue([this](auto&& result) mutable { + return _follower->appendEntries(std::forward(result)); + }); + } + + void runAsyncAppendEntries() { + auto asyncQueue = _asyncQueue.doUnderLock([](auto& _queue) { + auto queue = std::move(_queue); + _queue.clear(); + return queue; + }); + + for (auto& p : asyncQueue) { + p->promise.setValue(std::move(p->request)); + } + } + + using WaitForAsyncPromise = futures::Promise; + + struct AsyncRequest { + explicit AsyncRequest(replicated_log::AppendEntriesRequest request) + : request(std::move(request)) {} + replicated_log::AppendEntriesRequest request; + WaitForAsyncPromise promise; + }; + [[nodiscard]] auto pendingAppendEntries() const + -> std::deque> { + return _asyncQueue.copy(); + } + [[nodiscard]] auto hasPendingAppendEntries() const -> bool { + return _asyncQueue.doUnderLock( + [](auto const& queue) { return !queue.empty(); }); + } + + auto getParticipantId() const noexcept -> ParticipantId const& override { + return _follower->getParticipantId(); + } + + auto getStatus() const -> replicated_log::LogStatus override { + return _follower->getStatus(); + } + + [[nodiscard]] auto resign() && -> std::tuple, DeferredAction> override { + return std::move(*_follower).resign(); + } + + auto waitFor(LogIndex index) -> WaitForFuture override { return _follower->waitFor(index); } + + auto waitForIterator(LogIndex index) -> WaitForIteratorFuture override { + return _follower->waitForIterator(index); + } + + auto release(LogIndex doneWithIdx) -> Result override { + return _follower->release(doneWithIdx); + } + + private: + Guarded>> _asyncQueue; + std::shared_ptr _follower; +}; + +struct TestReplicatedLog : replicated_log::ReplicatedLog { + using ReplicatedLog::becomeLeader; + using ReplicatedLog::ReplicatedLog; + auto becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) + -> std::shared_ptr; + 
+ auto becomeLeader(ParticipantId const& id, LogTerm term, + std::vector> const&, + std::size_t writeConcern) -> std::shared_ptr; +}; +} // namespace arangodb::replication2::test diff --git a/tests/Replication2/TestHelper.cpp b/tests/Replication2/Mocks/PersistedLog.cpp similarity index 59% rename from tests/Replication2/TestHelper.cpp rename to tests/Replication2/Mocks/PersistedLog.cpp index ad4b74c52c8e..65e5866e095a 100644 --- a/tests/Replication2/TestHelper.cpp +++ b/tests/Replication2/Mocks/PersistedLog.cpp @@ -1,37 +1,13 @@ -//////////////////////////////////////////////////////////////////////////////// -/// DISCLAIMER -/// -/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is ArangoDB GmbH, Cologne, Germany -/// -/// @author Lars Maier -//////////////////////////////////////////////////////////////////////////////// - -#include "TestHelper.h" - -#include "Replication2/ReplicatedLog/LogCore.h" -#include "Replication2/ReplicatedLog/LogLeader.h" -#include "Replication2/ReplicatedLog/ReplicatedLog.h" -#include "Replication2/ReplicatedLog/types.h" - -#include +// +// Created by lars on 10/08/2021. 
+// + +#include "PersistedLog.h" using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; auto MockLog::insert(PersistedLogIterator& iter, WriteOptions const&) -> arangodb::Result { auto lastIndex = LogIndex{0}; @@ -84,7 +60,7 @@ auto MockLog::removeBack(replication2::LogIndex start) -> Result { auto MockLog::drop() -> Result { _storage.clear(); - return Result(); + return {}; } void MockLog::setEntry(replication2::LogIndex idx, replication2::LogTerm term, @@ -145,32 +121,9 @@ void AsyncMockLog::runWorker() { std::swap(queue, _queue); } } - for (auto& lambda : queue) { - auto res = insert(*lambda->iter, lambda->opts); - lambda->promise.setValue(res); + for (auto& entry : queue) { + auto res = insert(*entry->iter, entry->opts); + entry->promise.setValue(res); } } } - -auto TestReplicatedLog::becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) - -> std::shared_ptr { - auto ptr = ReplicatedLog::becomeFollower(id, term, std::move(leaderId)); - return std::make_shared(ptr); -} - -auto TestReplicatedLog::becomeLeader(LogConfig config, ParticipantId id, LogTerm term, - std::vector> const& follower) - -> std::shared_ptr { - return ReplicatedLog::becomeLeader(config, std::move(id), term, follower); -} - -auto TestReplicatedLog::becomeLeader(ParticipantId const& id, LogTerm term, - std::vector> const& follower, - std::size_t writeConcern) - -> std::shared_ptr { - LogConfig config; - config.writeConcern = writeConcern; - config.waitForSync = false; - - return becomeLeader(config, id, term, follower); -} diff --git a/tests/Replication2/Mocks/PersistedLog.h b/tests/Replication2/Mocks/PersistedLog.h new file mode 100644 index 000000000000..e71c97c89c15 --- /dev/null +++ b/tests/Replication2/Mocks/PersistedLog.h @@ -0,0 +1,104 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// 
Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +namespace arangodb::replication2::test { + +using namespace replicated_log; + +struct MockLog : replication2::replicated_log::PersistedLog { + using storeType = std::map; + + explicit MockLog(replication2::LogId id); + MockLog(replication2::LogId id, storeType storage); + + auto insert(replication2::replicated_log::PersistedLogIterator& iter, + WriteOptions const&) -> Result override; + auto insertAsync(std::unique_ptr iter, + WriteOptions const&) -> futures::Future override; + auto read(replication2::LogIndex start) + -> std::unique_ptr override; + auto removeFront(replication2::LogIndex stop) -> Result override; + auto removeBack(replication2::LogIndex start) -> Result override; + auto drop() 
-> Result override; + + void setEntry(replication2::LogIndex idx, replication2::LogTerm term, + replication2::LogPayload payload); + void setEntry(replication2::PersistingLogEntry); + + [[nodiscard]] storeType getStorage() const { return _storage; } + + private: + using iteratorType = storeType::iterator; + storeType _storage; +}; + +struct AsyncMockLog : MockLog { + explicit AsyncMockLog(replication2::LogId id); + + ~AsyncMockLog() noexcept; + + auto insertAsync(std::unique_ptr iter, + WriteOptions const&) -> futures::Future override; + + auto stop() noexcept -> void { + if (!_stopping) { + { + std::unique_lock guard(_mutex); + _stopping = true; + _cv.notify_all(); + } + _asyncWorker.join(); + } + } + + private: + struct QueueEntry { + WriteOptions opts; + std::unique_ptr iter; + futures::Promise promise; + }; + + void runWorker(); + + std::mutex _mutex; + std::vector> _queue; + std::condition_variable _cv; + std::atomic _stopping = false; + bool _stopped = false; + // _asyncWorker *must* be initialized last, otherwise starting the thread + // races with initializing the coordination variables. 
+ std::thread _asyncWorker; +}; + +} diff --git a/tests/Replication2/ReplicatedLogMetricsMock.cpp b/tests/Replication2/Mocks/ReplicatedLogMetricsMock.cpp similarity index 100% rename from tests/Replication2/ReplicatedLogMetricsMock.cpp rename to tests/Replication2/Mocks/ReplicatedLogMetricsMock.cpp diff --git a/tests/Replication2/ReplicatedLogMetricsMock.h b/tests/Replication2/Mocks/ReplicatedLogMetricsMock.h similarity index 100% rename from tests/Replication2/ReplicatedLogMetricsMock.h rename to tests/Replication2/Mocks/ReplicatedLogMetricsMock.h diff --git a/tests/Replication2/AppendEntriesBatchTest.cpp b/tests/Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp similarity index 96% rename from tests/Replication2/AppendEntriesBatchTest.cpp rename to tests/Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp index 0e0aee610675..35c8ec5c6deb 100644 --- a/tests/Replication2/AppendEntriesBatchTest.cpp +++ b/tests/Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp @@ -28,6 +28,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct AppendEntriesBatchTest : ReplicatedLogTest {}; @@ -72,7 +73,8 @@ TEST_F(AppendEntriesBatchTest, test_with_two_batches) { // 1. AppendEntries 1..1000 // 2. AppendEntries 2..2000 // 3. AppendEntries CommitIndex - EXPECT_EQ(num_requests, 3 + 1); + // 4. 
AppendEntries LCI + EXPECT_EQ(num_requests, 3 + 1 + 1); } { diff --git a/tests/Replication2/ChangeStreamTests.cpp b/tests/Replication2/ReplicatedLog/ChangeStreamTests.cpp similarity index 99% rename from tests/Replication2/ChangeStreamTests.cpp rename to tests/Replication2/ReplicatedLog/ChangeStreamTests.cpp index 786a64474d59..3bb2b5271d23 100644 --- a/tests/Replication2/ChangeStreamTests.cpp +++ b/tests/Replication2/ReplicatedLog/ChangeStreamTests.cpp @@ -28,6 +28,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct ChangeStreamTests : ReplicatedLogTest {}; diff --git a/tests/Replication2/CheckLogsTest.cpp b/tests/Replication2/ReplicatedLog/CheckLogsTest.cpp similarity index 100% rename from tests/Replication2/CheckLogsTest.cpp rename to tests/Replication2/ReplicatedLog/CheckLogsTest.cpp diff --git a/tests/Replication2/ConcurrencyTests.cpp b/tests/Replication2/ReplicatedLog/ConcurrencyTests.cpp similarity index 99% rename from tests/Replication2/ConcurrencyTests.cpp rename to tests/Replication2/ReplicatedLog/ConcurrencyTests.cpp index cf495b475ddf..d6aa1b37d4ce 100644 --- a/tests/Replication2/ConcurrencyTests.cpp +++ b/tests/Replication2/ReplicatedLog/ConcurrencyTests.cpp @@ -35,6 +35,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct ReplicatedLogConcurrentTest : ReplicatedLogTest { @@ -216,7 +217,6 @@ TEST_F(ReplicatedLogConcurrentTest, lonelyLeader) { using namespace std::chrono_literals; auto replicatedLog = makeReplicatedLogWithAsyncMockLog(LogId{1}); - // TODO this test hangs because there is not local follower currently auto leaderLog = replicatedLog->becomeLeader("leader", LogTerm{1}, {}, 1); auto data = ThreadCoordinationData{leaderLog}; diff --git a/tests/Replication2/DetectConflictTest.cpp 
b/tests/Replication2/ReplicatedLog/DetectConflictTest.cpp similarity index 98% rename from tests/Replication2/DetectConflictTest.cpp rename to tests/Replication2/ReplicatedLog/DetectConflictTest.cpp index 4f8682b03f0a..4c3ffbbc6709 100644 --- a/tests/Replication2/DetectConflictTest.cpp +++ b/tests/Replication2/ReplicatedLog/DetectConflictTest.cpp @@ -27,6 +27,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::algorithms; +using namespace arangodb::replication2::test; struct DetectConflictTest : ::testing::Test { diff --git a/tests/Replication2/FollowerAppendEntries.cpp b/tests/Replication2/ReplicatedLog/FollowerAppendEntries.cpp similarity index 99% rename from tests/Replication2/FollowerAppendEntries.cpp rename to tests/Replication2/ReplicatedLog/FollowerAppendEntries.cpp index 759851afdd93..a176f3bd664c 100644 --- a/tests/Replication2/FollowerAppendEntries.cpp +++ b/tests/Replication2/ReplicatedLog/FollowerAppendEntries.cpp @@ -24,7 +24,7 @@ #include "Basics/voc-errors.h" -#include "ReplicatedLogMetricsMock.h" +#include "Replication2/Mocks/ReplicatedLogMetricsMock.h" #include "Replication2/ReplicatedLog/LogFollower.h" #include "Replication2/ReplicatedLog/ReplicatedLog.h" @@ -33,6 +33,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct FollowerAppendEntriesTest : ReplicatedLogTest { auto makeFollower(ParticipantId id, LogTerm term, ParticipantId leaderId) -> std::shared_ptr { diff --git a/tests/Replication2/FollowerWaitFor.cpp b/tests/Replication2/ReplicatedLog/FollowerWaitFor.cpp similarity index 98% rename from tests/Replication2/FollowerWaitFor.cpp rename to tests/Replication2/ReplicatedLog/FollowerWaitFor.cpp index e0713aca84c3..d25d64d0ba4e 100644 --- a/tests/Replication2/FollowerWaitFor.cpp +++ b/tests/Replication2/ReplicatedLog/FollowerWaitFor.cpp @@ -30,6 +30,7 @@ using 
namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct FollowerWaitForTest : ReplicatedLogTest { auto makeFollower(ParticipantId id, LogTerm term, ParticipantId leaderId) -> std::shared_ptr { diff --git a/tests/Replication2/ReplicatedLog/InMemoryLogTest.cpp b/tests/Replication2/ReplicatedLog/InMemoryLogTest.cpp new file mode 100644 index 000000000000..d7a95dba04bb --- /dev/null +++ b/tests/Replication2/ReplicatedLog/InMemoryLogTest.cpp @@ -0,0 +1,306 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include + +#include "Containers/Enumerate.h" + +#include "Replication2/ReplicatedLog/InMemoryLog.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; + +struct TestInMemoryLog : InMemoryLog { + explicit TestInMemoryLog(log_type log) : InMemoryLog(std::move(log)) {} + explicit TestInMemoryLog(log_type log, LogIndex first) + : InMemoryLog(std::move(log), first) {} + TestInMemoryLog() : InMemoryLog(log_type{}) {} +}; + +struct InMemoryLogTestBase { + static auto createLogForRangeSingleTerm(LogRange range, LogTerm term = LogTerm{1}) + -> TestInMemoryLog { + auto transient = InMemoryLog::log_type::transient_type{}; + for (auto i : range) { + transient.push_back(InMemoryLogEntry( + {term, LogIndex{i}, LogPayload::createFromString("foo")})); + } + return TestInMemoryLog(transient.persistent(), range.from); + } +}; + +struct InMemoryLogTest : ::testing::TestWithParam, InMemoryLogTestBase {}; + +TEST_P(InMemoryLogTest, first_last_next) { + auto const term = LogTerm{1}; + auto const range = GetParam(); + auto const log = createLogForRangeSingleTerm(range, term); + auto [from, to] = range; + + EXPECT_EQ(!range.empty(), log.getFirstEntry().has_value()); + EXPECT_EQ(!range.empty(), log.getLastEntry().has_value()); + EXPECT_EQ(log.getNextIndex(), to); + + EXPECT_EQ(log.getIndexRange(), range); + + if (!range.empty()) { + { + auto memtry = log.getFirstEntry().value(); + EXPECT_EQ(memtry.entry().logIndex(), from); + } + { + auto memtry = log.getLastEntry().value(); + EXPECT_EQ(memtry.entry().logIndex() + 1, to); + EXPECT_EQ(log.getLastIndex() + 1, to); + EXPECT_EQ(log.back().entry().logIndex() + 1, to); + + EXPECT_EQ(memtry.entry().logTerm(), term); + EXPECT_EQ(log.getLastTerm(), term); + 
EXPECT_EQ(log.back().entry().logTerm(), term); + } + } +} + +TEST_P(InMemoryLogTest, get_entry_by_index) { + auto const range = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + auto const tests = {LogIndex{1}, LogIndex{12}, LogIndex{45}}; + for (auto idx : tests) { + auto memtry = log.getEntryByIndex(idx); + EXPECT_EQ(range.contains(idx), memtry.has_value()) + << "Range is " << range << " and index is " << idx; + if (range.contains(idx)) { + auto entry = memtry->entry(); + EXPECT_EQ(entry.logIndex(), idx); + } + } +} + +TEST_P(InMemoryLogTest, empty) { + auto const range = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + EXPECT_EQ(range.empty(), log.empty()); +} + +TEST_P(InMemoryLogTest, append_in_place) { + auto const range = GetParam(); + auto log = createLogForRangeSingleTerm(range); + + auto memtry = + InMemoryLogEntry({LogTerm{1}, range.to, LogPayload::createFromString("foo")}); + log.appendInPlace(LoggerContext(Logger::FIXME), std::move(memtry)); + { + auto result = log.getEntryByIndex(range.to); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->entry().logIndex(), range.to); + } + { + auto result = log.getLastEntry(); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->entry().logIndex(), range.to); + } +} + +auto const LogRanges = ::testing::Values(LogRange(LogIndex{1}, LogIndex{15}), + LogRange(LogIndex{1}, LogIndex{1234}), + LogRange(LogIndex{1}, LogIndex{1}), + LogRange(LogIndex{5}, LogIndex{18}), + LogRange(LogIndex{76}, LogIndex{76})); + +INSTANTIATE_TEST_CASE_P(InMemoryLogTestInstance, InMemoryLogTest, LogRanges); + +struct InMemoryLogAppendTest + : testing::TestWithParam>, + InMemoryLogTestBase { + static auto getPersistedEntriesVector(LogIndex first, std::size_t length, + LogTerm term = LogTerm{1}) { + auto result = InMemoryLog::log_type_persisted::transient_type{}; + for (auto idx : LogRange(first, first + length)) { + result.push_back(PersistingLogEntry{term, idx, 
LogPayload::createFromString("foo")}); + } + return result.persistent(); + } +}; + +TEST_P(InMemoryLogAppendTest, append_peristed_entries) { + auto const [length, range] = GetParam(); + auto const log = createLogForRangeSingleTerm(range, LogTerm{1}); + auto const toAppend = getPersistedEntriesVector(range.to, length, LogTerm{2}); + + auto const newLog = log.append(LoggerContext(Logger::FIXME), toAppend); + for (auto idx : LogRange(range.from, range.to + length)) { + auto memtry = newLog.getEntryByIndex(idx); + ASSERT_TRUE(memtry.has_value()) << "idx = " << idx; + auto const expectedTerm = range.contains(idx) ? LogTerm{1} : LogTerm{2}; + EXPECT_EQ(memtry->entry().logIndex(), idx); + EXPECT_EQ(memtry->entry().logTerm(), expectedTerm); + } +} + +INSTANTIATE_TEST_CASE_P(InMemoryLogAppendTest, InMemoryLogAppendTest, + ::testing::Combine(::testing::Range(0, 10), LogRanges)); + +struct InMemoryLogSliceTest : ::testing::TestWithParam>, + InMemoryLogTestBase {}; + +TEST_P(InMemoryLogSliceTest, slice) { + auto const [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + + auto s = log.slice(testRange.from, testRange.to); + auto const expectedRange = intersect(testRange, range); + + ASSERT_EQ(s.size(), expectedRange.count()); + for (auto const& [idx, e] : enumerate(s)) { + EXPECT_EQ(e.entry().logIndex(), expectedRange.from + idx); + } +} + +TEST_P(InMemoryLogSliceTest, get_iterator_range) { + auto const [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + + auto const expectedRange = intersect(range, testRange); + auto iter = log.getIteratorRange(testRange.from, testRange.to); + auto [from, to] = iter->range(); + if (expectedRange.empty()) { + EXPECT_TRUE(from == to); + + } else { + EXPECT_EQ(from, expectedRange.from); + EXPECT_EQ(to, expectedRange.to); + + for (auto idx : expectedRange) { + auto value = iter->next(); + ASSERT_TRUE(value.has_value()) << "idx = " << idx << " range = " << expectedRange; + 
EXPECT_EQ(value->logIndex(), idx); + } + } + + EXPECT_EQ(iter->next(), std::nullopt); +} + +TEST_P(InMemoryLogSliceTest, get_iterator_from) { + auto [range, testRange] = GetParam(); + auto const log = createLogForRangeSingleTerm(range); + testRange.to = range.to; // no bound on to + + auto const expectedRange = intersect(range, testRange); + auto iter = log.getIteratorFrom(testRange.from); + + for (auto idx : expectedRange) { + auto value = iter->next(); + ASSERT_TRUE(value.has_value()) << "idx = " << idx << " range = " << expectedRange; + EXPECT_EQ(value->logIndex(), idx); + } + + EXPECT_EQ(iter->next(), std::nullopt); +} + +auto const SliceRanges = ::testing::Values(LogRange(LogIndex{4}, LogIndex{6}), + LogRange(LogIndex{1}, LogIndex{8}), + LogRange(LogIndex{100}, LogIndex{120}), + LogRange(LogIndex{18}, LogIndex{18})); + +INSTANTIATE_TEST_CASE_P(InMemoryLogSliceTest, InMemoryLogSliceTest, + ::testing::Combine(LogRanges, SliceRanges)); + +using TermDistribution = std::map; + +using TermTestData = std::tuple; + +struct IndexOfTermTest : ::testing::TestWithParam, InMemoryLogTestBase { + static auto createLogForDistribution(LogIndex first, TermDistribution const& dist) + -> TestInMemoryLog { + auto transient = InMemoryLog::log_type::transient_type{}; + auto next = first; + for (auto [term, length] : dist) { + for (auto idx : LogRange(next, next + length)) { + transient.push_back( + InMemoryLogEntry({term, idx, LogPayload::createFromString("foo")})); + } + next = next + length; + } + return TestInMemoryLog(transient.persistent()); + } + + static auto getTermBounds(LogIndex first, TermDistribution const& dist, LogTerm wanted) + -> std::optional { + auto next = first; + for (auto [term, length] : dist) { + if (term == wanted) { + return LogRange{next, next + length}; + } + next = next + length; + } + + return std::nullopt; + } +}; + +TEST_P(IndexOfTermTest, first_index_of_term) { + auto [term, first, dist] = GetParam(); + auto log = createLogForDistribution(first, 
dist); + + auto range = getTermBounds(first, dist, term); + auto firstInTerm = log.getFirstIndexOfTerm(term); + auto lastInTerm = log.getLastIndexOfTerm(term); + + ASSERT_EQ(range.has_value(), firstInTerm.has_value()); + ASSERT_EQ(range.has_value(), lastInTerm.has_value()); + + if (range.has_value()) { + EXPECT_EQ(range->from, *firstInTerm) << "term = " << term << " log = " << log.dump(); + EXPECT_EQ(range->to, *lastInTerm + 1); + } +} + +auto Distributions = ::testing::Values( + TermDistribution{ + {LogTerm{1}, 5}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + {LogTerm{3}, 18}, + }, + TermDistribution{ + {LogTerm{1}, 5}, + {LogTerm{2}, 18}, + {LogTerm{3}, 18}, + }); + +INSTANTIATE_TEST_CASE_P( + IndexOfTermTest, IndexOfTermTest, + ::testing::Combine(::testing::Values(LogTerm{1}, LogTerm{2}, LogTerm{3}), + ::testing::Values(LogIndex{1}, LogIndex{10}), Distributions)); diff --git a/tests/Replication2/LeaderAppendEntriesTest.cpp b/tests/Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp similarity index 99% rename from tests/Replication2/LeaderAppendEntriesTest.cpp rename to tests/Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp index 97803bac64b1..e6bd6eee3b76 100644 --- a/tests/Replication2/LeaderAppendEntriesTest.cpp +++ b/tests/Replication2/ReplicatedLog/LeaderAppendEntriesTest.cpp @@ -31,6 +31,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct LeaderAppendEntriesTest : ReplicatedLogTest {}; diff --git a/tests/Replication2/LogReclaimTest.cpp b/tests/Replication2/ReplicatedLog/LogReclaimTest.cpp similarity index 94% rename from tests/Replication2/LogReclaimTest.cpp rename to tests/Replication2/ReplicatedLog/LogReclaimTest.cpp index 0c77eab52fee..4bc07aca3540 100644 --- 
a/tests/Replication2/LogReclaimTest.cpp +++ b/tests/Replication2/ReplicatedLog/LogReclaimTest.cpp @@ -21,7 +21,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "Replication2/ReplicatedLog/types.h" -#include "Replication2/TestHelper.h" +#include "TestHelper.h" #include @@ -30,6 +30,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; TEST_F(ReplicatedLogTest, reclaim_leader_after_term_change) { @@ -42,7 +43,7 @@ TEST_F(ReplicatedLogTest, reclaim_leader_after_term_change) { auto idx = leader->insert(LogPayload::createFromString("payload"), false, LogLeader::doNotTriggerAsyncReplication); auto f = leader->waitFor(idx).then( - [&](futures::Try>&& quorum) { + [&](futures::Try&& quorum) { EXPECT_TRUE(quorum.hasException()); try { quorum.throwIfFailed(); @@ -73,7 +74,7 @@ TEST_F(ReplicatedLogTest, reclaim_follower_after_term_change) { auto idx = leader->insert(LogPayload::createFromString("payload"), false, LogLeader::doNotTriggerAsyncReplication); auto f = follower->waitFor(idx).then( - [&](futures::Try>&& quorum) { + [&](futures::Try&& quorum) { EXPECT_TRUE(quorum.hasException()); try { quorum.throwIfFailed(); diff --git a/tests/Replication2/MaintenanceTests.cpp b/tests/Replication2/ReplicatedLog/MaintenanceTests.cpp similarity index 100% rename from tests/Replication2/MaintenanceTests.cpp rename to tests/Replication2/ReplicatedLog/MaintenanceTests.cpp diff --git a/tests/Replication2/MultiTermTest.cpp b/tests/Replication2/ReplicatedLog/MultiTermTest.cpp similarity index 95% rename from tests/Replication2/MultiTermTest.cpp rename to tests/Replication2/ReplicatedLog/MultiTermTest.cpp index 0604f82c4c7a..040b2e7e306e 100644 --- a/tests/Replication2/MultiTermTest.cpp +++ b/tests/Replication2/ReplicatedLog/MultiTermTest.cpp @@ -23,11 +23,12 @@ #include #include "Replication2/ReplicatedLog/types.h" -#include 
"Replication2/TestHelper.h" +#include "TestHelper.h" using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct MultiTermTest : ReplicatedLogTest {}; @@ -44,8 +45,8 @@ TEST_F(MultiTermTest, add_follower_test) { leader->triggerAsyncReplication(); { ASSERT_TRUE(f.isReady()); - auto const& quorum = f.get(); - EXPECT_EQ(quorum->quorum, std::vector{"leader"}); + auto const& result = f.get(); + EXPECT_EQ(result.quorum->quorum, std::vector{"leader"}); } { auto stats = std::get(leader->getStatus().getVariant()).local; @@ -250,10 +251,11 @@ TEST_F(MultiTermTest, resign_leader_append_entries) { ASSERT_TRUE(f2.isReady()); { - auto quorum = f2.get(); - EXPECT_EQ(quorum->index, LogIndex{3}); - EXPECT_EQ(quorum->term, LogTerm{2}); - EXPECT_EQ(quorum->quorum, + auto result = f2.get(); + EXPECT_EQ(result.currentCommitIndex, LogIndex{3}); + EXPECT_EQ(result.quorum->index, LogIndex{3}); + EXPECT_EQ(result.quorum->term, LogTerm{2}); + EXPECT_EQ(result.quorum->quorum, (std::vector{"newLeader", "newFollower"})); } } diff --git a/tests/Replication2/ReplicatedLogTest.cpp b/tests/Replication2/ReplicatedLog/ReplicatedLogTest.cpp similarity index 96% rename from tests/Replication2/ReplicatedLogTest.cpp rename to tests/Replication2/ReplicatedLog/ReplicatedLogTest.cpp index f51eb79fdd43..e23c92eafd14 100644 --- a/tests/Replication2/ReplicatedLogTest.cpp +++ b/tests/Replication2/ReplicatedLog/ReplicatedLogTest.cpp @@ -20,10 +20,11 @@ /// @author Tobias Gödderz //////////////////////////////////////////////////////////////////////////////// -#include "Replication2/TestHelper.h" +#include "TestHelper.h" using namespace arangodb; using namespace arangodb::replication2; +using namespace arangodb::replication2::test; TEST(LogIndexTest, compareOperators) { diff --git a/tests/Replication2/RewriteLogTest.cpp b/tests/Replication2/ReplicatedLog/RewriteLogTest.cpp similarity index 97% 
rename from tests/Replication2/RewriteLogTest.cpp rename to tests/Replication2/ReplicatedLog/RewriteLogTest.cpp index 56d2507f6bc0..db8d4f5785eb 100644 --- a/tests/Replication2/RewriteLogTest.cpp +++ b/tests/Replication2/ReplicatedLog/RewriteLogTest.cpp @@ -22,11 +22,12 @@ #include "Replication2/ReplicatedLog/LogCore.h" #include "Replication2/ReplicatedLog/types.h" -#include "Replication2/TestHelper.h" +#include "TestHelper.h" using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; struct RewriteLogTest : ReplicatedLogTest {}; @@ -101,7 +102,8 @@ TEST_F(RewriteLogTest, rewrite_old_leader) { } // AppendEntries with prevLogIndex 0 -> success = true // AppendEntries with new commitIndex - EXPECT_EQ(number_of_runs, 2); + // AppendEntries with new lci + EXPECT_EQ(number_of_runs, 3); } { diff --git a/tests/Replication2/RocksDBLogTest.cpp b/tests/Replication2/ReplicatedLog/RocksDBLogTest.cpp similarity index 100% rename from tests/Replication2/RocksDBLogTest.cpp rename to tests/Replication2/ReplicatedLog/RocksDBLogTest.cpp diff --git a/tests/Replication2/SimpleInsertTests.cpp b/tests/Replication2/ReplicatedLog/SimpleInsertTests.cpp similarity index 93% rename from tests/Replication2/SimpleInsertTests.cpp rename to tests/Replication2/ReplicatedLog/SimpleInsertTests.cpp index 05f036367b59..fbabfe4d58c2 100644 --- a/tests/Replication2/SimpleInsertTests.cpp +++ b/tests/Replication2/ReplicatedLog/SimpleInsertTests.cpp @@ -32,6 +32,7 @@ using namespace arangodb; using namespace arangodb::replication2; using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; TEST_F(ReplicatedLogTest, write_single_entry_to_follower) { auto coreA = makeLogCore(LogId{1}); @@ -169,10 +170,11 @@ TEST_F(ReplicatedLogTest, write_single_entry_to_follower) { { // Expect the quorum to consist of the follower only ASSERT_TRUE(f.isReady()); - auto 
quorum = f.get(); - EXPECT_EQ(quorum->index, LogIndex{2}); - EXPECT_EQ(quorum->term, LogTerm{1}); - EXPECT_EQ(quorum->quorum, (std::vector{leaderId, followerId})); + auto result = f.get(); + EXPECT_EQ(result.currentCommitIndex, LogIndex{2}); + EXPECT_EQ(result.quorum->index, LogIndex{2}); + EXPECT_EQ(result.quorum->term, LogTerm{1}); + EXPECT_EQ(result.quorum->quorum, (std::vector{leaderId, followerId})); } // Follower should have pending append entries @@ -187,19 +189,23 @@ TEST_F(ReplicatedLogTest, write_single_entry_to_follower) { EXPECT_EQ(status.local.spearHead.index, LogIndex{2}); } + // LCI update + EXPECT_TRUE(follower->hasPendingAppendEntries()); + follower->runAsyncAppendEntries(); EXPECT_FALSE(follower->hasPendingAppendEntries()); } { - // Metric should have registered four appendEntries. + // Metric should have registered six appendEntries. // There was one insert, resulting in one appendEntries each to the follower // and the local follower. After the followers responded, the commit index // is updated, and both followers get another appendEntries request. + // Finally, the LCI is updated with another round of requests. 
auto numAppendEntries = countHistogramEntries(_logMetricsMock->replicatedLogAppendEntriesRttUs); - EXPECT_EQ(numAppendEntries, 4); + EXPECT_EQ(numAppendEntries, 6); auto numFollowerAppendEntries = countHistogramEntries(_logMetricsMock->replicatedLogFollowerAppendEntriesRtUs); - EXPECT_EQ(numFollowerAppendEntries, 4); + EXPECT_EQ(numFollowerAppendEntries, 6); } } @@ -262,7 +268,8 @@ TEST_F(ReplicatedLogTest, wake_up_as_leader_with_persistent_data) { // AppendEntries with prevLogIndex 2 -> success = false // AppendEntries with prevLogIndex 0 -> success = true // AppendEntries with new commitIndex - EXPECT_EQ(number_of_runs, 3); + // AppendEntries with new LCI + EXPECT_EQ(number_of_runs, 4); } { @@ -374,10 +381,11 @@ TEST_F(ReplicatedLogTest, multiple_follower) { // and update of commitIndex on both follower { ASSERT_TRUE(future.isReady()); - auto quorum = future.get(); - EXPECT_EQ(quorum->term, LogTerm{1}); - EXPECT_EQ(quorum->index, LogIndex{2}); - EXPECT_EQ(quorum->quorum, (std::vector{leaderId, followerId_1, followerId_2})); + auto result = future.get(); + EXPECT_EQ(result.currentCommitIndex, LogIndex{2}); + EXPECT_EQ(result.quorum->term, LogTerm{1}); + EXPECT_EQ(result.quorum->index, LogIndex{2}); + EXPECT_EQ(result.quorum->quorum, (std::vector{leaderId, followerId_1, followerId_2})); } EXPECT_TRUE(follower_1->hasPendingAppendEntries()); @@ -401,6 +409,12 @@ TEST_F(ReplicatedLogTest, multiple_follower) { EXPECT_EQ(status.local.spearHead.index, LogIndex{2}); } + // LCI updates + follower_1->runAsyncAppendEntries(); + EXPECT_FALSE(follower_1->hasPendingAppendEntries()); // no lci update yet + follower_2->runAsyncAppendEntries(); + EXPECT_TRUE(follower_2->hasPendingAppendEntries()); + follower_1->runAsyncAppendEntries(); EXPECT_FALSE(follower_1->hasPendingAppendEntries()); follower_2->runAsyncAppendEntries(); @@ -483,7 +497,8 @@ TEST_F(ReplicatedLogTest, write_concern_one_immediate_leader_commit_on_startup) } // AppendEntries with prevLogIndex 2 -> success = false, 
replicated log empty // AppendEntries with prevLogIndex 2 -> success = true, including commit index - EXPECT_EQ(number_of_runs, 2); + // AppendEntries with LCI + EXPECT_EQ(number_of_runs, 3); } { diff --git a/tests/Replication2/ReplicatedLog/TestHelper.cpp b/tests/Replication2/ReplicatedLog/TestHelper.cpp new file mode 100644 index 000000000000..8fba1ec85407 --- /dev/null +++ b/tests/Replication2/ReplicatedLog/TestHelper.cpp @@ -0,0 +1,30 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "TestHelper.h" + +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +#include diff --git a/tests/Replication2/ReplicatedLog/TestHelper.h b/tests/Replication2/ReplicatedLog/TestHelper.h new file mode 100644 index 000000000000..c557c810c837 --- /dev/null +++ b/tests/Replication2/ReplicatedLog/TestHelper.h @@ -0,0 +1,98 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "Replication2/Mocks/ReplicatedLogMetricsMock.h" + +#include "Replication2/ReplicatedLog/ILogParticipant.h" +#include "Replication2/ReplicatedLog/InMemoryLog.h" +#include "Replication2/ReplicatedLog/LogCore.h" +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/ReplicatedLog/LogStatus.h" +#include "Replication2/ReplicatedLog/PersistedLog.h" +#include "Replication2/ReplicatedLog/ReplicatedLog.h" +#include "Replication2/ReplicatedLog/types.h" + +#include + +#include +#include +#include + +#include "Replication2/Mocks/PersistedLog.h" +#include "Replication2/Mocks/FakeReplicatedLog.h" + +namespace arangodb::replication2::test { + +using namespace replicated_log; + +struct ReplicatedLogTest : ::testing::Test { + + auto makeLogCore(LogId id) -> std::unique_ptr { + auto persisted = makePersistedLog(id); + return std::make_unique(persisted); + } + + auto getPersistedLogById(LogId id) -> std::shared_ptr { + return _persistedLogs.at(id); + } + + auto makePersistedLog(LogId id) -> std::shared_ptr { + auto persisted = std::make_shared(id); + _persistedLogs[id] = persisted; + return persisted; + } + + auto makeReplicatedLog(LogId id) -> std::shared_ptr { + auto core = makeLogCore(id); + return std::make_shared(std::move(core), _logMetricsMock, + LoggerContext(Logger::FIXME)); + } + + auto makeReplicatedLogWithAsyncMockLog(LogId id) -> std::shared_ptr { + auto persisted = std::make_shared(id); + _persistedLogs[id] = persisted; + auto core = std::make_unique(persisted); + return std::make_shared(std::move(core), _logMetricsMock, + LoggerContext(Logger::FIXME)); + } + + auto defaultLogger() { + return LoggerContext(Logger::REPLICATION2); + } + + auto stopAsyncMockLogs() -> void { + for (auto const& it : 
_persistedLogs) { + if (auto log = std::dynamic_pointer_cast(it.second); log != nullptr) { + log->stop(); + } + } + } + + std::unordered_map> _persistedLogs; + std::shared_ptr _logMetricsMock = std::make_shared(); +}; + + +} diff --git a/tests/Replication2/UpdateReplicatedLogTests.cpp b/tests/Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp similarity index 99% rename from tests/Replication2/UpdateReplicatedLogTests.cpp rename to tests/Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp index 89e7e7dbc796..c82a0e0c3bb0 100644 --- a/tests/Replication2/UpdateReplicatedLogTests.cpp +++ b/tests/Replication2/ReplicatedLog/UpdateReplicatedLogTests.cpp @@ -31,6 +31,7 @@ using namespace arangodb; using namespace arangodb::replication2; +using namespace arangodb::replication2::test; namespace { diff --git a/tests/Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp b/tests/Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp new file mode 100644 index 000000000000..9d4fdbd3b3aa --- /dev/null +++ b/tests/Replication2/ReplicatedState/AbstractStateMachinePollTest.cpp @@ -0,0 +1,206 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +#include "StateMachineTestHelper.h" + +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" + +#include "Basics/voc-errors.h" +#include "Replication2/ReplicatedState/AbstractStateMachine.h" + +using namespace arangodb; +using namespace arangodb::replication2; + +namespace { +struct MyTestStateMachine : replicated_state::AbstractStateMachine { + explicit MyTestStateMachine(std::shared_ptr log) + : replicated_state::AbstractStateMachine(std::move(log)) {} + + auto add(std::string_view value) -> LogIndex { + auto idx = insert(TestLogEntry(std::string{value})); + waitFor(idx).thenValue([weak = weak_from_this()](auto&& res) mutable { + if (auto self = weak.lock()) { + self->triggerPollEntries(); + } + return Result{TRI_ERROR_NO_ERROR}; + }); + return idx; + } + + auto get() -> std::unordered_set { + std::unique_lock guard(mutex); + return _entries; + } + + protected: + auto installSnapshot(ParticipantId const& id) -> futures::Future override { + TRI_ASSERT(false); + } + auto applyEntries(std::unique_ptr ptr) + -> futures::Future override { + std::unique_lock guard(mutex); + while (auto e = ptr->next()) { + _entries.insert(e->payload); + } + + return futures::Future{std::in_place, TRI_ERROR_NO_ERROR}; + } + + std::mutex mutex; + std::unordered_set _entries; +}; + +struct ParticipantBase { + explicit ParticipantBase(std::shared_ptr const& log) + : state(std::make_shared(log)) {} + std::shared_ptr state; +}; + +struct Follower : ParticipantBase { + explicit Follower(std::shared_ptr const& log, + ParticipantId const& p, LogTerm term, ParticipantId const& leader) + : ParticipantBase(log), log(log->becomeFollower(p, term, leader)) {} + + std::shared_ptr log; +}; + +struct Leader : ParticipantBase { + explicit 
Leader(std::shared_ptr const& log, + LogConfig config, ParticipantId id, LogTerm term, + std::vector> const& follower) + : ParticipantBase(log), + log(log->becomeLeader(config, std::move(id), term, follower)) {} + + std::shared_ptr log; +}; + +} + +struct PollStateMachineTest : StateMachineTest {}; + +TEST_F(PollStateMachineTest, check_apply_entries) { + auto A = createReplicatedLog(); + auto B = createReplicatedLog(); + + { + auto follower = std::make_shared(B, "B", LogTerm{1}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{1}, + std::vector>{follower->log}); + + leader->state->add("first"); + auto f = follower->state->triggerPollEntries(); + ASSERT_TRUE(f.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + { + auto set = leader->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + } + + { + auto follower = std::make_shared(B, "B", LogTerm{2}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{2}, + std::vector>{follower->log}); + + auto f1 = leader->state->triggerPollEntries(); + ASSERT_FALSE(f1.isReady()); + auto f2 = follower->state->triggerPollEntries(); + ASSERT_FALSE(f2.isReady()); + + leader->log->triggerAsyncReplication(); + ASSERT_TRUE(f1.isReady()); + ASSERT_TRUE(f2.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + { + auto set = leader->state->get(); + EXPECT_EQ(set.size(), 1); + EXPECT_EQ(set, std::unordered_set{"first"s}); + } + } +} + +TEST_F(PollStateMachineTest, insert_multiple) { + auto A = createReplicatedLog(); + auto B = createReplicatedLog(); + + { + auto follower = std::make_shared(B, "B", LogTerm{1}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{1}, + 
std::vector>{follower->log}); + + leader->state->add("first"); + leader->state->add("second"); + leader->state->add("third"); + auto f = follower->state->triggerPollEntries(); + ASSERT_TRUE(f.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set, std::unordered_set({"first"s, "second"s, "third"s})); + } + } + + { + auto follower = std::make_shared(B, "B", LogTerm{2}, "A"); + auto leader = std::make_shared( + A, LogConfig{2, false}, "A", LogTerm{2}, + std::vector>{follower->log}); + + auto f2 = follower->state->triggerPollEntries(); + ASSERT_FALSE(f2.isReady()); + + leader->log->triggerAsyncReplication(); + ASSERT_TRUE(f2.isReady()); + + using namespace std::string_literals; + + { + auto set = follower->state->get(); + EXPECT_EQ(set, std::unordered_set({"first"s, "second"s, "third"s})); + } + } +} diff --git a/tests/Replication2/ReplicatedState/StateMachineTestHelper.cpp b/tests/Replication2/ReplicatedState/StateMachineTestHelper.cpp new file mode 100644 index 000000000000..e290d368368d --- /dev/null +++ b/tests/Replication2/ReplicatedState/StateMachineTestHelper.cpp @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "StateMachineTestHelper.h" + +#include +#include + +#include +#include "Replication2/Mocks/PersistedLog.h" + +void arangodb::TestLogEntry::toVelocyPack(arangodb::velocypack::Builder& builder) const { + velocypack::ObjectBuilder ob(&builder); + builder.add("payload", velocypack::Value(payload)); +} + +auto arangodb::TestLogEntry::fromVelocyPack(arangodb::velocypack::Slice slice) + -> arangodb::TestLogEntry { + return TestLogEntry(slice.get("payload").copyString()); +} + +#include "Replication2/ReplicatedState/AbstractStateMachine.tpp" + +template struct replicated_state::AbstractStateMachine; + +auto StateMachineTest::createReplicatedLog() + -> std::shared_ptr { + auto persisted = std::make_shared(LogId{0}); + auto core = std::make_unique(persisted); + auto metrics = std::make_shared(); + return std::make_shared(std::move(core), metrics, LoggerContext(Logger::REPLICATION2)); +} diff --git a/tests/Replication2/ReplicatedState/StateMachineTestHelper.h b/tests/Replication2/ReplicatedState/StateMachineTestHelper.h new file mode 100644 index 000000000000..04e4aaac2e03 --- /dev/null +++ b/tests/Replication2/ReplicatedState/StateMachineTestHelper.h @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include + +#include + +#include "Replication2/ReplicatedLog/ReplicatedLog.h" + +namespace arangodb { + +namespace velocypack { +class Builder; +class Slice; +} // namespace velocypack + +struct StateMachineTest : ::testing::Test { + // creates one new, independent replicated log; the caller assigns leader/follower roles + static auto createReplicatedLog() -> std::shared_ptr; +}; + +struct TestLogEntry { + explicit TestLogEntry(std::string payload) : payload(std::move(payload)) {} + static auto fromVelocyPack(velocypack::Slice slice) -> TestLogEntry; + void toVelocyPack(velocypack::Builder& builder) const; + std::string payload; +}; + +} // namespace arangodb diff --git a/tests/Replication2/Streams/LogMultiplexerTest.cpp b/tests/Replication2/Streams/LogMultiplexerTest.cpp new file mode 100644 index 000000000000..fb6ee822b599 --- /dev/null +++ b/tests/Replication2/Streams/LogMultiplexerTest.cpp @@ -0,0 +1,205 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include +#include + +#include +#include +#include + +#include + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::streams; +using namespace arangodb::replication2::test; + +struct LogMultiplexerTest : LogMultiplexerTestBase {}; + +TEST_F(LogMultiplexerTest, leader_follower_test) { + auto ints = {12, 13, 14, 15, 16}; + auto strings = {"foo", "bar", "baz", "fuz"}; + + auto leaderLog = createReplicatedLog(); + auto followerLog = createReplicatedLog(); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + + auto mux = LogMultiplexer::construct(leader); + auto demux = LogDemultiplexer::construct(follower); + demux->listen(); + + auto leaderStreamA = mux->getStreamBaseById(); + auto leaderStreamB = mux->getStreamBaseById(); + + { + auto iterA = ints.begin(); + auto iterB = strings.begin(); + while (iterA != ints.end() || iterB != strings.end()) { + if (iterA != ints.end()) { + leaderStreamA->insert(*iterA); + ++iterA; + } + if (iterB != strings.end()) { + leaderStreamB->insert(*iterB); + ++iterB; + } + } + } + + auto followerStreamA = demux->getStreamBaseById(); + auto followerStreamB = demux->getStreamBaseById(); + + auto futureA = followerStreamA->waitFor(LogIndex{2}); + auto futureB = followerStreamB->waitFor(LogIndex{1}); + ASSERT_TRUE(futureA.isReady()); + ASSERT_TRUE(futureB.isReady()); + + { + auto iter = followerStreamA->getAllEntriesIterator(); + for (auto x : ints) { + auto entry = iter->next(); + ASSERT_TRUE(entry.has_value()) << "expected value " << x; + auto const& [index, value] = *entry; + EXPECT_EQ(value, x); + } + EXPECT_EQ(iter->next(), std::nullopt); + } + { + auto iter = 
followerStreamB->getAllEntriesIterator(); + for (auto x : strings) { + auto entry = iter->next(); + ASSERT_TRUE(entry.has_value()); + auto const& [index, value] = *entry; + EXPECT_EQ(value, x); + } + EXPECT_EQ(iter->next(), std::nullopt); + } +} + +TEST_F(LogMultiplexerTest, leader_wait_for) { + auto leaderLog = createReplicatedLog(); + auto followerLog = createFakeReplicatedLog(); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + auto mux = LogMultiplexer::construct(leader); + + auto stream = mux->getStreamById(); + + // Write an entry and wait for it + auto idx = stream->insert(12); + auto f = stream->waitFor(idx); + // Future not yet resolved because follower did not answer yet + EXPECT_FALSE(f.isReady()); + + // let follower run + EXPECT_TRUE(follower->hasPendingAppendEntries()); + while (follower->hasPendingAppendEntries()) { + follower->runAsyncAppendEntries(); + } + + // future should be ready + ASSERT_TRUE(f.isReady()); +} + +TEST_F(LogMultiplexerTest, leader_wait_for_multiple) { + auto leaderLog = createReplicatedLog(); + auto followerLog = createFakeReplicatedLog(); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + auto mux = LogMultiplexer::construct(leader); + + auto streamA = mux->getStreamById(); + auto streamB = mux->getStreamById(); + + // Write an entry and wait for it + auto idxA = streamA->insert(12); + auto fA = streamA->waitFor(idxA); + // Future not yet resolved because follower did not answer yet + EXPECT_FALSE(fA.isReady()); + // Follower has pending append entries + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Write another entry + auto idxB = streamB->insert("hello world"); + auto fB = streamB->waitFor(idxB); + // Both futures are not yet resolved because follower 
did not answer yet + EXPECT_FALSE(fB.isReady()); + EXPECT_FALSE(fA.isReady()); + + // Do a single follower run + follower->runAsyncAppendEntries(); + + // future A should be ready and follower has still pending append entries + EXPECT_TRUE(fA.isReady()); + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Now future B should become ready. + while (follower->hasPendingAppendEntries()) { + follower->runAsyncAppendEntries(); + } + EXPECT_TRUE(fB.isReady()); +} + +TEST_F(LogMultiplexerTest, follower_wait_for) { + auto leaderLog = createReplicatedLog(LogId{1}); + auto followerLog = createFakeReplicatedLog(LogId{2}); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto leader = + leaderLog->becomeLeader(LogConfig(2, false), "leader", LogTerm{1}, {follower}); + // handle first leader log entry (empty) + leader->triggerAsyncReplication(); + while (follower->hasPendingAppendEntries()) { + follower->runAsyncAppendEntries(); + } + + auto mux = LogMultiplexer::construct(leader); + auto demux = LogDemultiplexer::construct(follower); + demux->listen(); + + auto inStream = mux->getStreamById(); + auto outStream = demux->getStreamById(); + + auto idx = inStream->insert(17); + auto f = outStream->waitFor(idx); + EXPECT_FALSE(f.isReady()); + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Handle append request, entry not yet committed on follower + follower->runAsyncAppendEntries(); + EXPECT_FALSE(f.isReady()); + EXPECT_TRUE(follower->hasPendingAppendEntries()); + + // Receive commit update + follower->runAsyncAppendEntries(); + EXPECT_TRUE(f.isReady()); +} diff --git a/tests/Replication2/Streams/MultiplexerConcurrencyTest.cpp b/tests/Replication2/Streams/MultiplexerConcurrencyTest.cpp new file mode 100644 index 000000000000..6ff1f85b9fbd --- /dev/null +++ b/tests/Replication2/Streams/MultiplexerConcurrencyTest.cpp @@ -0,0 +1,175 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER 
+/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include +#include +#include + +#include "Replication2/ReplicatedLog/LogFollower.h" +#include "Replication2/ReplicatedLog/LogLeader.h" +#include "Replication2/Streams/LogMultiplexer.h" + +#include "Replication2/Streams/TestLogSpecification.h" + +using namespace arangodb; +using namespace arangodb::replication2; +using namespace arangodb::replication2::replicated_log; +using namespace arangodb::replication2::test; + +struct LogMultiplexerConcurrencyTest : LogMultiplexerTestBase { + using Spec = test::MyTestSpecification; + + template + struct StateMachine : std::enable_shared_from_this> { + using ValueType = streams::stream_type_by_id_t; + + explicit StateMachine(std::shared_ptr> stream) + : _stream(std::move(stream)) {} + + void start() { + waitForStream(LogIndex{1}); + } + + void waitForStream(LogIndex next) { + _stream->waitForIterator(next).thenValue([weak = this->weak_from_this()](auto&& iter) { + if (auto self = weak.lock(); self) { + auto [start, stop] = iter->range(); + TRI_ASSERT(start != stop); + while (auto memtry = iter->next()) { + self->_observedLog.emplace(*memtry); + } + self->waitForStream(stop); + } else { + TRI_ASSERT(false); + } + }); 
+ } + + std::map _observedLog; + std::shared_ptr> _stream; + }; + + template + struct StateCombiner; + template + struct StateCombiner> { + std::tuple>...> _states; + + template + explicit StateCombiner(std::shared_ptr const& demux) + : _states(std::make_shared>( + demux->template getStreamById())...) { + ((std::get>>(_states)->start()), ...); + } + }; + + struct FollowerInstance { + explicit FollowerInstance(std::shared_ptr const& follower) + : _follower(follower), + _demux(streams::LogDemultiplexer::construct(follower)), + combiner(_demux) { + _demux->listen(); + } + + std::shared_ptr _follower; + std::shared_ptr> _demux; + StateCombiner combiner; + }; + + struct LeaderInstance { + explicit LeaderInstance(std::shared_ptr const& leader) + : _leader(leader), + _mux(streams::LogMultiplexer::construct(leader)), + combiner(_mux) {} + + std::shared_ptr _leader; + std::shared_ptr> _mux; + StateCombiner combiner; + }; +}; + +TEST_F(LogMultiplexerConcurrencyTest, test) { + auto followerLog = createAsyncReplicatedLog(LogId{1}); + auto leaderLog = createAsyncReplicatedLog(LogId{2}); + + auto follower = followerLog->becomeFollower("follower", LogTerm{1}, "leader"); + auto asyncFollower = std::make_shared(follower); + + auto leader = leaderLog->becomeLeader(LogConfig(2, false), "leader", + LogTerm{1}, {asyncFollower}); + + auto followerInstance = std::make_shared(follower); + auto leaderInstance = std::make_shared(leader); + + auto producer = leaderInstance->_mux->getStreamById(); + + constexpr std::size_t num_threads = 8; + constexpr std::size_t num_inserts_per_thread = 10000; + constexpr auto lastIndex = LogIndex{num_threads * num_inserts_per_thread + 1}; + + std::vector threads; + std::generate_n(std::back_inserter(threads), num_threads, [&]{ + return std::thread([&, producer]{ + auto index = LogIndex{0}; + for (std::size_t i = 0; i < num_inserts_per_thread; i++) { + index = producer->insert(i); + } + producer->waitFor(index).wait(); + }); + }); + + 
std::for_each(std::begin(threads), std::end(threads), [](std::thread& t) { + t.join(); + }); + asyncFollower->waitFor(lastIndex).wait(); + asyncFollower->stop(); + + auto iterA = follower->waitForIterator(LogIndex{1}).get(); + auto iterB = leader->waitForIterator(LogIndex{1}).get(); + + EXPECT_EQ(iterA->range(), iterB->range()); + while (auto A = iterA->next()) { + auto B = iterB->next(); + ASSERT_TRUE(B.has_value()); + EXPECT_EQ(A->logIndex(), B->logIndex()); + bool equal = basics::VelocyPackHelper::equal(A->logPayload(), B->logPayload(), true); + EXPECT_TRUE(equal) << A->logPayload().toJson() << " " << B->logPayload().toJson(); + } + EXPECT_FALSE(iterB->next().has_value()); + + MyTestSpecification::for_each_descriptor([&](auto p) { + using Descriptor = decltype(p); + auto streamA = leaderInstance->_mux->getStreamByDescriptor(); + auto streamB = followerInstance->_demux->getStreamByDescriptor(); + + auto iterA = streamA->waitForIterator(LogIndex{1}).get(); + auto iterB = streamB->waitForIterator(LogIndex{1}).get(); + + EXPECT_EQ(iterA->range(), iterB->range()); + while (auto A = iterA->next()) { + ASSERT_EQ(A, iterB->next()); + } + EXPECT_FALSE(iterB->next().has_value()); + }); +} diff --git a/tests/Replication2/Streams/TestLogSpecification.cpp b/tests/Replication2/Streams/TestLogSpecification.cpp new file mode 100644 index 000000000000..2facd49cbf46 --- /dev/null +++ b/tests/Replication2/Streams/TestLogSpecification.cpp @@ -0,0 +1,28 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#include "TestLogSpecification.h" + +#include "Replication2/Streams/LogMultiplexer.tpp" + +template struct arangodb::replication2::streams::LogMultiplexer; +template struct arangodb::replication2::streams::LogDemultiplexer; diff --git a/tests/Replication2/Streams/TestLogSpecification.h b/tests/Replication2/Streams/TestLogSpecification.h new file mode 100644 index 000000000000..a34fe8c4ae9e --- /dev/null +++ b/tests/Replication2/Streams/TestLogSpecification.h @@ -0,0 +1,109 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Lars Maier +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include + +#include +#include + +#include +#include + +#include +#include +#include + +namespace arangodb::replication2::test { + +struct LogMultiplexerTestBase : ::testing::Test { + static auto createReplicatedLog(LogId id = LogId{0}) + -> std::shared_ptr { + return createReplicatedLogImpl(id); + } + + static auto createAsyncReplicatedLog(LogId id = LogId{0}) + -> std::shared_ptr { + return createReplicatedLogImpl(id); + } + + static auto createFakeReplicatedLog(LogId id = LogId{0}) + -> std::shared_ptr { + return createReplicatedLogImpl(id); + } + + private: + template + static auto createReplicatedLogImpl(LogId id) -> std::shared_ptr { + auto persisted = std::make_shared(id); + auto core = std::make_unique(persisted); + auto metrics = std::make_shared(); + return std::make_shared(std::move(core), metrics, + LoggerContext(Logger::REPLICATION2)); + } +}; + +struct default_deserializer { + template + auto operator()(streams::serializer_tag_t, velocypack::Slice s) -> T { + return s.extract(); + } +}; + +struct default_serializer { + template + void operator()(streams::serializer_tag_t, T const& t, velocypack::Builder& b) { + b.add(velocypack::Value(t)); + } +}; + +inline constexpr auto my_int_stream_id = streams::StreamId{1}; +inline constexpr auto my_string_stream_id = streams::StreamId{8}; +inline constexpr auto my_string2_stream_id = streams::StreamId{9}; + +inline constexpr auto my_int_stream_tag = streams::StreamTag{12}; +inline constexpr auto my_string_stream_tag = streams::StreamTag{55}; +inline constexpr auto my_string2_stream_tag = streams::StreamTag{56}; +inline constexpr auto my_string2_stream_tag2 = streams::StreamTag{58}; + +/* clang-format off */ + +using MyTestSpecification = streams::stream_descriptor_set< + streams::stream_descriptor + >>, + 
streams::stream_descriptor + >>, + streams::stream_descriptor, + streams::tag_descriptor + >> + >; + +/* clang-format on */ + +} // namespace arangodb::replication2::test + +extern template struct arangodb::replication2::streams::LogMultiplexer; +extern template struct arangodb::replication2::streams::LogDemultiplexer; diff --git a/tests/Replication2/TestHelper.h b/tests/Replication2/TestHelper.h deleted file mode 100644 index 48ca40f4a489..000000000000 --- a/tests/Replication2/TestHelper.h +++ /dev/null @@ -1,252 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// DISCLAIMER -/// -/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. 
-/// -/// Copyright holder is ArangoDB GmbH, Cologne, Germany -/// -/// @author Lars Maier -//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include "ReplicatedLogMetricsMock.h" - -#include "Replication2/ReplicatedLog/ILogParticipant.h" -#include "Replication2/ReplicatedLog/InMemoryLog.h" -#include "Replication2/ReplicatedLog/LogCore.h" -#include "Replication2/ReplicatedLog/LogFollower.h" -#include "Replication2/ReplicatedLog/LogLeader.h" -#include "Replication2/ReplicatedLog/LogStatus.h" -#include "Replication2/ReplicatedLog/PersistedLog.h" -#include "Replication2/ReplicatedLog/ReplicatedLog.h" -#include "Replication2/ReplicatedLog/types.h" - -#include - -#include -#include -#include - -namespace arangodb::replication2 { - -using namespace replicated_log; - -struct MockLog : replication2::replicated_log::PersistedLog { - using storeType = std::map; - - explicit MockLog(replication2::LogId id); - MockLog(replication2::LogId id, storeType storage); - - auto insert(replication2::replicated_log::PersistedLogIterator& iter, WriteOptions const&) -> Result override; - auto insertAsync(std::unique_ptr iter, - WriteOptions const&) -> futures::Future override; - auto read(replication2::LogIndex start) - -> std::unique_ptr override; - auto removeFront(replication2::LogIndex stop) -> Result override; - auto removeBack(replication2::LogIndex start) -> Result override; - auto drop() -> Result override; - - void setEntry(replication2::LogIndex idx, replication2::LogTerm term, - replication2::LogPayload payload); - void setEntry(replication2::PersistingLogEntry); - - [[nodiscard]] storeType getStorage() const { return _storage; } - private: - using iteratorType = storeType::iterator; - storeType _storage; -}; - -struct AsyncMockLog : MockLog { - - explicit AsyncMockLog(replication2::LogId id); - - ~AsyncMockLog() noexcept; - - auto insertAsync(std::unique_ptr iter, - WriteOptions const&) -> futures::Future override; - - auto 
stop() noexcept -> void { - if (!_stopping) { - { - std::unique_lock guard(_mutex); - _stopping = true; - _cv.notify_all(); - } - _asyncWorker.join(); - } - } - - private: - struct QueueEntry { - WriteOptions opts; - std::unique_ptr iter; - futures::Promise promise; - }; - - void runWorker(); - - std::mutex _mutex; - std::vector> _queue; - std::condition_variable _cv; - std::atomic _stopping = false; - bool _stopped = false; - // _asyncWorker *must* be initialized last, otherwise starting the thread - // races with initializing the coordination variables. - std::thread _asyncWorker; -}; - -struct DelayedFollowerLog : AbstractFollower { - explicit DelayedFollowerLog(std::shared_ptr follower) - : _follower(std::move(follower)) {} - - DelayedFollowerLog(LoggerContext const& logContext, - std::shared_ptr logMetricsMock, - ParticipantId const& id, std::unique_ptr logCore, - LogTerm term, ParticipantId leaderId) - : DelayedFollowerLog([&] { - auto inMemoryLog = InMemoryLog{logContext, *logCore}; - return std::make_shared(logContext, std::move(logMetricsMock), - id, std::move(logCore), term, - std::move(leaderId), - std::move(inMemoryLog)); - }()) {} - - auto appendEntries(AppendEntriesRequest req) - -> arangodb::futures::Future override { - auto future = _asyncQueue.doUnderLock([&](auto& queue) { - return queue.emplace_back(std::make_shared(std::move(req))) - ->promise.getFuture(); - }); - return std::move(future).thenValue( - [this](auto&& result) mutable { - return _follower->appendEntries(std::forward(result)); - }); - } - - void runAsyncAppendEntries() { - auto asyncQueue = _asyncQueue.doUnderLock([](auto& _queue) { - auto queue = std::move(_queue); - _queue.clear(); - return queue; - }); - - for (auto& p : asyncQueue) { - p->promise.setValue(std::move(p->request)); - } - } - - using WaitForAsyncPromise = futures::Promise; - - struct AsyncRequest { - explicit AsyncRequest(AppendEntriesRequest request) - : request(std::move(request)) {} - AppendEntriesRequest request; 
- WaitForAsyncPromise promise; - }; - [[nodiscard]] auto pendingAppendEntries() const - -> std::deque> { - return _asyncQueue.copy(); - } - [[nodiscard]] auto hasPendingAppendEntries() const -> bool { - return _asyncQueue.doUnderLock( - [](auto const& queue) { return !queue.empty(); }); - } - - auto getParticipantId() const noexcept -> ParticipantId const& override { - return _follower->getParticipantId(); - } - - auto getStatus() const -> LogStatus { - return _follower->getStatus(); - } - - auto resign() && { - return std::move(*_follower).resign(); - } - - auto waitFor(LogIndex index) { - return _follower->waitFor(index); - } - - auto waitForIterator(LogIndex index) { - return _follower->waitForIterator(index); - } - private: - Guarded>> _asyncQueue; - std::shared_ptr _follower; -}; - -struct TestReplicatedLog : ReplicatedLog { - using ReplicatedLog::ReplicatedLog; - auto becomeFollower(ParticipantId const& id, LogTerm term, ParticipantId leaderId) - -> std::shared_ptr; - auto becomeLeader(ParticipantId const& id, LogTerm term, - std::vector> const& follower, - std::size_t writeConcern) -> std::shared_ptr; - auto becomeLeader(LogConfig config, ParticipantId id, LogTerm term, - std::vector> const& follower) - -> std::shared_ptr; -}; - -struct ReplicatedLogTest : ::testing::Test { - - auto makeLogCore(LogId id) -> std::unique_ptr { - auto persisted = makePersistedLog(id); - return std::make_unique(persisted); - } - - auto getPersistedLogById(LogId id) -> std::shared_ptr { - return _persistedLogs.at(id); - } - - auto makePersistedLog(LogId id) -> std::shared_ptr { - auto persisted = std::make_shared(id); - _persistedLogs[id] = persisted; - return persisted; - } - - auto makeReplicatedLog(LogId id) -> std::shared_ptr { - auto core = makeLogCore(id); - return std::make_shared(std::move(core), _logMetricsMock, - LoggerContext(Logger::FIXME)); - } - - auto makeReplicatedLogWithAsyncMockLog(LogId id) -> std::shared_ptr { - auto persisted = std::make_shared(id); - 
_persistedLogs[id] = persisted; - auto core = std::make_unique(persisted); - return std::make_shared(std::move(core), _logMetricsMock, - LoggerContext(Logger::FIXME)); - } - - auto defaultLogger() { - return LoggerContext(Logger::REPLICATION2); - } - - auto stopAsyncMockLogs() -> void { - for (auto const& it : _persistedLogs) { - if (auto log = std::dynamic_pointer_cast(it.second); log != nullptr) { - log->stop(); - } - } - } - - std::unordered_map> _persistedLogs; - std::shared_ptr _logMetricsMock = std::make_shared(); -}; - - -}