diff --git a/CHANGELOG b/CHANGELOG index 0e7fd0096d31..e1bfdb1dc35d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,18 @@ devel ----- +* Fixed reloading of users within the Cluster. + If a Coordinator is asked to reload its users (e.g. by the UserManager in + Foxx, it is also possible to do via API, but this is internal and on purpose + not documented, so unlikely that it is used), in concurrency with user + management updates there is a chance that the reload is not correctly performed + on this coordinator. It may have missed the last update locally, causing + one user to have an older state. It will be fixed on the next modification + of any other users/permissions. Unfortunately this bug can cascade and when + hit again, the coordinator can now be off by two updates. + In DC2DC this situation is more likely to happen on the target datacenter, + causing this datacenter to have other users/permissions than the source one. + * Slightly improve specific warning messages for better readability. * Fix URL request parsing in case data is handed in in small chunks. diff --git a/arangod/Auth/UserManager.cpp b/arangod/Auth/UserManager.cpp index 9c2efa505e3c..1dff43256bf2 100644 --- a/arangod/Auth/UserManager.cpp +++ b/arangod/Auth/UserManager.cpp @@ -209,6 +209,13 @@ void auth::UserManager::loadFromDB() { return; } + TRI_IF_FAILURE("UserManager::performDBLookup") { + // Used in GTest. It is used to identify + // if the UserManager would have updated it's + // cache in a specific situation. + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + try { std::shared_ptr builder = QueryAllUsers(_server); if (builder) { diff --git a/arangod/Auth/UserManager.h b/arangod/Auth/UserManager.h index 905b8a4a7dab..a54b19bac331 100644 --- a/arangod/Auth/UserManager.h +++ b/arangod/Auth/UserManager.h @@ -79,7 +79,7 @@ class UserManager { /// @brief reload user cache and token caches void triggerLocalReload() { - _globalVersion.fetch_add(1, std::memory_order_release); + _internalVersion.store(0, std::memory_order_release); } /// @brief used for caching @@ -103,8 +103,7 @@ class UserManager { bool active, velocypack::Slice extras); /// Enumerate list of all users - Result enumerateUsers(std::function&&, - bool retryOnConflict); + Result enumerateUsers(std::function&&, bool retryOnConflict); /// Update specific user Result updateUser(std::string const& user, UserCallback&&); /// Access user without modifying it @@ -133,7 +132,7 @@ class UserManager { std::string const& dbname, bool configured = false); auth::Level collectionAuthLevel(std::string const& username, std::string const& dbname, std::string const& coll, bool configured = false); - + #ifdef ARANGODB_USE_GOOGLE_TESTS /// Overwrite internally cached permissions, only use /// for testing purposes diff --git a/tests/Auth/UserManagerClusterTest.cpp b/tests/Auth/UserManagerClusterTest.cpp new file mode 100644 index 000000000000..e3f943e0efd2 --- /dev/null +++ b/tests/Auth/UserManagerClusterTest.cpp @@ -0,0 +1,240 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2021-2021 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Michael Hackstein +//////////////////////////////////////////////////////////////////////////////// + +#include "gtest/gtest.h" + +#include "Mocks/Servers.h" + +#include "Auth/UserManager.h" +#include "GeneralServer/AuthenticationFeature.h" + +namespace arangodb { +namespace tests { +namespace auth_info_test { + +namespace { +static char const* FailureOnLoadDB = "UserManager::performDBLookup"; +} + +class UserManagerClusterTest : public ::testing::Test { + public: + mocks::MockCoordinator _server{}; + + protected: + auth::UserManager* userManager() { + auto um = _server.getFeature().userManager(); + TRI_ASSERT(um != nullptr); + return um; + } + + void simulateOneHeartbeat() { + /* + * NOTE: Sorry i gave up on this. Something that + * requires the heartbeat is absolutely untestable + * it does everything at once, and you need to setup + * a complete functioning world to somehow execute it. + * Furthermore it has tons of undesired side-effects. + * + * All i wanted to do, is let the heartbeat detect + * the UserVersion and inject it the way it does into + * the UserManager. + */ + } + + uint64_t getAgencyUserVersion() { + // Simply read the UserVersion from the Agency. + // This is copy pasted from HeartbeatThread. + auto& cache = _server.getFeature().agencyCache(); + auto [acb, idx] = cache.read( + std::vector{AgencyCommHelper::path("Sync/UserVersion")}); + auto result = acb->slice(); + VPackSlice slice = result[0].get(std::vector( + {AgencyCommHelper::path(), "Sync", "UserVersion"})); + TRI_ASSERT(slice.isInteger()); + // there is a UserVersion, and it has to be an UINT + return slice.getUInt(); + } +}; + +#ifdef ARANGODB_ENABLE_FAILURE_TESTS +TEST_F(UserManagerClusterTest, regression_forgotten_update) { + /* The following order of events did lead to a missing update: + * 1. um->triggerLocalReload(); + * 2. um->triggerGlobalReload(); + * 3. heartbeat + * 4. um->loadFromDB(); + * + * 1. and 2. moved internal versions forward two times + * 3. heartbeat resets one of the movings + * 4. Does not perform the actual load, as the heartbeat reset indicates everything is okay. + */ + + TRI_AddFailurePointDebugging(FailureOnLoadDB); + auto guard = arangodb::scopeGuard( + []() { TRI_RemoveFailurePointDebugging(FailureOnLoadDB); }); + + auto um = userManager(); + // If for some reason this EXPECT ever triggers, we can + // inject either the AgencyValue into the UserManager + // or vice-versa. This is just an assertion that we + // expect everything to start at default (1). + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); + + um->triggerLocalReload(); + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); + + um->triggerGlobalReload(); + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); + + /* + * This is the correct test here, the heartbeat has + * a side-effect, but that is simply untestable in the + * current design. So the only thing i could + * fall back to is to assert that the UserVersions in the agency + * do stay aligned. + * + * this->simulateOneHeartbeat(); + */ + // This needs to trigger a reload from DB + try { + um->userExists("unknown user"); + FAIL(); + } catch (arangodb::basics::Exception const& e) { + // This Execption indicates that we got past the version + // checks and would contact DBServer now. + // This is not under test here, we only want to test that we load + // the plan + EXPECT_EQ(e.code(), TRI_ERROR_DEBUG); + } catch (...) { + FAIL(); + } +} + +TEST_F(UserManagerClusterTest, cacheRevalidationShouldKeepVersionsInLine) { + TRI_AddFailurePointDebugging(FailureOnLoadDB); + auto guard = arangodb::scopeGuard( + []() { TRI_RemoveFailurePointDebugging(FailureOnLoadDB); }); + + auto um = userManager(); + // If for some reason this EXPECT ever triggers, we can + // inject either the AgencyValue into the UserManager + // or vice-versa. This is just an assertion that we + // expect everything to start at default (1). + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); + + try { + // This needs to trigger a reload from DB + // Internally it will do local, global, and loadFromDB. + um->triggerCacheRevalidation(); + FAIL(); + } catch (arangodb::basics::Exception const& e) { + // This Execption indicates that we got past the version + // checks and would contact DBServer now. + // This is not under test here, we only want to test that we load + // the plan + EXPECT_EQ(e.code(), TRI_ERROR_DEBUG); + } catch (...) { + FAIL(); + } + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); +} + +TEST_F(UserManagerClusterTest, triggerLocalReloadShouldNotUpdateClusterVersion) { + TRI_AddFailurePointDebugging(FailureOnLoadDB); + auto guard = arangodb::scopeGuard( + []() { TRI_RemoveFailurePointDebugging(FailureOnLoadDB); }); + + auto um = userManager(); + // If for some reason this EXPECT ever triggers, we can + // inject either the AgencyValue into the UserManager + // or vice-versa. This is just an assertion that we + // expect everything to start at default (1). + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); + + uint64_t versionBefore = getAgencyUserVersion(); + + um->triggerLocalReload(); + EXPECT_EQ(versionBefore, getAgencyUserVersion()); + + /* + * This is the correct test here, see above + * + * this->simulateOneHeartbeat(); + */ + // This needs to trigger a reload from DB + try { + um->userExists("unknown user"); + FAIL(); + } catch (arangodb::basics::Exception const& e) { + // This Execption indicates that we got past the version + // checks and would contact DBServer now. + // This is not under test here, we only want to test that we load + // the plan + EXPECT_EQ(e.code(), TRI_ERROR_DEBUG); + } catch (...) { + FAIL(); + } +} + +TEST_F(UserManagerClusterTest, triggerGlobalReloadShouldUpdateClusterVersion) { + TRI_AddFailurePointDebugging(FailureOnLoadDB); + auto guard = arangodb::scopeGuard( + []() { TRI_RemoveFailurePointDebugging(FailureOnLoadDB); }); + + auto um = userManager(); + // If for some reason this EXPECT ever triggers, we can + // inject either the AgencyValue into the UserManager + // or vice-versa. This is just an assertion that we + // expect everything to start at default (1). + EXPECT_EQ(um->globalVersion(), getAgencyUserVersion()); + + uint64_t versionBefore = getAgencyUserVersion(); + + um->triggerGlobalReload(); + // The version in the Agency needs to be increased + EXPECT_LT(versionBefore, getAgencyUserVersion()); + + /* + * This is the correct test here, see above + * + * this->simulateOneHeartbeat(); + */ + // This needs to trigger a reload from DB + try { + um->userExists("unknown user"); + FAIL(); + } catch (arangodb::basics::Exception const& e) { + // This Execption indicates that we got past the version + // checks and would contact DBServer now. + // This is not under test here, we only want to test that we load + // the plan + EXPECT_EQ(e.code(), TRI_ERROR_DEBUG); + } catch (...) { + FAIL(); + } +} + +#endif + +} // namespace auth_info_test +} // namespace tests +} // namespace arangodb diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7e93b9b5f366..2834a9515f29 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -159,6 +159,7 @@ set(ARANGODB_TESTS_SOURCES Aql/WindowExecutorTest.cpp AsyncAgencyComm/AsyncAgencyCommTest.cpp Auth/UserManagerTest.cpp + Auth/UserManagerClusterTest.cpp Basics/conversions-test.cpp Basics/csv-test.cpp Basics/datetime.cpp