Start up server HTTP interface earlier (#16181) · rnshah9/arangodb@f7c63f8 · GitHub
[go: up one dir, main page]

Skip to content

Commit f7c63f8

Browse files
authored
Start up server HTTP interface earlier (arangodb#16181)
1 parent 54f4395 commit f7c63f8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1065
-659
lines changed

CHANGELOG

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,41 @@
11
devel
22
-----
33

4+
* The API `/_admin/status` now returns a progress attribute that shows the
5+
server's current state (starting, stopping, etc.), with details about which
6+
feature is currently being started, stopped, etc. During recovery, the current WAL
7+
recovery sequence number is also reported in a sub-attribute of the
8+
`progress` attribute. Clients can query this attribute to track the
9+
progress of the WAL recovery.
10+
The additional progress attribute returned by `/_admin/status` is most
11+
useful when using the `--server.early-connections true` setting. With that
12+
setting, the server will respond to incoming requests to a limited set of
13+
APIs already during server startup. When the setting is not used, the REST
14+
interface will be opened relatively late during the startup sequence, so
15+
that the progress attribute will likely not be very useful anymore.
16+
17+
* Optionally start up HTTP interface of servers earlier, so that ping probes
18+
from tools can already be responded to when the server is not fully started.
19+
By default, the HTTP interface is opened at the same point during the startup
20+
sequence as before, but it can optionally be opened earlier by setting the
21+
new startup option `--server.early-connections` to `true`. This will
22+
open the HTTP interface early in the startup, so that the server can respond
23+
to a limited set of REST APIs even during recovery. This can be useful
24+
because the recovery procedure can take time proportional to the amount of
25+
data to recover.
26+
When the `--server.early-connections` option is set to `true`, the
27+
server will already respond to requests to the following APIs during
28+
startup:
29+
- `/_api/version`
30+
- `/_admin/version`
31+
- `/_admin/status`
32+
All other APIs will be responded to with an HTTP response code 503, so that
33+
callers can see that the server is not fully ready.
34+
If authentication is used, then only JWT authentication can be used during
35+
the early startup phase. Incoming requests relying on other authentication
36+
mechanisms that require access to the database data will also be responded to
37+
with HTTP 503 errors, even if correct credentials are used.
38+
439
* Fix behavior when accessing a view instead of a collection by name in a REST
540
document operation. Now return a proper error.
641

arangod/Agency/Inception.cpp

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@
2626
#include "Agency/Agent.h"
2727
#include "ApplicationFeatures/ApplicationServer.h"
2828
#include "Basics/ConditionLocker.h"
29-
#include "Basics/application-exit.h"
3029
#include "Basics/MutexLocker.h"
30+
#include "Basics/StaticStrings.h"
31+
#include "Basics/application-exit.h"
3132
#include "Cluster/ServerState.h"
3233
#include "Logger/LogMacros.h"
3334
#include "Network/Methods.h"
3435
#include "Network/NetworkFeature.h"
36+
#include "Random/RandomGenerator.h"
3537

3638
#include <chrono>
3739
#include <thread>
@@ -40,39 +42,45 @@ using namespace arangodb::consensus;
4042

4143
namespace {
4244
void handleGossipResponse(arangodb::network::Response const& r,
45+
std::string const& endpoint,
4346
arangodb::consensus::Agent* agent, size_t version) {
4447
using namespace arangodb;
45-
std::string newLocation;
46-
4748
if (r.ok()) {
4849
velocypack::Slice payload = r.slice();
4950

5051
switch (r.statusCode()) {
51-
case 200: // Digest other configuration
52+
case 200: {
53+
// Digest other configuration
5254
LOG_TOPIC("4995a", DEBUG, Logger::AGENCY)
5355
<< "Got result of gossip message, code: 200"
5456
<< " body: " << payload.toJson();
5557
agent->gossip(payload, true, version);
5658
break;
59+
}
5760

58-
case 307: // Add new endpoint to gossip peers
61+
case 307: {
62+
// Add new endpoint to gossip peers
5963
bool found;
60-
newLocation = r.response().header.metaByKey("location", found);
64+
std::string newLocation =
65+
r.response().header.metaByKey(StaticStrings::Location, found);
6166

6267
if (found) {
63-
if (newLocation.compare(0, 5, "https") == 0) {
64-
newLocation = newLocation.replace(0, 5, "ssl");
65-
} else if (newLocation.compare(0, 4, "http") == 0) {
66-
newLocation = newLocation.replace(0, 4, "tcp");
68+
if (newLocation.starts_with("https")) {
69+
newLocation =
70+
newLocation.replace(0, std::string_view("https").size(), "ssl");
71+
} else if (newLocation.starts_with("http")) {
72+
newLocation =
73+
newLocation.replace(0, std::string_view("http").size(), "tcp");
6774
} else {
6875
LOG_TOPIC("60be0", FATAL, Logger::AGENCY)
69-
<< "Invalid URL specified as gossip endpoint";
76+
<< "Invalid URL specified as gossip endpoint by " << endpoint
77+
<< ": " << newLocation;
7078
FATAL_ERROR_EXIT();
7179
}
7280

7381
LOG_TOPIC("4c822", DEBUG, Logger::AGENCY)
74-
<< "Got redirect to " << newLocation
75-
<< ". Adding peer to gossip peers";
82+
<< "Got redirect to " << newLocation << ". Adding peer "
83+
<< newLocation << " to gossip peers";
7684
bool added = agent->addGossipPeer(newLocation);
7785
if (added) {
7886
LOG_TOPIC("d41c8", DEBUG, Logger::AGENCY)
@@ -86,18 +94,33 @@ void handleGossipResponse(arangodb::network::Response const& r,
8694
<< "Redirect lacks 'Location' header";
8795
}
8896
break;
97+
}
98+
99+
case 503: {
100+
// service unavailable
101+
LOG_TOPIC("f9c3f", INFO, Logger::AGENCY)
102+
<< "Gossip endpoint " << endpoint << " is still unavailable";
103+
uint32_t sleepTime = 250 + RandomGenerator::interval(uint32_t(250));
104+
std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime));
105+
break;
106+
}
89107

90-
default:
108+
default: {
109+
// unexpected error
91110
LOG_TOPIC("bed89", ERR, Logger::AGENCY)
92-
<< "Got error " << r.statusCode() << " from gossip endpoint";
93-
std::this_thread::sleep_for(std::chrono::seconds(40));
111+
<< "Got error " << r.statusCode() << " from gossip endpoint "
112+
<< endpoint;
113+
std::this_thread::sleep_for(std::chrono::seconds(30));
94114
break;
115+
}
95116
}
96117
}
97118

98119
LOG_TOPIC("e2ef9", DEBUG, Logger::AGENCY)
99-
<< "Got error from gossip message, status:" << fuerte::to_string(r.error);
120+
<< "Got error from gossip message to " << endpoint
121+
<< ", status: " << fuerte::to_string(r.error);
100122
}
123+
101124
} // namespace
102125

103126
Inception::Inception(Agent& agent)
@@ -166,7 +189,7 @@ void Inception::gossip() {
166189
network::sendRequest(cp, p, fuerte::RestVerb::Post, path, buffer,
167190
reqOpts)
168191
.thenValue([=, this](network::Response r) {
169-
::handleGossipResponse(r, &_agent, version);
192+
::handleGossipResponse(r, p, &_agent, version);
170193
});
171194
}
172195
}
@@ -197,7 +220,7 @@ void Inception::gossip() {
197220
network::sendRequest(cp, pair.second, fuerte::RestVerb::Post, path,
198221
buffer, reqOpts)
199222
.thenValue([=, this](network::Response r) {
200-
::handleGossipResponse(r, &_agent, version);
223+
::handleGossipResponse(r, pair.second, &_agent, version);
201224
});
202225
}
203226
}
@@ -333,7 +356,7 @@ bool Inception::restartingActiveAgent() {
333356
path, greetBuffer, reqOpts)
334357
.get();
335358

336-
if (comres.ok()) {
359+
if (comres.combinedResult().ok()) {
337360
try {
338361
VPackSlice theirConfig = comres.slice();
339362

@@ -479,7 +502,7 @@ void Inception::reportVersionForEp(std::string const& endpoint,
479502
// @brief Thread main
480503
void Inception::run() {
481504
auto server = ServerState::instance();
482-
while (server->isMaintenance() && !this->isStopping() &&
505+
while (server->isStartupOrMaintenance() && !this->isStopping() &&
483506
!_agent.isStopping()) {
484507
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
485508
LOG_TOPIC("1b613", DEBUG, Logger::AGENCY)

arangod/Auth/TokenCache.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,19 @@ std::string auth::TokenCache::jwtSecret() const {
9090
// should only lock if required, otherwise we will serialize all
9191
// requests whether we need to or not
9292
auth::TokenCache::Entry auth::TokenCache::checkAuthentication(
93-
AuthenticationMethod authType, std::string const& secret) {
93+
AuthenticationMethod authType, ServerState::Mode mode,
94+
std::string const& secret) {
9495
switch (authType) {
9596
case AuthenticationMethod::BASIC:
97+
if (mode == ServerState::Mode::STARTUP) {
98+
// during the startup phase, we have no access to the underlying
99+
// database data, so we cannot validate the credentials.
100+
return auth::TokenCache::Entry::Unauthenticated();
101+
}
96102
return checkAuthenticationBasic(secret);
97103

98104
case AuthenticationMethod::JWT:
105+
// JWTs work fine even during the startup phase
99106
return checkAuthenticationJWT(secret);
100107

101108
default:

arangod/Auth/TokenCache.h

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,17 @@
3030
#include "Basics/Result.h"
3131
#include "Basics/debugging.h"
3232
#include "Basics/system-functions.h"
33+
#include "Cluster/ServerState.h"
3334
#include "Rest/CommonDefines.h"
3435

3536
#include <velocypack/Builder.h>
3637
#include <velocypack/Slice.h>
3738

39+
#include <atomic>
40+
#include <mutex>
41+
#include <string>
42+
#include <unordered_map>
43+
3844
namespace arangodb {
3945
namespace auth {
4046
class UserManager;
@@ -53,18 +59,20 @@ class TokenCache {
5359
friend class auth::TokenCache;
5460

5561
public:
56-
explicit Entry(std::string const& username, bool a, double t)
57-
: _username(username), _expiry(t), _authenticated(a) {}
62+
explicit Entry(std::string username, bool a, double t)
63+
: _username(std::move(username)), _expiry(t), _authenticated(a) {}
5864

5965
static Entry Unauthenticated() { return Entry("", false, 0); }
6066
static Entry Superuser() { return Entry("", true, 0); }
6167

62-
std::string const& username() const { return _username; }
63-
bool authenticated() const { return _authenticated; }
64-
void authenticated(bool value) { _authenticated = value; }
65-
void setExpiry(double expiry) { _expiry = expiry; }
68+
std::string const& username() const noexcept { return _username; }
69+
bool authenticated() const noexcept { return _authenticated; }
70+
void authenticated(bool value) noexcept { _authenticated = value; }
71+
void setExpiry(double expiry) noexcept { _expiry = expiry; }
6672
double expiry() const noexcept { return _expiry; }
67-
bool expired() const { return _expiry != 0 && _expiry < TRI_microtime(); }
73+
bool expired() const noexcept {
74+
return _expiry != 0 && _expiry < TRI_microtime();
75+
}
6876
std::vector<std::string> const& allowedPaths() const {
6977
return _allowedPaths;
7078
}
@@ -81,8 +89,9 @@ class TokenCache {
8189
};
8290

8391
public:
84-
TokenCache::Entry checkAuthentication(
85-
arangodb::rest::AuthenticationMethod authType, std::string const& secret);
92+
TokenCache::Entry checkAuthentication(rest::AuthenticationMethod authType,
93+
ServerState::Mode mode,
94+
std::string const& secret);
8695

8796
/// Clear the cache of username / password auth
8897
void invalidateBasicCache();

arangod/Cluster/HeartbeatThread.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ void HeartbeatThread::run() {
342342
// which fails when it is still in maintenance mode
343343
auto server = ServerState::instance();
344344
if (!server->isCoordinator(role)) {
345-
while (server->isMaintenance()) {
345+
while (server->isStartupOrMaintenance()) {
346346
if (isStopping()) {
347347
// startup aborted
348348
return;

0 commit comments

Comments
 (0)
0