[BTS-491][3.7] ArangoRestore connect Retry by maierlars · Pull Request #14437 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
v3.7.13 (XXXX-XX-XX)
--------------------

* Added a retry loop for arangorestore during the initial connection phase. The
number of retries defaults to 3 and can be configured using
--initial-connect-retries.

* Fix display of running and slow queries in web UI when there are multiple
coordinators. Previously, the display order of queries was undefined, which
could lead to queries from one coordinator being displayed on top once and then
Expand Down
25 changes: 23 additions & 2 deletions arangosh/Restore/RestoreFeature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1347,6 +1347,12 @@ void RestoreFeature::collectOptions(std::shared_ptr<options::ProgramOptions> opt
new UInt32Parameter(&_options.threadCount))
.setIntroducedIn(30400);

options
->addOption("--initial-connect-retries",
"number of connect retries for initial connection",
new UInt32Parameter(&_options.initialConnectRetries))
.setIntroducedIn(30713);

options->addOption("--include-system-collections",
"include system collections",
new BooleanParameter(&_options.includeSystemCollections));
Expand Down Expand Up @@ -1610,9 +1616,24 @@ void RestoreFeature::start() {

std::unique_ptr<SimpleHttpClient> httpClient;

// Tries to obtain a connected HTTP client, making at most `numRetries`
// attempts in total (despite the name, this counts attempts, not re-tries;
// with numRetries == 0 no attempt is made at all).
//
// Before every attempt except the first, a warning is logged and the thread
// sleeps for `i` seconds, where `i` is the zero-based attempt index, so the
// waits are 1s, 2s, 3s, ...
//
// Returns the first result that is neither
// TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT nor TRI_ERROR_INTERNAL (those two
// codes are the ones that trigger another attempt). If all attempts end in one
// of those codes, a fresh TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT result is
// returned and the details of the last failed attempt are dropped.
auto const connectRetry = [&](size_t numRetries) -> Result {
  for (size_t i = 0; i < numRetries; i++) {
    if (i > 0) {
      LOG_TOPIC("5855a", WARN, Logger::RESTORE) << "Failed to connect to server, retrying...";
      using namespace std::chrono_literals;
      // linearly growing delay: wait i seconds before attempt number i
      std::this_thread::sleep_for(i * 1s);
    }
    Result result = _clientManager.getConnectedClient(httpClient, _options.force,
                                                      true, !_options.createDatabase, false);
    // anything other than the two retryable codes is handed back to the caller
    if (!result.is(TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT) && !result.is(TRI_ERROR_INTERNAL)) {
      return result;
    }
  }
  return {TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT};
};

// final result
Result result = _clientManager.getConnectedClient(httpClient, _options.force,
true, !_options.createDatabase, false);
Result result = connectRetry(std::max<uint32_t>(1, _options.initialConnectRetries));
if (result.is(TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT)) {
LOG_TOPIC("c23bf", FATAL, Logger::RESTORE)
<< "cannot create server connection, giving up!";
Expand Down
1 change: 1 addition & 0 deletions arangosh/Restore/RestoreFeature.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class RestoreFeature final : public application_features::ApplicationFeature {
std::vector<std::string> numberOfShards;
std::vector<std::string> replicationFactor;
uint32_t threadCount{2};
uint32_t initialConnectRetries{3};
bool clusterMode{false};
bool createDatabase{false};
bool force{false};
Expand Down
0