8000 Add visibility for recent API calls (#21642) · fceller/arangodb@e7be75c · GitHub
[go: up one dir, main page]

Skip to content

Commit e7be75c

Browse files
neunhoefjvolmer
andauthored
Add visibility for recent API calls (arangodb#21642)
Implement api call recording. Past api calls are recorded in a bounded list with their time stamp. There is an API to get the list of recent API calls. --------- Co-authored-by: Julia Volmer <julia.volmer@arangodb.com>
1 parent b7b0841 commit e7be75c

20 files changed

+1167
-15
lines changed

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
devel
22
-----
33

4+
* Add visibility for recent API calls via a new API /_admin/server/api-calls.
5+
46
* Fix TSAN issue in LogicalCollection.
57

68
* FE-541: update disk usage message.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Metric description for the histogram that times API call recording.
# Note: a mapping may contain each key only once; this file previously carried
# a second, stray `category` entry that would override the intended one.
name: arangodb_api_recording_call_time
type: histogram
category: Statistics
help: |
  API recording runtime histogram.
description: |
  Execution time histogram for API recording calls in nanoseconds.

  This histogram tracks the time it takes to record API calls in the ApiRecordingFeature.
  The histogram uses a logarithmic scale with base 2, starting at 0 and going up to 16000.0 nanoseconds,
  with 9 buckets.
unit: nanoseconds
introducedIn: "3.12.5"
complexity: high
exposedBy:
  - agent
  - coordinator
  - dbserver
  - single

arangod/Aql/ExecutionEngine.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -593,19 +593,23 @@ struct DistributedQueryInstanciator final
593593

594594
for (auto const& [server, queryId, rebootId] : srvrQryId) {
595595
TRI_ASSERT(!server.starts_with("server:"));
596-
std::function<void(void)> f = [srvr = server, id = _query.id(),
597-
vn = _query.vocbase().name(), &df]() {
598-
LOG_TOPIC("d2554", INFO, Logger::QUERIES)
599-
<< "killing query " << id << " because participating DB server "
600-
<< srvr << " is unavailable";
601-
try {
602-
methods::Queries::kill(df, vn, id);
603-
} catch (...) {
604-
// it does not really matter if this fails.
605-
// if the coordinator contacts the failed DB server next time, it
606-
// will realize it has failed.
607-
}
608-
};
596+
std::function<void(void)> f =
597+
[srvr = server, id = _query.id(), vn = _query.vocbase().name(), &df,
598+
qs = _query.queryString(),
599+
bp = _query.bindParametersAsBuilder()]() {
600+
LOG_TOPIC("d2554", INFO, Logger::QUERIES)
601+
<< "killing query " << id
602+
<< " because participating DB server " << srvr
603+
<< " is unavailable, query string:" << qs
604+
<< ", bind parameters: " << bp->slice().toJson();
605+
try {
606+
methods::Queries::kill(df, vn, id);
607+
} catch (...) {
608+
// it does not really matter if this fails.
609+
// if the coordinator contacts the failed DB server next time,
610+
// it will realize it has failed.
611+
}
612+
};
609613

610614
engine->rebootTrackers().emplace_back(ci.rebootTracker().callMeOnChange(
611615
{server, rebootId}, std::move(f),

arangod/GeneralServer/CommTask.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "Logger/LogMacros.h"
4242
#include "Replication/ReplicationFeature.h"
4343
#include "Rest/GeneralResponse.h"
44+
#include "RestServer/ApiRecordingFeature.h"
4445
#include "RestServer/DatabaseFeature.h"
4546
#include "RestServer/VocbaseContext.h"
4647
#include "Scheduler/SchedulerFeature.h"
@@ -148,6 +149,7 @@ bool queueTimeViolated(GeneralRequest const& req) {
148149
CommTask::CommTask(GeneralServer& server, ConnectionInfo info)
149150
: _server(server),
150151
_generalServerFeature(server.server().getFeature<GeneralServerFeature>()),
152+
_apiRecordingFeature(server.server().getFeature<ApiRecordingFeature>()),
151153
_connectionInfo(std::move(info)),
152154
_connectionStatistics(acquireConnectionStatistics()),
153155
_auth(AuthenticationFeature::instance()),
@@ -441,6 +443,12 @@ void CommTask::executeRequest(std::unique_ptr<GeneralRequest> request,
441443

442444
// create a handler, this takes ownership of request and response
443445
auto& server = _server.server();
446+
447+
// Record the request here in the ApplicationServer:
448+
_apiRecordingFeature.recordAPICall(
449+
request->requestType(), request->requestPath(), request->databaseName());
450+
451+
// And find a request handler:
444452
auto factory = _generalServerFeature.handlerFactory();
445453
auto handler =
446454
factory->createHandler(server, std::move(request), std::move(response));

arangod/GeneralServer/CommTask.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <mutex>
4141

4242
namespace arangodb {
43+
class ApiRecordingFeature;
4344
class AuthenticationFeature;
4445
class ConnectionStatistics;
4546
class GeneralRequest;
@@ -168,6 +169,7 @@ class CommTask : public std::enable_shared_from_this<CommTask> {
168169
protected:
169170
GeneralServer& _server;
170171
GeneralServerFeature& _generalServerFeature;
172+
ApiRecordingFeature& _apiRecordingFeature;
171173
ConnectionInfo _connectionInfo;
172174

173175
ConnectionStatistics::Item _connectionStatistics;

arangod/GeneralServer/GeneralServer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#pragma once
2727

2828
#include "Basics/Result.h"
29-
#include "Basics/Thread.h"
3029
#include "GeneralServer/IoContext.h"
3130
#include "GeneralServer/SslServerFeature.h"
3231

arangod/RestHandler/RestAdminServerHandler.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
#include "GeneralServer/AuthenticationFeature.h"
3333
#include "GeneralServer/GeneralServerFeature.h"
3434
#include "GeneralServer/SslServerFeature.h"
35+
#include "Inspection/VPack.h"
3536
#include "Logger/LogMacros.h"
3637
#include "Replication/ReplicationFeature.h"
38+
#include "RestServer/ApiRecordingFeature.h"
3739
#include "Scheduler/Scheduler.h"
3840
#include "Scheduler/SchedulerFeature.h"
3941
#include "StorageEngine/EngineSelectorFeature.h"
@@ -67,6 +69,8 @@ RestStatus RestAdminServerHandler::execute() {
6769
handleJWTSecretsReload();
6870
} else if (suffixes.size() == 1 && suffixes[0] == "encryption") {
6971
handleEncryptionKeyRotation();
72+
} else if (suffixes.size() == 1 && suffixes[0] == "api-calls") {
73+
handleApiCalls();
7074
} else {
7175
generateError(rest::ResponseCode::NOT_FOUND, TRI_ERROR_HTTP_NOT_FOUND);
7276
}
@@ -298,3 +302,29 @@ void RestAdminServerHandler::handleEncryptionKeyRotation() {
298302
generateError(rest::ResponseCode::NOT_FOUND, TRI_ERROR_HTTP_NOT_FOUND);
299303
}
300304
#endif
305+
306+
void RestAdminServerHandler::handleApiCalls() {
307+
if (_request->requestType() != rest::RequestType::GET) {
308+
generateError(rest::ResponseCode::METHOD_NOT_ALLOWED,
309+
TRI_ERROR_HTTP_METHOD_NOT_ALLOWED);
310+
return;
311+
}
312+
313+
auto& apiRecordingFeature = server().getFeature<ApiRecordingFeature>();
314+
315+
VPackBuilder builder;
316+
{
317+
VPackObjectBuilder guard(&builder);
318+
builder.add(VPackValue("calls"));
319+
{
320+
VPackArrayBuilder guard2(&builder);
321+
322+
// Use doForApiCallRecords to iterate through records
323+
apiRecordingFeature.doForApiCallRecords(
324+
[&builder](ApiCallRecord const& record) {
325+
arangodb::velocypack::serialize(builder, record);
326+
});
327+
}
328+
}
329+
generateOk(rest::ResponseCode::OK, builder.slice());
330+
}

arangod/RestHandler/RestAdminServerHandler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,6 @@ class RestAdminServerHandler : public RestBaseHandler {
5353

5454
void handleJWTSecretsReload();
5555
void handleEncryptionKeyRotation();
56+
void handleApiCalls();
5657
};
5758
} // namespace arangodb
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
////////////////////////////////////////////////////////////////////////////////
2+
/// DISCLAIMER
3+
///
4+
/// Copyright 2014-2025 ArangoDB GmbH, Cologne, Germany
5+
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
6+
///
7+
/// Licensed under the Business Source License 1.1 (the "License");
8+
/// you may not use this file except in compliance with the License.
9+
/// You may obtain a copy of the License at
10+
///
11+
/// https://github.com/arangodb/arangodb/blob/devel/LICENSE
12+
///
13+
/// Unless required by applicable law or agreed to in writing, software
14+
/// distributed under the License is distributed on an "AS IS" BASIS,
15+
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
/// See the License for the specific language governing permissions and
17+
/// limitations under the License.
18+
///
19+
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
20+
///
21+
/// @author Max Neunhoeffer
22+
////////////////////////////////////////////////////////////////////////////////
23+
24+
#include "ApiRecordingFeature.h"
25+
26+
#include "ApplicationFeatures/ApplicationServer.h"
27+
#include "ProgramOptions/ProgramOptions.h"
28+
#include "ProgramOptions/Parameters.h"
29+
#include "Logger/Logger.h"
30+
#include "Logger/LogMacros.h"
31+
#include "Metrics/MetricsFeature.h"
32+
33+
using namespace arangodb::options;
34+
35+
namespace arangodb {
36+
37+
size_t ApiCallRecord::memoryUsage() const noexcept {
38+
return sizeof(ApiCallRecord) + path.size() + database.size();
39+
}
40+
41+
ApiRecordingFeature::ApiRecordingFeature(Server& server)
42+
: ArangodFeature{server, *this},
43+
_recordApiCallTimes(server.getFeature<metrics::MetricsFeature>().add(
44+
arangodb_api_recording_call_time{})) {
45+
setOptional(false);
46+
startsAfter<application_features::GreetingsFeaturePhase>();
47+
}
48+
49+
/// Makes sure the cleanup thread has terminated before the feature goes away.
/// Sets the stop flag and joins the thread if it is still joinable, i.e. if
/// it has not been joined already.
ApiRecordingFeature::~ApiRecordingFeature() {
  _stopCleanupThread.store(true, std::memory_order_relaxed);
  if (!_cleanupThread.joinable()) {
    return;
  }
  _cleanupThread.join();
}
56+
57+
void ApiRecordingFeature::collectOptions(
58+
std::shared_ptr<ProgramOptions> options) {
59+
options->addOption(
60+
"--server.api-call-recording",
61+
"Record recent API calls for debugging purposes (default: true).",
62+
new BooleanParameter(&_enabled),
63+
arangodb::options::makeDefaultFlags(arangodb::options::Flags::Uncommon,
64+
arangodb::options::Flags::Command));
65+
66+
options->addOption(
67+
"--server.api-recording-memory-limit",
68+
"Memory limit for the list of ApiCallRecords.",
69+
new UInt64Parameter(&_totalMemoryLimit, 1, 256000, 256000000000),
70+
arangodb::options::makeDefaultFlags(arangodb::options::Flags::Uncommon,
71+
arangodb::options::Flags::Command));
72+
}
73+
74+
void ApiRecordingFeature::prepare() {
75+
// Calculate per-list memory limit
76+
_memoryPerApiRecordList = _totalMemoryLimit / NUMBER_OF_API_RECORD_LISTS;
77+
78+
if (_enabled) {
79+
_apiCallRecord = std::make_unique<BoundedList<ApiCallRecord>>(
80+
_memoryPerApiRecordList, NUMBER_OF_API_RECORD_LISTS);
81+
}
82+
}
83+
84+
void ApiRecordingFeature::start() {
85+
// Start the cleanup thread if enabled
86+
if (_enabled) {
87+
_stopCleanupThread.store(false, std::memory_order_relaxed);
88+
_cleanupThread = std::jthread([this] { cleanupLoop(); });
89+
#ifdef TRI_HAVE_SYS_PRCTL_H
90+
pthread_setname_np(_cleanupThread.native_handle(), "ApiRecordCleanup");
91+
#endif
92+
}
93+
}
94+
95+
void ApiRecordingFeature::stop() {
96+
// Stop and join the cleanup thread
97+
_stopCleanupThread.store(true, std::memory_order_relaxed);
98+
if (_cleanupThread.joinable()) {
99+
_cleanupThread.join();
100+
}
101+
}
102+
103+
/// Records a single API call and measures how long the recording took.
///
/// @param requestType  request type of the call
/// @param path         request path of the call
/// @param database     name of the database the call was addressed to
///
/// Does nothing when recording is disabled or the record list has not been
/// created. Otherwise a new ApiCallRecord is prepended to the list and the
/// elapsed time, in nanoseconds, is counted in `_recordApiCallTimes`.
void ApiRecordingFeature::recordAPICall(arangodb::rest::RequestType requestType,
                                        std::string_view path,
                                        std::string_view database) {
  if (_enabled && _apiCallRecord) {
    using Clock = std::chrono::steady_clock;

    auto const begin = Clock::now();
    _apiCallRecord->prepend(ApiCallRecord(requestType, path, database));
    auto const finish = Clock::now();

    // The histogram is fed with nanoseconds (not seconds).
    int64_t const nanos =
        std::chrono::duration_cast<std::chrono::nanoseconds>(finish - begin)
            .count();
    _recordApiCallTimes.count(static_cast<double>(nanos));
  }
}
124+
125+
void ApiRecordingFeature::cleanupLoop() {
126+
// Initialize delay values
127+
constexpr std::chrono::milliseconds MIN_DELAY{1};
128+
constexpr std::chrono::milliseconds MAX_DELAY{256};
129+
auto currentDelay = MIN_DELAY;
130+
131+
while (!_stopCleanupThread.load(std::memory_order_relaxed)) {
132+
// Get the trash and measure the time
133+
auto start = std::chrono::steady_clock::now();
134+
size_t count = _apiCallRecord->clearTrash();
135+
136+
auto duration = std::chrono::steady_clock::now() - start;
137+
auto nanoseconds =
138+
std::chrono::duration_cast<std::chrono::nanoseconds>(duration);
139+
140+
if (count > 0) {
141+
LOG_TOPIC("53626", TRACE, Logger::MEMORY)
142+
<< "Cleaned up " << count << " API call record lists in "
143+
<< nanoseconds.count() << " nanoseconds";
144+
// Reset delay to minimum when trash was found
145+
currentDelay = MIN_DELAY;
146+
} else {
147+
// Double the delay if no trash was found, up to MAX_DELAY
148+
currentDelay = std::min(currentDelay * 2, MAX_DELAY);
149+
}
150+
151+
// Sleep using the calculated delay
152+
std::this_thread::sleep_for(currentDelay);
153+
}
154+
}
155+
156+
} // namespace arangodb

0 commit comments

Comments
 (0)
0