44
44
#include " Logger/LogMacros.h"
45
45
#include " Logger/Logger.h"
46
46
#include " Logger/LoggerStream.h"
47
- #include " Metrics/CounterBuilder.h"
48
- #include " Metrics/HistogramBuilder.h"
49
47
#include " Metrics/MetricsFeature.h"
50
48
#include " ProgramOptions/ProgramOptions.h"
51
49
#include " ProgramOptions/Section.h"
54
52
#include " Replication2/Version.h"
55
53
#include " RestServer/DatabaseFeature.h"
56
54
#include " RestServer/DatabasePathFeature.h"
55
+ #include " RestServer/IOHeartbeatThread.h"
57
56
#include " RestServer/QueryRegistryFeature.h"
58
57
#include " StorageEngine/EngineSelectorFeature.h"
59
58
#include " StorageEngine/StorageEngine.h"
@@ -271,153 +270,6 @@ void DatabaseManagerThread::run() {
271
270
}
272
271
}
273
272
274
- struct HeartbeatTimescale {
275
- static arangodb::metrics::LogScale<double > scale () {
276
- return {10.0 , 0.0 , 1000000.0 , 8 };
277
- }
278
- };
279
-
280
- DECLARE_HISTOGRAM (arangodb_ioheartbeat_duration, HeartbeatTimescale,
281
- " Time to execute the io heartbeat once [us]" );
282
- DECLARE_COUNTER (arangodb_ioheartbeat_failures_total,
283
- " Total number of failures in IO heartbeat" );
284
- DECLARE_COUNTER (arangodb_ioheartbeat_delays_total,
285
- " Total number of delays in IO heartbeat" );
286
-
287
- // / IO check thread main loop
288
- // / The purpose of this thread is to try to perform a simple IO write
289
- // / operation on the database volume regularly. We need visibility in
290
- // / production if IO is slow or not possible at all.
291
- IOHeartbeatThread::IOHeartbeatThread (Server& server,
292
- metrics::MetricsFeature& metricsFeature)
293
- : ServerThread<ArangodServer>(server, " IOHeartbeat" ),
294
- _exeTimeHistogram(metricsFeature.add(arangodb_ioheartbeat_duration{})),
295
- _failures(metricsFeature.add(arangodb_ioheartbeat_failures_total{})),
296
- _delays(metricsFeature.add(arangodb_ioheartbeat_delays_total{})) {}
297
-
298
- IOHeartbeatThread::~IOHeartbeatThread () { shutdown (); }
299
-
300
- void IOHeartbeatThread::run () {
301
- auto & databasePathFeature = server ().getFeature <DatabasePathFeature>();
302
- std::string testFilePath = FileUtils::buildFilename (
303
- databasePathFeature.directory (), " TestFileIOHeartbeat" );
304
- std::string testFileContent = " This is just an I/O test.\n " ;
305
-
306
- LOG_TOPIC (" 66665" , DEBUG, Logger::ENGINES) << " IOHeartbeatThread: running..." ;
307
-
308
- while (true ) {
309
- try { // protect thread against any exceptions
310
- if (isStopping ()) {
311
- // done
312
- break ;
313
- }
314
-
315
- LOG_TOPIC (" 66659" , DEBUG, Logger::ENGINES)
316
- << " IOHeartbeat: testing to write/read/remove " << testFilePath;
317
- // We simply write a file and sync it to disk in the database
318
- // directory and then read it and then delete it again:
319
- auto start1 = std::chrono::steady_clock::now ();
320
- bool trouble = false ;
321
- try {
322
- FileUtils::spit (testFilePath, testFileContent, true );
323
- } catch (std::exception const & exc) {
324
- ++_failures;
325
- LOG_TOPIC (" 66663" , INFO, Logger::ENGINES)
326
- << " IOHeartbeat: exception when writing test file: " << exc.what ();
327
- trouble = true ;
328
- }
329
- auto finish = std::chrono::steady_clock::now ();
330
- std::chrono::duration<double > dur = finish - start1;
331
- bool delayed = dur > std::chrono::seconds (1 );
332
- if (trouble || delayed) {
333
- if (delayed) {
334
- ++_delays;
335
- }
336
- LOG_TOPIC (" 66662" , INFO, Logger::ENGINES)
337
- << " IOHeartbeat: trying to write test file took "
338
- << std::chrono::duration_cast<std::chrono::microseconds>(dur)
339
- .count ()
340
- << " microseconds." ;
341
- }
342
-
343
- // Read the file if we can reasonably assume it is there:
344
- if (!trouble) {
345
- auto start = std::chrono::steady_clock::now ();
346
- try {
347
- std::string content = FileUtils::slurp (testFilePath);
348
- if (content != testFileContent) {
349
- LOG_TOPIC (" 66660" , INFO, Logger::ENGINES)
350
- << " IOHeartbeat: read content of test file was not as "
351
- " expected, found:'"
352
- << content << " ', expected: '" << testFileContent << " '" ;
353
- trouble = true ;
354
- ++_failures;
355
- }
356
- } catch (std::exception const & exc) {
357
- ++_failures;
358
- LOG_TOPIC (" 66661" , INFO, Logger::ENGINES)
359
- << " IOHeartbeat: exception when reading test file: "
360
- << exc.what ();
361
- trouble = true ;
362
- }
363
- auto finish = std::chrono::steady_clock::now ();
364
- std::chrono::duration<double > dur = finish - start;
365
- bool delayed = dur > std::chrono::seconds (1 );
366
- if (trouble || delayed) {
367
- if (delayed) {
368
- ++_delays;
369
- }
370
- LOG_TOPIC (" 66669" , INFO, Logger::ENGINES)
371
- << " IOHeartbeat: trying to read test file took "
372
- << std::chrono::duration_cast<std::chrono::microseconds>(dur)
373
- .count ()
374
- << " microseconds." ;
375
- }
376
-
377
- // And remove it again:
378
- start = std::chrono::steady_clock::now ();
379
- ErrorCode err = FileUtils::remove (testFilePath);
380
- if (err != TRI_ERROR_NO_ERROR) {
381
- ++_failures;
382
- LOG_TOPIC (" 66670" , INFO, Logger::ENGINES)
383
- << " IOHeartbeat: error when removing test file: " << err;
384
- trouble = true ;
385
- }
386
- finish = std::chrono::steady_clock::now ();
387
- dur = finish - start;
388
- delayed = dur > std::chrono::seconds (1 );
389
- if (trouble || delayed) {
390
- if (delayed) {
391
- ++_delays;
392
- }
393
- LOG_TOPIC (" 66671" , INFO, Logger::ENGINES)
394
- << " IOHeartbeat: trying to remove test file took "
395
- << std::chrono::duration_cast<std::chrono::microseconds>(dur)
396
- .count ()
397
- << " microseconds." ;
398
- }
399
- }
400
-
401
- // Total duration and update histogram:
402
- dur = finish - start1;
403
- _exeTimeHistogram.count (static_cast <double >(
404
- std::chrono::duration_cast<std::chrono::microseconds>(dur).count ()));
405
-
406
- std::unique_lock<std::mutex> guard (_mutex);
407
- if (trouble) {
408
- // In case of trouble, we retry more quickly, since we want to
409
- // have a record when the trouble has actually stopped!
410
- _cv.wait_for (guard, checkIntervalTrouble);
411
- } else {
412
- _cv.wait_for (guard, checkIntervalNormal);
413
- }
414
- } catch (...) {
415
- }
416
- // next iteration
417
- }
418
- LOG_TOPIC (" 66664" , DEBUG, Logger::ENGINES) << " IOHeartbeatThread: stopped." ;
419
- }
420
-
421
273
DatabaseFeature::DatabaseFeature (Server& server)
422
274
: ArangodFeature{server, *this } {
423
275
setOptional (false );
@@ -430,6 +282,8 @@ DatabaseFeature::DatabaseFeature(Server& server)
430
282
startsAfter<StorageEngineFeature>();
431
283
}
432
284
285
+ DatabaseFeature::~DatabaseFeature () = default ;
286
+
433
287
void DatabaseFeature::collectOptions (std::shared_ptr<ProgramOptions> options) {
434
288
options->addSection (" database" , " database options" );
435
289
0 commit comments