8000 Added (gauge) metric "rocksdb_read_only" (#14469) · arangodb/arangodb@e88ab99 · GitHub
[go: up one dir, main page]

Skip to content

Commit e88ab99

Browse files
jsteemann and KVS85 authored
Added (gauge) metric "rocksdb_read_only" (#14469)
* APM-107: Added metric "rocksdb_read_only" to determine whether RocksDB is currently in read-only mode due to a background error. The metric will have a value of "1" if RocksDB is in read-only mode and "0" if RocksDB is in normal operations mode. If the metric value is "1" it means all writes into RocksDB will fail, so inspecting the logfiles and acting on the actual error situation is required. Co-authored-by: Vadim <vadim@arangodb.com>
1 parent e95da8d commit e88ab99

File tree

5 files changed

+37
-6
lines changed

5 files changed

+37
-6
lines changed

CHANGELOG

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
v3.7.13 (XXXX-XX-XX)
22
--------------------
33

4+
* APM-107: Added metric "rocksdb_read_only" to determine whether RocksDB is
5+
currently in read-only mode due to a background error. The metric will have a
6+
value of "1" if RocksDB is in read-only mode and "0" if RocksDB is in normal
7+
operations mode. If the metric value is "1" it means all writes into RocksDB
8+
will fail, so inspecting the logfiles and acting on the actual error situation
9+
is required.
10+
411
* Add following term ids, which prevents old synchronous replication requests
512
from being accepted after a follower was dropped and has gotten in sync again.
613
This makes the chaos tests which delay synchronous replication requests more

arangod/RocksDBEngine/RocksDBBackgroundErrorListener.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828

2929
namespace arangodb {
3030

31+
RocksDBBackgroundErrorListener::RocksDBBackgroundErrorListener()
32+
: _called(false) {}
33+
3134
RocksDBBackgroundErrorListener::~RocksDBBackgroundErrorListener() = default;
3235

3336
void RocksDBBackgroundErrorListener::OnBackgroundError(rocksdb::BackgroundErrorReason reason,
@@ -37,10 +40,8 @@ void RocksDBBackgroundErrorListener::OnBackgroundError(rocksdb::BackgroundErrorR
3740
return;
3841
}
3942

40-
if (!_called) {
41-
_called = true;
42-
43-
std::string operation = "unknown";
43+
if (!_called.exchange(true)) {
44+
char const* operation = "unknown";
4445
switch (reason) {
4546
case rocksdb::BackgroundErrorReason::kFlush: {
4647
operation = "flush";
@@ -67,4 +68,11 @@ void RocksDBBackgroundErrorListener::OnBackgroundError(rocksdb::BackgroundErrorR
6768
}
6869
}
6970

71+
void RocksDBBackgroundErrorListener::OnErrorRecoveryCompleted(rocksdb::Status /* old_bg_error */) {
72+
_called.store(false, std::memory_order_relaxed);
73+
74+
LOG_TOPIC("8ff56", WARN, Logger::ROCKSDB)
75+
<< "RocksDB resuming operations after background error";
76+
}
77+
7078
} // namespace arangodb

arangod/RocksDBEngine/RocksDBBackgroundErrorListener.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,24 @@
2727
#include <rocksdb/db.h>
2828
#include <rocksdb/listener.h>
2929

30+
#include <atomic>
31+
3032
namespace arangodb {
3133

3234
class RocksDBBackgroundErrorListener : public rocksdb::EventListener {
3335
public:
36+
RocksDBBackgroundErrorListener();
3437
virtual ~RocksDBBackgroundErrorListener();
3538

3639
void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status* error) override;
40+
void OnErrorRecoveryCompleted(rocksdb::Status /* old_bg_error */) override;
41+
42+
bool called() const noexcept {
43+
return _called.load(std::memory_order_relaxed);
44+
}
3745

3846
private:
39-
bool _called = false;
47+
std::atomic<bool> _called;
4048
}; // class RocksDBBackgroundErrorListener
4149

4250
} // namespace arangodb

arangod/RocksDBEngine/RocksDBEngine.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,8 @@ void RocksDBEngine::start() {
642642
_options.listeners.push_back(_shaListener);
643643
} // if
644644

645-
_options.listeners.push_back(std::make_shared<RocksDBBackgroundErrorListener>());
645+
_errorListener = std::make_shared<RocksDBBackgroundErrorListener>();
646+
_options.listeners.push_back(_errorListener);
646647

647648
if (opts._totalWriteBufferSize > 0) {
648649
_options.db_write_buffer_size = opts._totalWriteBufferSize;
@@ -2468,6 +2469,10 @@ void RocksDBEngine::getStatistics(VPackBuilder& builder) const {
24682469
builder.add("rocksdbengine.throttle.bps", VPackValue(_listener->GetThrottle()));
24692470
} // if
24702471

2472+
if (_errorListener) {
2473+
builder.add("rocksdb.read-only", VPackValue(_errorListener->called() ? 1 : 0));
2474+
}
2475+
24712476
builder.close();
24722477
}
24732478

arangod/RocksDBEngine/RocksDBEngine.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ namespace arangodb {
5252

5353
class PhysicalCollection;
5454
class PhysicalView;
55+
class RocksDBBackgroundErrorListener;
5556
class RocksDBBackgroundThread;
5657
class RocksDBEventListener;
5758
class RocksDBKey;
@@ -497,6 +498,8 @@ class RocksDBEngine final : public StorageEngine {
497498
// optional code to notice when rocksdb creates or deletes .sst files. Currently
498499
// uses that input to create or delete parallel sha256 files
499500
std::shared_ptr<RocksDBEventListener> _shaListener;
501+
502+
std::shared_ptr<RocksDBBackgroundErrorListener> _errorListener;
500503

501504
arangodb::basics::ReadWriteLock _purgeLock;
502505

0 commit comments

Comments
 (0)
0