8000 BTS-519: avoid holding 2 mutexes at the same time (#14511) · arangodb/arangodb@6aebaaf · GitHub
[go: up one dir, main page]

Skip to content

Commit 6aebaaf

Browse files
authored
BTS-519: avoid holding 2 mutexes at the same time (#14511)
1 parent 7d34504 commit 6aebaaf

File tree

2 files changed

+40
-14
lines changed

2 files changed

+40
-14
lines changed

arangod/Pregel/Conductor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ class Conductor : public std::enable_shared_from_this<Conductor> {
151151
}
152152

153153
bool canBeGarbageCollected() const;
154+
155+
uint64_t executionNumber() const { return _executionNumber; }
154156

155157
private:
156158
void cancelNoLock();

arangod/Pregel/PregelFeature.cpp

Original file line numberDiff line numberDiff line change
@@ -335,19 +335,34 @@ std::shared_ptr<Conductor> PregelFeature::conductor(uint64_t executionNumber) {
335335

336336
void PregelFeature::garbageCollectConductors() try {
337337
// iterate over all conductors and remove the ones which can be garbage-collected
338-
MUTEX_LOCKER(guard, _mutex);
339-
for (auto it = _conductors.begin(); it != _conductors.end(); /*no hoisting*/) {
340-
if (it->second.conductor->canBeGarbageCollected()) {
341-
uint64_t executionNumber = it->first;
342-
343-
it->second.conductor->cancel();
344-
it = _conductors.erase(it);
345-
346-
_workers.erase(executionNumber);
347-
} else {
348-
++it;
338+
std::vector<std::shared_ptr<Conductor>> conductors;
339+
340+
// copy out shared-ptrs of Conductors under the mutex
341+
{
342+
MUTEX_LOCKER(guard, _mutex);
343+
for (auto const& it : _conductors) {
344+
if (it.second.conductor->canBeGarbageCollected()) {
345+
if (conductors.empty()) {
346+
conductors.reserve(8);
347+
}
348+
conductors.emplace_back(it.second.conductor);
349+
}
349350
}
350351
}
352+
353+
// cancel and kill conductors without holding the mutex
354+
// permanently
355+
for (auto& c : conductors) {
356+
c->cancel();
357+
}
358+
359+
MUTEX_LOCKER(guard, _mutex);
360+
for (auto& c : conductors) {
361+
uint64_t executionNumber = c->executionNumber();
362+
363+
_conductors.erase(executionNumber);
364+
_workers.erase(executionNumber);
365+
}
351366
} catch (...) {}
352367

353368
void PregelFeature::addWorker(std::shared_ptr<IWorker>&& w, uint64_t executionNumber) {
@@ -521,21 +536,30 @@ uint64_t PregelFeature::numberOfActiveConductors() const {
521536
Result PregelFeature::toVelocyPack(TRI_vocbase_t& vocbase,
522537
arangodb::velocypack::Builder& result,
523538
bool allDatabases, bool fanout) const {
524-
Result res;
539+
std::vector<std::shared_ptr<Conductor>> conductors;
525540

526-
result.openArray();
541+
// make a copy of all conductor shared-ptrs under the mutex
527542
{
528543
MUTEX_LOCKER(guard, _mutex);
544+
conductors.reserve(_conductors.size());
529545

530546
for (auto const& p : _conductors) {
531547
auto const& ce = p.second;
532548
if (!::authorized(ce.user)) {
533549
continue;
534550
}
535551

536-
ce.conductor->toVelocyPack(result);
552+
conductors.emplace_back(ce.conductor);
537553
}
538554
}
555+
556+
// release lock, and now velocypackify all conductors
557+
result.openArray();
558+
for (auto const& c : conductors) {
559+
c->toVelocyPack(result);
560+
}
561+
562+
Result res;
539563

540564
if (ServerState::instance()->isCoordinator() && fanout) {
541565
// coordinator case, fan out to other coordinators!

0 commit comments

Comments
 (0)
0