8000 Bug fix/print on violated block ordering by mchacki · Pull Request #21743 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Bug fix/print on violated block ordering #21743

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Sleeping barber in SharedQueryState (#21732)
* sleeping barber in SharedQueryState

* double entry in change log

* Add oskar branch parameter to CircleCI

* disable vector index on ARM for now, faiss is known to be crashy now. (#21718)

* disable vector index on ARM for now, faiss is known to be crashy now.

* lint

* lint

* disable on ARM

* disable on ARM

---------

Co-authored-by: Vadim Kondratev <vadim@arangodb.com>
Co-authored-by: Wilfried Goesgens <willi@arangodb.com>
  • Loading branch information
3 people authored and mchacki committed Apr 30, 2025
commit 50c07157c557eaf33d9c534c1ea36299f7ffa1e1
6 changes: 5 additions & 1 deletion .circleci/base_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ parameters:
enterprise-branch:
type: string
default: ""
oskar-branch:
type: string
default: "master"
dont-cancel-pipelines:
type: boolean
default: false
Expand Down Expand Up @@ -363,7 +366,8 @@ jobs:
curl -s -L -o build/bin/arangodb "https://github.com/arangodb-helper/arangodb/releases/download/$STARTER_REV/arangodb-linux-$arch"
chmod a+x build/bin/arangodb
if [ << parameters.enterprise >> = true ]; then
curl -s -L -o build/bin/rclone-arangodb "https://github.com/arangodb/oskar/raw/master/rclone/v$RCLONE_VERSION/rclone-arangodb-linux-$arch"
OSKAR_BRANCH=<< pipeline.parameters.oskar-branch >>
curl -s -L -o build/bin/rclone-arangodb "https://github.com/arangodb/oskar/raw/$OSKAR_BRANCH/rclone/v$RCLONE_VERSION/rclone-arangodb-linux-$arch"
chmod a+x build/bin/rclone-arangodb
fi
- when:
Expand Down
4 changes: 4 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ parameters:
# contains a branch with the same name as the arangodb repo. If this is the case
# we use it, otherwise we fall back to "devel".
default: ""
oskar-branch:
type: string
default: "master"
arangosh_args:
type: string
default: ""
Expand Down Expand Up @@ -137,6 +140,7 @@ jobs:
name: Print pipeline parameters
command: |
echo "enterprise-branch: << pipeline.parameters.enterprise-branch >>"
echo "oskar-branch: << pipeline.parameters.oskar-branch >>"
echo "create-docker-images: << pipeline.parameters.create-docker-images >>"
echo "rebuild-test-docker-images: << pipeline.parameters.rebuild-test-docker-images >>"
echo "build-docker-image: << pipeline.parameters.build-docker-image >>"
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
3.12.4.3 (XXXX-XX-XX)
---------------------

* Fix concurrency bug which can lead to lost threads. See BTS-2087.

3.12.4.2 (2025-04-09)
---------------------

Expand Down
13 changes: 11 additions & 2 deletions arangod/Aql/SharedQueryState.cpp
8000
Original file line numberDiff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "SharedQueryState.h"

#include "ApplicationFeatures/ApplicationServer.h"
#include "Logger/LogMacros.h"
#include "Basics/Exceptions.h"
#include "Cluster/ServerState.h"
#include "RestServer/QueryRegistryFeature.h"
Expand Down Expand Up @@ -60,8 +61,16 @@ void SharedQueryState::invalidate() {
_cv.notify_all(); // wakeup everyone else

if (_numTasks.load() > 0) {
std::unique_lock<std::mutex> guard(_mutex);
_cv.wait(guard, [&] { return _numTasks.load() == 0; });
while (true) {
std::unique_lock<std::mutex> guard(_mutex);
_cv.wait_for(guard, std::chrono::milliseconds(1000),
[&] { return _numTasks.load() == 0; });
if (_numTasks.load() == 0) {
break;
}
LOG_TOPIC("abcee", DEBUG, Logger::QUERIES)
<< "Waiting for " << _numTasks.load() << " tasks to finish";
}
}
}

Expand Down
38 changes: 38 additions & 0 deletions arangod/Aql/SharedQueryState.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,36 @@ class SharedQueryState final
/// execute a task in parallel if capacity is there
template<typename F>
bool asyncExecuteAndWakeup(F&& cb) {
// The atomic _numTasks counts the number of ongoing tasks asynchronous
// tasks. We need this for two purposes: One is to limit parallelism
// so we need to know how many tasks we have already launched. But
// Secondly, we want to wait for them to finish when the query is
// shut down, in particular when it is killed or has run into an
// exception. Note that this is *not* necessary for synchronous
// tasks.
// When _numTasks drops to 0, we need to wake up a thread which
// is waiting for this on the condition variable _cv. We must
// not miss this event, or else we might have a thread which is
// waiting forever. The waiting thread uses a predicate to check
// if _numTasks is 0, and only goes to sleep when it is not. This
// happens under the mutex _mutex and releasing the mutex and going
// to sleep is an atomic operation. Thus, to not miss the event that
// _numTasks is reduced to zero, we must, whenever we decrement it,
// do this under the mutex, and then, after releasing the mutex,
// notify the condition variable _cv! Then either the decrement or
// the going to sleep happens first (serialized by the mutex). If
// the decrement happens first, the waiting thread is not even going
// to sleep, if the going to sleep happens first, then we will wake
// it up.
unsigned num = _numTasks.fetch_add(1);
if (num + 1 > _maxTasks) {
// We first count down _numTasks to revert the counting up, since
// we have not - after all - started a new async task. Then we run
// the callback synchronously.
std::unique_lock<std::mutex> guard(_mutex);
_numTasks.fetch_sub(1); // revert
guard.unlock();
_cv.notify_all();
std::forward<F>(cb)(false);
return false;
}
Expand All @@ -130,7 +157,13 @@ class SharedQueryState final
});

if (!queued) {
// We first count down _numTasks to revert the counting up, since
// we have not - after all - started a new async task. Then we run
// the callback synchronously.
std::unique_lock<std::mutex> guard(_mutex);
_numTasks.fetch_sub(1); // revert
guard.unlock();
_cv.notify_all();
std::forward<F>(cb)(false);
}
return queued;
Expand Down Expand Up @@ -166,6 +199,11 @@ class SharedQueryState final
unsigned _callbackVersion;

unsigned _maxTasks;
// Note that we are waiting for _numTasks to drop down to zero using
// the condition Variable _cv above, which is protected by the mutex
// _mutex above. Therefore, to avoid losing wakeups, it is necessary
// to only ever reduce the value of _numTasks under the mutex and then
// wake up the condition variable _cv!
std::atomic<unsigned> _numTasks;
std::atomic<bool> _valid;
};
Expand Down
10 changes: 10 additions & 0 deletions js/client/modules/@arangodb/testutils/client-tools.js
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,16 @@ function rtaMakedata(options, instanceManager, writeReadClean, msg, logFile, mor
if (addArgs !== undefined) {
args = Object.assign(args, addArgs);
}
// TODO: vector index broken on circleci-ARM
if (versionHas("arm")) {
let skipOffset = moreargv.findIndex(i => {return i === '--skip';});
if (skipOffset >= 0) {
moreargv[skipOffset + 1] += ',107';
} else {
moreargv = ['--skip', '107_'];
}
}

let argv = toArgv(args);
argv = argv.concat(['--', options.makedataDB],
moreargv, [
Expand Down
9 changes: 6 additions & 3 deletions tests/js/client/aql/vector/aql-vector-create-and-remove.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ const {
randomNumberGeneratorFloat,
} = require("@arangodb/testutils/seededRandom");

const { versionHas } = require("@arangodb/test-helper");

const dbName = "vectorDB";
const collName = "coll";
const indexName = "vectorIndex";
Expand Down Expand Up @@ -286,7 +288,8 @@ function VectorIndexTestCreationWithVectors() {
}


jsunity.run(VectorIndexCreateAndRemoveTestSuite);
jsunity.run(VectorIndexTestCreationWithVectors);

if (!versionHas("arm")) {
jsunity.run(VectorIndexCreateAndRemoveTestSuite);
jsunity.run(VectorIndexTestCreationWithVectors);
}
return jsunity.done();
12 changes: 7 additions & 5 deletions tests/js/client/aql/vector/aql-vector-full-count.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const {
randomNumberGeneratorFloat,
} = require("@arangodb/testutils/seededRandom");

const { versionHas } = require("@arangodb/test-helper");
const isCluster = require("internal").isCluster();
const dbName = "vectorDB";
const collName = "vectorColl";
Expand Down Expand Up @@ -416,8 +417,9 @@ function VectorIndexFullCountCollectionWithSmallAmountOfDocs() {
};
}

jsunity.run(VectorIndexFullCountTestSuite);
jsunity.run(VectorIndexFullCountWithNotEnoughNListsTestSuite);
jsunity.run(VectorIndexFullCountCollectionWithSmallAmountOfDocs);

return jsunity.done();
if (!versionHas("arm")) {
jsunity.run(VectorIndexFullCountTestSuite);
jsunity.run(VectorIndexFullCountWithNotEnoughNListsTestSuite);
jsunity.run(VectorIndexFullCountCollectionWithSmallAmountOfDocs);
}
return jsunity.done();
6 changes: 4 additions & 2 deletions tests/js/client/aql/vector/aql-vector-nprobe.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const {
randomNumberGeneratorFloat,
} = require("@arangodb/testutils/seededRandom");

const { versionHas } = require("@arangodb/test-helper");
const isCluster = require("internal").isCluster();
const dbName = "vectorDB";
const collName = "vectorColl";
Expand Down Expand Up @@ -162,6 +163,7 @@ function VectorIndexL2NprobeTestSuite() {
};
}

jsunity.run(VectorIndexL2NprobeTestSuite);

if (!versionHas("arm")) {
jsunity.run(VectorIndexL2NprobeTestSuite);
}
return jsunity.done();
11 changes: 6 additions & 5 deletions tests/js/client/aql/vector/aql-vector.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const db = internal.db;
const {
randomNumberGeneratorFloat,
} = require("@arangodb/testutils/seededRandom");

const { versionHas } = require("@arangodb/test-helper");
const isCluster = require("internal").isCluster();
const dbName = "vectorDb";
const collName = "vectorColl";
Expand Down Expand Up @@ -872,9 +872,10 @@ function MultipleVectorIndexesOnField() {
};
}

jsunity.run(VectorIndexL2TestSuite);
jsunity.run(VectorIndexCosineTestSuite);
jsunity.run(MultipleVectorIndexesOnField);

if (!versionHas("arm")) {
jsunity.run(VectorIndexL2TestSuite);
jsunity.run(VectorIndexCosineTestSuite);
jsunity.run(MultipleVectorIndexesOnField);
}
return jsunity.done();

0