8000 Bug fix/improve visualizer performance by mchacki · Pull Request #21819 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Bug fix/improve visualizer performance #21819

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 8, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5026212
Added a test stuie for the graph visualizer backend
mchacki Jun 23, 2025
0e7a6ce
Refactored graphs-v2
mchacki Jun 23, 2025
28a533f
Added a performance test, and started refactoring the backend code fo…
mchacki Jun 23, 2025
b0b924d
Further optimization by using 'KEEP' on required attributes only
mchacki Jun 23, 2025
f331f1a
Applied more complex optimization to make use of projections
mchacki Jun 24, 2025
d06b6d7
Allowed for edge Projections
mchacki Jun 24, 2025
c77b6c8
Removed debug output
mchacki Jun 24, 2025
6d79542
Added CHANGELOG entry
mchacki Jun 24, 2025
dd78cd4
Fixed behaviour for multi-labels
mchacki Jun 24, 2025
44cc13d
disable test, its not working for coverage on all of its parts (#21805)
dothebart Jun 23, 2025
0ab78de
Add inner product metric to vector index (#21814)
mchacki Jun 25, 2025
3279c21
Stop using a streaming transaction after an operation timeouts (#21796)
goedderz Jun 24, 2025
3df4b42
Update README.md for v3.12.5+ (#21804)
Simran-B Jun 24, 2025
0f46ad5
Docs: Reword arangimport startup option description for collection na…
Simran-B Jun 24, 2025
29d1f6b
properly handle connection and make sure we don't forget it (#21822)
dothebart Jun 24, 2025
a67be6c
Fix load memory order (#21818)
jvolmer Jun 25, 2025
5021b04
FE-611 | Add Vector Index Feature (#21793)
bluepal-nadeem-abdun Jun 25, 2025
ed243d0
Added CHANGELOG entry
mchacki Jun 24, 2025
70777db
Merge remote-tracking branch 'origin' into bug-fix/improve-visualizer…
mchacki Jun 25, 2025
6b2e822
Added tests and bugfix for attribute not found behaviour
mchacki Jul 2, 2025
b2213df
Merge remote-tracking branch 'origin' into bug-fix/improve-visualizer…
mchacki Jul 8, 2025
072e309
Merge remote-tracking branch 'origin' into bug-fix/improve-visualizer…
mchacki Jul 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add inner product metric to vector index (#21814)
  • Loading branch information
mchacki committed Jun 25, 2025
commit 0ab78de8a1d95a3b4e4899c16e5e7528d0f627f2
25 changes: 25 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,31 @@ devel
a better performing query. This should help on larger graphs
having more, not displayed data.

* Add inner product metric for vector index and APPROX_NEAR_INNER_PRODUCT()
AQL function. The inner product metric is similar to cosine metric except
vectors are not normalized.

Example of creating a vector index with inner product:
```
db.<collection>.ensureIndex({
type: "vector",
fields: ["value"],
params: {
"metric": "innerProduct",
"dimension": 300,
"nLists": 100
}
});
```

Example of using the APPROX_NEAR_INNER_PRODUCT() function:
```
FOR doc IN collection
SORT APPROX_NEAR_INNER_PRODUCT(doc.vector, @query_vector) DESC
LIMIT 10
RETURN doc
```

* Fix BTS-2151: Fix cosine similarity score in vector index returns score
bigger then one due to not being normalized. It is recommended to drop
all vector indexes that use cosine metric and create them again.
Expand Down
6 changes: 6 additions & 0 deletions arangod/Aql/AqlFunctionFeature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,12 @@ void AqlFunctionFeature::addMiscFunctions() {
FF::CanRunOnDBServerCluster,
FF::CanRunOnDBServerOneShard),
&functions::ApproxNearL2});

add({"APPROX_NEAR_INNER_PRODUCT", ".,.|.",
Function::makeFlags(FF::Deterministic, FF::Cacheable,
FF::CanRunOnDBServerCluster,
FF::CanRunOnDBServerOneShard),
&functions::ApproxNearL2});
}
#ifdef USE_ENTERPRISE
add({"SELECT_SMART_DISTRIBUTE_GRAPH_INPUT", ".,.",
Expand Down
11 changes: 11 additions & 0 deletions arangod/Aql/Function/VectorFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,15 @@ AqlValue ApproxNearL2(ExpressionContext* expressionContext, AstNode const& node,
"with vector search.");
}

AqlValue ApproxNearInnerProduct(ExpressionContext* expressionContext,
AstNode const& node,
VPackFunctionParametersView parameters) {
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_QUERY_VECTOR_SEARCH_NOT_APPLIED,
"Vector search could not be applied. Please ensure a vector index has "
"been created and that your query uses the correct syntax for vector "
"search. Note that filtering is currently not supported "
"with vector search.");
}

} // namespace arangodb::aql::functions
3 changes: 3 additions & 0 deletions arangod/Aql/Functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,9 @@ AqlValue ApproxNearCosine(arangodb::aql::ExpressionContext*, AstNode const&,
AqlValue ApproxNearL2(arangodb::aql::ExpressionContext*, AstNode const&,
VPackFunctionParametersView);

AqlValue ApproxNearInnerProduct(arangodb::aql::ExpressionContext*,
AstNode const&, VPackFunctionParametersView);

AqlValue DecayGauss(arangodb::aql::ExpressionContext*, AstNode const&,
VPackFunctionParametersView);

Expand Down
14 changes: 14 additions & 0 deletions arangod/Aql/Optimizer/Rule/OptimizerRuleVectorIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ bool checkFunctionNameMatchesIndexMetric(
case SimilarityMetric::kCosine: {
return functionName == "APPROX_NEAR_COSINE";
}
case SimilarityMetric::kInnerProduct: {
return functionName == "APPROX_NEAR_INNER_PRODUCT";
}
}
}

Expand Down Expand Up @@ -95,6 +98,8 @@ bool checkApproxNearVariableInput(auto const& vectorIndex,
return outVariable == attributeAccessResult.first;
}

// We return nullptr for AstNode if the check has failed, in that case the bool
// is meaningless
std::pair<AstNode const*, bool> getApproxNearExpression(
auto const* sortNode, std::unique_ptr<ExecutionPlan>& plan,
std::shared_ptr<Index> const& vectorIndex) {
Expand All @@ -107,6 +112,10 @@ std::pair<AstNode const*, bool> getApproxNearExpression(
auto const& sortField = sortFields[0];
bool ascending = sortField.ascending;

// Check if the SORT node has a correct order:
// L2: ASC
// Cosine: DESC
// InnerProduct: DESC
switch (vectorIndex->getVectorIndexDefinition().metric) {
// L2 metric can only be in ascending order
case SimilarityMetric::kL2:
Expand All @@ -120,6 +129,11 @@ std::pair<AstNode const*, bool> getApproxNearExpression(
return {nullptr, ascending};
}
break;
case SimilarityMetric::kInnerProduct:
if (sortField.ascending) {
return {nullptr, ascending};
}
break;
}

// check if SORT node contains APPROX function
Expand Down
6 changes: 4 additions & 2 deletions arangod/Indexes/VectorIndexDefinition.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,14 @@ struct SearchParameters {
enum class SimilarityMetric : std::uint8_t {
kL2,
kCosine,
kInnerProduct,
};

template<class Inspector>
inline auto inspect(Inspector& f, SimilarityMetric& x) {
return f.enumeration(x).values(SimilarityMetric::kL2, "l2",
SimilarityMetric::kCosine, "cosine");
return f.enumeration(x).values(
SimilarityMetric::kL2, "l2", SimilarityMetric::kCosine, "cosine",
SimilarityMetric::kInnerProduct, "innerProduct");
}

struct TrainedData {
Expand Down
4 changes: 4 additions & 0 deletions arangod/RocksDBEngine/RocksDBVectorIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ faiss::MetricType metricToFaissMetric(SimilarityMetric const metric) {
return faiss::MetricType::METRIC_L2;
case SimilarityMetric::kCosine:
return faiss::MetricType::METRIC_INNER_PRODUCT;
case SimilarityMetric::kInnerProduct:
return faiss::METRIC_INNER_PRODUCT;
}
}

Expand Down Expand Up @@ -263,6 +265,8 @@ RocksDBVectorIndex::RocksDBVectorIndex(IndexId iid, LogicalCollection& coll,
return std::make_unique<faiss::IndexFlatL2>(_definition.dimension);
case arangodb::SimilarityMetric::kCosine:
return std::make_unique<faiss::IndexFlatIP>(_definition.dimension);
case arangodb::SimilarityMetric::kInnerProduct:
return std::make_unique<faiss::IndexFlatIP>(_definition.dimension);
}
});

Expand Down
171 changes: 149 additions & 22 deletions tests/js/client/aql/vector/aql-vector.js
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ function VectorIndexL2TestSuite() {

// For l2 metric the results must be ordered in descending order
for (let j = 1; j < results.length; ++j) {
assertTrue(results[j - 1].dist < results[j].dist);
assertTrue(results[j - 1].dist <= results[j].dist);
}
}
},
Expand Down Expand Up @@ -410,30 +410,17 @@ function VectorIndexL2TestSuite() {
qp: randomPoint
};

const planSkipped = db
._createStatement({
query: queryWithSkip,
bindVars,
})
.explain().plan;
const indexNodes = planSkipped.nodes.filter(function(n) {
return n.type === "EnumerateNearVectorNode";
});
assertEqual(1, indexNodes.length);

const resultsWithSkip = db._query(queryWithSkip, bindVars).toArray();
const resultsWithoutSkip = db._query(queryWithoutSkip, bindVars).toArray();

assertEqual(resultsWithSkip.length, 5);
assertEqual(resultsWithoutSkip.length, 8);

// Check that skip results are contained within without skip results
const skipKeys = new Set(resultsWithSkip.map(r => r.k));
const withoutSkipKeys = new Set(resultsWithoutSkip.map(r => r.k));
const expectedKeys = new Set(resultsWithoutSkip.slice(3).map(r => r.k));

assertTrue([...skipKeys].every(key => withoutSkipKeys.has(key)));
assertEqual(skipKeys.size, expectedKeys.size);
assertTrue([...skipKeys].every(key => expectedKeys.has(key)));
},

testApproxL2Subquery: function() {
Expand Down Expand Up @@ -689,27 +676,166 @@ function VectorIndexCosineTestSuite() {
qp: randomPoint
};

const planSkipped = db
const resultsWithSkip = db._query(queryWithSkip, bindVars).toArray();
const resultsWithoutSkip = db._query(queryWithoutSkip, bindVars).toArray();

// Check that skip results are contained within without skip results
const skipKeys = new Set(resultsWithSkip.map(r => r.k));
const withoutSkipKeys = new Set(resultsWithoutSkip.map(r => r.k));

assertTrue([...skipKeys].every(key => withoutSkipKeys.has(key)));
},
};
}

function VectorIndexInnerProductTestSuite() {
let collection;
let randomPoint;
const dimension = 500;
const seed = randomInteger();

return {
setUpAll: function() {
print("Using seed: " + seed);
db._createDatabase(dbName);
db._useDatabase(dbName);

collection = db._create(collName, {
numberOfShards: 3
});

let docs = [];
let gen = randomNumberGeneratorFloat(seed);
for (let i = 0; i < 1000; ++i) {
const vector = Array.from({
length: dimension
}, () => gen());
if (i === 250) {
randomPoint = vector;
}
docs.push({
vector,
nonVector: i,
unIndexedVector: vector
});
}
collection.insert(docs);

collection.ensureIndex({
name: "vector_inner_product",
type: "vector",
fields: ["vector"],
params: {
metric: "innerProduct",
dimension: dimension,
nLists: 10
},
});
},

tearDownAll: function() {
db._useDatabase("_system");
db._dropDatabase(dbName);
},

testApproxInnerProductMultipleTopK: function() {
const query =
"FOR d IN " +
collection.name() +
" LET sim = APPROX_NEAR_INNER_PRODUCT(d.vector, @qp) " +
"SORT sim DESC LIMIT @topK " +
"RETURN {key: d._key, sim}";

const topKs = [1, 5, 10, 15, 50, 100];
let previousResult = [];
for (let i = 0; i < topKs.length; ++i) {
const bindVars = {
qp: randomPoint,
topK: topKs[i]
};
const plan = db
._createStatement({
query,
bindVars,
})
.explain().plan;
const indexNodes = plan.nodes.filter(function(n) {
return n.type === "EnumerateNearVectorNode";
});
assertEqual(1, indexNodes.length);

// Assert gather node is sorted
if (isCluster) {
const gatherNodes = plan.nodes.filter(function(n) {
return n.type === "GatherNode";
});
assertEqual(1, gatherNodes.length);

let gatherNode = gatherNodes[0];
assertEqual(1, gatherNode.elements.length);
assertFalse(gatherNode.elements[0].ascending);
}

const results = db._query(query, bindVars).toArray();

// Assert that results are deterministic
if (i !== 0) {
for (let j = 0; j < previousResult.length; ++j) {
assertEqual(previousResult[j].key, results[j].key);
}
}

// For inner product metric the results must be ordered in descending order
for (let j = 1; j < results.length; ++j) {
assertTrue(results[j - 1].sim >= results[j].sim);
}
}
},

testApproxInnerProductMultipleTopKWrongOrder: function() {
const query =
"FOR d IN " +
collection.name() +
" SORT APPROX_NEAR_INNER_PRODUCT(d.vector, @qp) ASC LIMIT 5 " +
" RETURN {key: d._key}";

const bindVars = {
qp: randomPoint,
};
const plan = db
._createStatement({
query: queryWithSkip,
query,
bindVars,
})
.explain().plan;
const indexNodes = planSkipped.nodes.filter(function(n) {
const indexNodes = plan.nodes.filter(function(n) {
return n.type === "EnumerateNearVectorNode";
});
assertEqual(1, indexNodes.length);
assertEqual(0, indexNodes.length);
},

testApproxInnerProductSkipping: function() {
const queryWithSkip =
"FOR d IN " +
collection.name() +
" SORT APPROX_NEAR_INNER_PRODUCT(@qp, d.vector) DESC LIMIT 3, 5 RETURN {k: d._key}";
const queryWithoutSkip =
"FOR d IN " +
collection.name() +
" SORT APPROX_NEAR_INNER_PRODUCT(d.vector, @qp) DESC LIMIT 8 RETURN {k: d._key}";

const bindVars = {
qp: randomPoint
};

const resultsWithSkip = db._query(queryWithSkip, bindVars).toArray();
const resultsWithoutSkip = db._query(queryWithoutSkip, bindVars).toArray();

// Check that skip results are contained within without skip results
const skipKeys = new Set(resultsWithSkip.map(r => r.k));
const withoutSkipKeys = new Set(resultsWithoutSkip.map(r => r.k));
const expectedKeys = new Set(resultsWithoutSkip.slice(3).map(r => r.k));

assertTrue([...skipKeys].every(key => withoutSkipKeys.has(key)));
assertEqual(skipKeys.size, expectedKeys.size);
assertTrue([...skipKeys].every(key => expectedKeys.has(key)));
},
};
}
Expand Down Expand Up @@ -917,6 +1043,7 @@ function MultipleVectorIndexesOnField() {

jsunity.run(VectorIndexL2TestSuite);
jsunity.run(VectorIndexCosineTestSuite);
jsunity.run(VectorIndexInnerProductTestSuite);
jsunity.run(MultipleVectorIndexesOnField);

return jsunity.done();
Expand Down
0