8000 Bug fix 3.8/search 238 late materialization with constrained heap (#14722) by Dronplane · Pull Request #14724 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Bug fix 3.8/search 238 late materialization with constrained heap (#14722) #14724

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
v3.8.2 (XXXX-XX-XX)
-------------------

* SEARCH-238: Improved SortNodes placement optimization in cluster so late
materialization could cover more cases.

* Fix rare case of invalid data could be inserted in the ArangoSearch index if
several clients concurrently inserts data and use custom analyzer with
non-string return type.
Expand Down
28 changes: 26 additions & 2 deletions arangod/Aql/OptimizerRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4870,6 +4870,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,
OptimizerRule const& rule) {
::arangodb::containers::SmallVector<ExecutionNode*>::allocator_type::arena_type a;
::arangodb::containers::SmallVector<ExecutionNode*> nodes{a};
VarSet usedBySort;
plan->findNodesOfType(nodes, EN::GATHER, true);

bool modified = false;
Expand Down Expand Up @@ -4931,14 +4932,37 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,

case EN::SORT: {
auto thisSortNode = ExecutionNode::castTo<SortNode*>(inspectNode);

usedBySort.clear();
thisSortNode->getVariablesUsedHere(usedBySort);
// remember our cursor...
parents = inspectNode->getParents();
// then unlink the filter/calculator from the plan
plan->unlinkNode(inspectNode);
// and re-insert into plan in front of the remoteNode
if (thisSortNode->_reinsertInCluster) {
plan->insertDependency(rn, inspectNode);
// let's look for the best place for that SORT.
// We could skip over several calculations if
// they are not needed for our sort. So we could calculate
// more lazily and even make late materialization possible
ExecutionNode* insertPoint = rn;
auto current = insertPoint->getFirstDependency();
while (current != nullptr &&
current->getType() == EN::CALCULATION) {
auto nn = ExecutionNode::castTo<CalculationNode*>(current);
if (!nn->expression()->isDeterministic()) {
// let's not touch non-deterministic calculation
// as results may depend on calls count and sort could change this
break;
}
auto variable = nn->outVariable();
if (usedBySort.find(variable) == usedBySort.end()) {
insertPoint = current;
} else {
break; // first node used by sort. We should stop here.
}
current = current->getFirstDependency();
}
plan->insertDependency(insertPoint, inspectNode);
}

gatherNode->elements(thisSortNode->elements());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,34 @@
});
assertEqual(0, expected.size);
},
testConstrainedSortOnDbServer() {
let query = "FOR d IN " + vn + " SEARCH d.value > 9 SORT BM25(d) LIMIT 10 RETURN {key: d._key, value: d.value}";
let plan = AQL_EXPLAIN(query).plan;
assertNotEqual(-1, plan.rules.indexOf(ruleName));
let materializeNodeFound = false;
let nodeDependency = null;
plan.nodes.forEach(function(node) {
if( node.type === "MaterializeNode") {
assertFalse(materializeNodeFound);
// FIXME: temporary relaxed assertion
// We need to fix node placement in oneshard mode
// first. And than we indeed will get expected node placement
// assertEqual(nodeDependency.type, isCluster ? "SortNode" : "LimitNode");
assertTrue(nodeDependency.type === "SortNode" || nodeDependency.type === "LimitNode");
materializeNodeFound = true;
}
nodeDependency = node;
});
assertTrue(materializeNodeFound);
let result = AQL_EXECUTE(query);
assertEqual(4, result.json.length);
let expected = new Set(['c_0', 'c_1', 'c_2', 'c_3']);
result.json.forEach(function(doc) {
assertTrue(expected.has(doc.key));
expected.delete(doc.key);
});
assertEqual(0, expected.size);
}
};
};
}());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,31 @@ function lateDocumentMaterializationRuleTestSuite () {
let result = AQL_EXECUTE(query);
assertEqual(1, result.json.length);
assertEqual(result.json[0]._key, 'c0');
},
testConstrainedSortOnDbServer() {
let query = "FOR d IN " + prefixIndexCollectionName + " FILTER d.obj.b == {sb: 'b_val_0'} " +
"SORT d.obj.b LIMIT 10 RETURN {key: d._key, value: d.some_value_from_doc}";
let plan = AQL_EXPLAIN(query).plan;
assertNotEqual(-1, plan.rules.indexOf(ruleName));
let materializeNodeFound = false;
let nodeDependency = null;
plan.nodes.forEach(function(node) {
if( node.type === "MaterializeNode") {
assertFalse(materializeNodeFound);
assertEqual(nodeDependency.type, isCluster ? "SortNode" : "LimitNode");
materializeNodeFound = true;
}
nodeDependency = node;
});
assertTrue(materializeNodeFound);
let result = AQL_EXECUTE(query);
assertEqual(1, result.json.length);
let expected = new Set(['c0']);
result.json.forEach(function(doc) {
assertTrue(expected.has(doc.key));
expected.delete(doc.key);
});
assertEqual(0, expected.size);
}
};
}
Expand Down
12 changes: 11 additions & 1 deletion tests/js/server/aql/aql-queries-optimizer-limit-cluster.js
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,17 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
var actual = getQueryResults(query);
assertEqual(0, actual.length);

assertEqual([ "SingletonNode", "EnumerateCollectionNode", "CalculationNode", "CalculationNode", "SortNode", "RemoteNode", "GatherNode", "LimitNode", "FilterNode", "ReturnNode" ], explain(query));
assertEqual([ "SingletonNode", "EnumerateCollectionNode", "CalculationNode", "SortNode", "CalculationNode",
"RemoteNode", "GatherNode", "LimitNode", "FilterNode", "ReturnNode" ], explain(query));
},
testLimitFullCollectionSort3_DoubleCalculation : function () {
var query = "FOR c IN " + cn + " SORT c.value LIMIT 0, 10 FILTER c.value >= 20 && c.value < 30 RETURN c.value2";

var actual = getQueryResults(query);
assertEqual(0, actual.length);

assertEqual([ "SingletonNode", "EnumerateCollectionNode", "CalculationNode", "SortNode", "CalculationNode",
"CalculationNode", "RemoteNode", "GatherNode", "LimitNode", "FilterNode", "ReturnNode" ], explain(query));
},

////////////////////////////////////////////////////////////////////////////////
Expand Down
0