8000 nested collect backport 3.5 (#9587) · mhdprogrammer1/arangodb@f995c46 · GitHub
[go: up one dir, main page]

Skip to content

Commit f995c46

Browse files
ObiWahnKVS85
authored andcommitted
nested collect backport 3.5 (arangodb#9587)
* stop optimization for nested collects (arangodb#9484) * Update CHANGELOG
1 parent 7813c76 commit f995c46

File tree

6 files changed

+565
-74
lines changed

6 files changed

+565
-74
lines changed

CHANGELOG

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
v3.5.0-rc.6 (2019-07-27)
1+
v3.5.0-rc.6 (2019-XX-XX)
22
------------------------
33

4+
* Fixed issue #9459: Optimization rule remove-collect-variables does not KEEP all necessary data.
5+
46
* Added gzip and encryption options to arangoimport and arangoexport.
57

68
* Added missing REST API route GET /_api/transaction for retrieving the list of

arangod/Aql/Ast.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -826,12 +826,12 @@ AstNode* Ast::createNodeBinaryOperator(AstNodeType type, AstNode const* lhs,
826826
// contain an attribute access, e.g. doc.value1 == doc.value2
827827
bool swap = false;
828828
if (type == NODE_TYPE_OPERATOR_BINARY_E 10000 Q &&
829-
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
829+
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
830830
lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
831831
// value == doc.value => doc.value == value
832832
swap = true;
833833
} else if (type == NODE_TYPE_OPERATOR_BINARY_NE &&
834-
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
834+
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
835835
lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
836836
// value != doc.value => doc.value != value
837837
swap = true;
@@ -1926,9 +1926,9 @@ void Ast::validateAndOptimize() {
19261926
}
19271927
} else if (node->type == NODE_TYPE_AGGREGATIONS) {
19281928
--ctx->stopOptimizationRequests;
1929-
} else if (node->type == NODE_TYPE_ARRAY &&
1930-
node->hasFlag(DETERMINED_CONSTANT) &&
1931-
!node->hasFlag(VALUE_CONSTANT) &&
1929+
} else if (node->type == NODE_TYPE_ARRAY &&
1930+
node->hasFlag(DETERMINED_CONSTANT) &&
1931+
!node->hasFlag(VALUE_CONSTANT) &&
19321932
node->numMembers() < 10) {
19331933
// optimization attempt: we are speculating that this array contains function
19341934
// call parameters, which may have been optimized somehow.
@@ -3206,12 +3206,12 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) {
32063206
}
32073207
#if 0
32083208
} else if (func->name == "LIKE") {
3209-
// optimize a LIKE(x, y) into a plain x == y or a range scan in case the
3209+
// optimize a LIKE(x, y) into a plain x == y or a range scan in case the
32103210
// search is case-sensitive and the pattern is either a full match or a
32113211
// left-most prefix
32123212

32133213
// this is desirable in 99.999% of all cases, but would cause the following incompatibilities:
3214-
// - the AQL LIKE function will implicitly cast its operands to strings, whereas
3214+
// - the AQL LIKE function will implicitly cast its operands to strings, whereas
32153215
// operator == in AQL will not do this. So LIKE(1, '1') would behave differently
32163216
// when executed via the AQL LIKE function or via 1 == '1'
32173217
// - for left-most prefix searches (e.g. LIKE(text, 'abc%')) we need to determine
@@ -3232,7 +3232,7 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) {
32323232
caseInsensitive = caseArg->isTrue();
32333233
}
32343234
}
3235-
3235+
32363236
auto patternArg = args->getMember(1);
32373237

32383238
if (!caseInsensitive && patternArg->isStringValue()) {
@@ -3265,7 +3265,7 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) {
32653265

32663266
AstNode* op = createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, lhs, rhs);
32673267
if (wildcardIsLastChar) {
3268-
// replace LIKE with >= && <=
3268+
// replace LIKE with >= && <=
32693269
return op;
32703270
}
32713271
// add >= && <=, but keep LIKE in place

arangod/Aql/AstHelper.h

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
////////////////////////////////////////////////////////////////////////////////
2+
/// DISCLAIMER
3+
///
4+
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
5+
///
6+
/// Licensed under the Apache License, Version 2.0 (the "License");
7+
/// you may not use this file except in compliance with the License.
8+
/// You may obtain a copy of the License at
9+
///
10+
/// http://www.apache.org/licenses/LICENSE-2.0
11+
///
12+
/// Unless required by applicable law or agreed to in writing, software
13+
/// distributed under the License is distributed on an "AS IS" BASIS,
14+
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
/// See the License for the specific language governing permissions and
16+
/// limitations under the License.
17+
///
18+
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
///
20+
/// @author Jan Christoph Uhde
21+
////////////////////////////////////////////////////////////////////////////////
22+
23+
#ifndef ARANGOD_AQL_AST_HELPER_H
24+
#define ARANGOD_AQL_AST_HELPER_H 1
25+
26+
#include "Aql/Ast.h"
27+
#include "Aql/AstNode.h"
28+
#include "Aql/Variable.h"
29+
#include "Basics/SmallVector.h"
30+
#include "Logger/Logger.h"
31+
32+
#include <unordered_set>
33+
34+
namespace {
35+
auto doNothingVisitor = [](arangodb::aql::AstNode const*) {};
36+
}
37+
38+
namespace arangodb {
39+
namespace aql {
40+
namespace ast {
41+
42+
namespace {
43+
44+
template < typename T>
45+
T& operator<<(T& os, SmallVector<Variable const*> const& vec) {
46+
os << "[ ";
47+
for(auto const& var: vec) {
48+
os << var->name << " ";
49+
}
50+
return os << "]";
51+
}
52+
53+
bool accessesSearchVariableViaReference(AstNode const* current, Variable const* searchVariable) {
54+
if (current->type == NODE_TYPE_INDEXED_ACCESS) {
55+
auto sub = current->getMemberUnchecked(0);
56+
if (sub->type == NODE_TYPE_REFERENCE) {
57+
Variable const* v = static_cast<Variable const*>(sub->getData());
58+
if (v->id == searchVariable->id) {
59+
return true;
60+
}
61+
}
62+
} else if (current->type == NODE_TYPE_EXPANSION) {
63+
if (current->numMembers() < 2) {
64+
return false;
65+
}
66+
auto it = current->getMemberUnchecked(0);
67+
if (it->type != NODE_TYPE_ITERATOR || it->numMembers() != 2) {
68+
return false;
69+
}
70+
71+
auto sub1 = it->getMember(0);
72+
auto sub2 = it->getMember(1);
73+
if (sub1->type != NODE_TYPE_VARIABLE || sub2->type != NODE_TYPE_REFERENCE) {
74+
return false;
75+
}
76+
Variable const* v = static_cast<Variable const*>(sub2->getData());
77+
if (v->id == searchVariable->id) {
78+
return true;
79+
}
80+
}
81+
82+
return false;
83+
};
84+
85+
bool isTargetVariable(AstNode const* node, SmallVector<Variable const*>& searchVariables, bool& isSafeForOptimization) {
86+
TRI_ASSERT(!searchVariables.empty());
87+
TRI_ASSERT(node->type == NODE_TYPE_INDEXED_ACCESS || node->type == NODE_TYPE_EXPANSION);
88+
89+
// given and expression like g3[0].`g2`[0].`g1`[0].`item1`.`_id`
90+
// this loop resolves subtrees of the form: .`g2`[0].`g1`[0]
91+
// search variables should equal [g1, g2, g3]. Therefor we need not match the
92+
// variable names from the vector with those in the expression while going
93+
// forward in the vector the we go backward in the expression
94+
95+
auto current = node;
96+
for(auto varIt = searchVariables.begin(); varIt != std::prev(searchVariables.end()); ++varIt) {
97+
AstNode* next = nullptr;
98+
if (current->type == NODE_TYPE_INDEXED_ACCESS) {
99+
next = current->getMemberUnchecked(0);
100+
} else if (current->type == NODE_TYPE_EXPANSION) {
101+
102+
if (current->numMembers() < 2) {
103+
return false;
104+
}
105+
106+
auto it = current->getMemberUnchecked(0);
107+
TRI_ASSERT(it);
108+
109+
//The expansion is at the very end
110+
if (it->type == NODE_TYPE_ITERATOR && it->numMembers() == 2) {
111+
if (it->getMember(0)->type != NODE_TYPE_VARIABLE ) {
112+
return false;
113+
}
114+
< 1C6A /code>115+
auto attributeAccess = it->getMember(1);
116+
TRI_ASSERT(attributeAccess);
117+
118+
if (std::next(varIt) != searchVariables.end() && attributeAccess->type == NODE_TYPE_ATTRIBUTE_ACCESS){
119+
next = attributeAccess;
120+
} else {
121+
return false;
122+
}
123+
124+
} else {
125+
// The expansion is not at the very end! we are unable to check if the
126+
// variable will be accessed checking nested expansions is really crazy
127+
// and would probably be best done in a recursive way. If the expansion
128+
// is in the middle, then it would be still the very first node in the
129+
// AST, having 2 subtrees that contain the other search variables
130+
isSafeForOptimization = false; // could be an access - we can not tell
131+
return false;
132+
}
133+
} else {
134+
return false;
135+
}
136+
137+
if(varIt == searchVariables.end()) {
138+
return false;
139+
}
140+
141+
if (next && next->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
142+
next->getString() == (*varIt)->name ) {
143+
current = next->getMemberUnchecked(0);
144+
} else if (next && next->type == NODE_TYPE_EXPANSION) {
145+
isSafeForOptimization = false;
146+
return false;
147+
} else {
148+
return false;
149+
}
150+
} // for nodes but last
151+
152+
if(!current) {
153+
return false;
154+
}
155+
156+
return accessesSearchVariableViaReference(current, searchVariables.back());
157+
}
158+
} // end - namespace unnamed
159+
160+
/// @brief determines the to-be-kept attribute of an INTO expression
161+
inline std::unordered_set<std::string> getReferencedAttributesForKeep(
162+
AstNode const* node, SmallVector<Var 10000 iable const*> searchVariables, bool& isSafeForOptimization){
163+
164+
std::unordered_set<std::string> result;
165+
isSafeForOptimization = true;
166+
167+
std::function<bool(AstNode const*)> visitor = [&isSafeForOptimization,
168+
&result,
169+
&searchVariables](AstNode const* node) {
170+
if (!isSafeForOptimization) {
171+
return false;
172+
}
173+
174+
if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
175+
while (node->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
176+
node = node->getMemberUnchecked(0);
177+
}
178+
if (isTargetVariable(node->getMemberUnchecked(0), searchVariables, isSafeForOptimization)) {
179+
result.emplace(node->getString());
180+
// do not descend further
181+
return false;
182+
}
183+
} else if (node->type == NODE_TYPE_REFERENCE) {
184+
Variable const* v = static_cast<Variable const*>(node->getData());
185+
if (v->id == searchVariables.front()->id) {
186+
isSafeForOptimization = false; // the expression references the searched variable
187+
return false;
188+
}
189+
} else if (node->type == NODE_TYPE_EXPANSION) {
190+
if (isTargetVariable(node, searchVariables, isSafeForOptimization)) {
191+
auto sub = node->getMemberUnchecked(1);
192+
if (sub->type == NODE_TYPE_EXPANSION) {
193+
sub = sub->getMemberUnchecked(0)->getMemberUnchecked(1);
194+
}
195+
if (sub->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
196+
while (sub->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
197+
sub = sub->getMemberUnchecked(0);
198+
}
199+
result.emplace(sub->getString());
200+
// do not descend further
201+
return false;
202+
}
203+
}
204+
}
205+
else if (node->type == NODE_TYPE_INDEXED_ACCESS) {
206+
auto sub = node->getMemberUnchecked(0);
207+
if (sub->type == NODE_TYPE_REFERENCE) {
208+
Variable const* v = static_cast<Variable const*>(sub->getData());
209+
if (v->id == searchVariables.back()->id) {
210+
isSafeForOptimization = false;
211+
return false;
212+
}
213+
}
214+
}
215+
return true;
216+
};
217+
218+
// Traverse AST and call visitor before recursing on each node
219+
// as long as visitor returns true the traversal continues. In
220+
// that branch
221+
Ast::traverseReadOnly(node, visitor, ::doNothingVisitor);
222+
223+
return result;
224+
}
225+
226+
227+
} // end - namsepace ast
228+
} // end - namespace aql
229+
} // end - namespace arangodb
230+
231+
#endif

arangod/Aql/ExecutionNode.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,24 @@
2424
// Execution plans like the one below are made of Nodes that inherit the
2525
// ExecutionNode class as a base class.
2626
//
27+
// clang-format off
28+
//
2729
// Execution plan:
2830
// Id NodeType Est. Comment
2931
// 1 SingletonNode 1 * ROOT
30-
// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection
31-
// scan */ 3 CalculationNode 6400 - LET #1 =
32-
// DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections
33-
// used: d : coll */ 4 SortNode 6400 - SORT #1 ASC 5
34-
// LimitNode 5 - LIMIT 0, 5 6 ReturnNode 5 -
35-
// RETURN d
32+
// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection scan */
33+
// 3 CalculationNode 6400 - LET #1 = DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections used: d : coll */
34+
// 4 SortNode 6400 - SORT #1 ASC
35+
// 5 LimitNode 5 - LIMIT 0, 5
36+
// 6 ReturnNode 5 - RETURN d
37+
//
38+
// clang-format on
3639
//
3740
// Even though the Singleton Node has a label saying it is the "ROOT" node it
3841
// is not in our definiton. Root Nodes are leaf nodes (at the bottom of the
3942
// list).
4043
//
41-
// To get down (direction to root) from 4 to 5 you need to call getFirst Parent
44+
// To get down (direction to root) from 4 to 5 you need to call getFirstParent
4245
// on the SortNode(4) to receive a pointer to the LimitNode(5). If you want to
4346
// go up from 5 to 4 (away from root) you need to call getFirstDependency at
4447
// the LimitNode (5) to get a pointer to the SortNode(4).

0 commit comments

Comments
 (0)
0