8000 take over selectivity estimates (#6505) · arangodb/arangodb@45f2110 · GitHub
[go: up one dir, main page]

Skip to content 8000

Commit 45f2110

Browse files
authored
take over selectivity estimates (#6505)
1 parent aa21ffd commit 45f2110

27 files changed

+150
-181
lines changed

arangod/ClusterEngine/ClusterIndex.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ bool ClusterIndex::hasSelectivityEstimate() const {
117117
}
118118

119119
/// @brief default implementation for selectivityEstimate
120-
double ClusterIndex::selectivityEstimate(StringRef const* extra) const {
120+
double ClusterIndex::selectivityEstimate(StringRef const&) const {
121121
TRI_ASSERT(hasSelectivityEstimate());
122122
if (_unique) {
123123
return 1.0;
@@ -198,6 +198,7 @@ bool ClusterIndex::matchesDefinition(VPackSlice const& info) const {
198198
}
199199

200200
bool ClusterIndex::supportsFilterCondition(
201+
std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
201202
arangodb::aql::AstNode const* node,
202203
arangodb::aql::Variable const* reference, size_t itemsInIndex,
203204
size_t& estimatedItems, double& estimatedCost) const {
@@ -216,7 +217,7 @@ bool ClusterIndex::supportsFilterCondition(
216217
#endif
217218
case TRI_IDX_TYPE_NO_ACCESS_INDEX: {
218219
// should not be called for these indexes
219-
return Index::supportsFilterCondition(node, reference, itemsInIndex,
220+
return Index::supportsFilterCondition(allIndexes, node, reference, itemsInIndex,
220221
estimatedItems, estimatedCost);
221222
}
222223
case TRI_IDX_TYPE_HASH_INDEX:{
@@ -225,7 +226,7 @@ bool ClusterIndex::supportsFilterCondition(
225226
return matcher.matchAll(this, node, reference, itemsInIndex, estimatedItems,
226227
estimatedCost);
227228
} else if (_engineType == ClusterEngineType::RocksDBEngine) {
228-
return PersistentIndexAttributeMatcher::supportsFilterCondition(this, node, reference, itemsInIndex,
229+
return PersistentIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex,
229230
estimatedItems, estimatedCost);
230231
}
231232
break;
@@ -239,17 +240,17 @@ bool ClusterIndex::supportsFilterCondition(
239240

240241
case TRI_IDX_TYPE_SKIPLIST_INDEX: {
241242
if (_engineType == ClusterEngineType::MMFilesEngine) {
242-
return SkiplistIndexAttributeMatcher::supportsFilterCondition(this, node, reference, itemsInIndex,
243+
return SkiplistIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex,
243244
estimatedItems, estimatedCost);
244245
} else if (_engineType == ClusterEngineType::RocksDBEngine) {
245-
return PersistentIndexAttributeMatcher::supportsFilterCondition(this, node, reference, itemsInIndex,
246+
return PersistentIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex,
246247
estimatedItems, estimatedCost);
247248
}
248249
break;
249250
}
250251
case TRI_IDX_TYPE_PERSISTENT_INDEX: {
251252
// same for both engines
252-
return PersistentIndexAttributeMatcher::supportsFilterCondition(this, node, reference, itemsInIndex,
253+
return PersistentIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex,
253254
estimatedItems, estimatedCost);
254255
}
255256

arangod/ClusterEngine/ClusterIndex.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#define ARANGOD_CLUSTER_ENGINE_CLUSTER_INDEX_H 1
2525

2626
#include "Basics/Common.h"
27+
#include "Basics/StringRef.h"
2728
#include "ClusterEngine/ClusterTransactionState.h"
2829
#include "ClusterEngine/Common.h"
2930
#include "Indexes/Index.h"
@@ -66,7 +67,7 @@ class ClusterIndex : public Index {
6667

6768
bool hasSelectivityEstimate() const override;
6869

69-
double selectivityEstimate(arangodb::StringRef const* = nullptr) const override;
70+
double selectivityEstimate(arangodb::StringRef const& = arangodb::StringRef()) const override;
7071

7172
/// @brief update the cluster selectivity estimate
7273
void updateClusterSelectivityEstimate(double estimate) override;
@@ -83,7 +84,8 @@ class ClusterIndex : public Index {
8384
/// @brief Checks if this index is identical to the given definition
8485
bool matchesDefinition(arangodb::velocypack::Slice const&) const override;
8586

86-
bool supportsFilterCondition(arangodb::aql::AstNode const*,
87+
bool supportsFilterCondition(std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
88+
arangodb::aql::AstNode const*,
8789
arangodb::aql::Variable const*, size_t, size_t&,
8890
double&) const override;
8991

arangod/Indexes/Index.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ bool Index::matchesDefinition(VPackSlice const& info) const {
614614
}
615615

616616
/// @brief default implementation for selectivityEstimate
617-
double Index::selectivityEstimate(StringRef const* extra) const {
617+
double Index::selectivityEstimate(StringRef const&) const {
618618 if (_unique) {
619619
return 1.0;
620620
}
@@ -657,7 +657,8 @@ int Index::sizeHint(transaction::Methods*, size_t) {
657657
bool Index::hasBatchInsert() const { return false; }
658658

659659
/// @brief default implementation for supportsFilterCondition
660-
bool Index::supportsFilterCondition(arangodb::aql::AstNode const*,
660+
bool Index::supportsFilterCondition(std::vector<std::shared_ptr<arangodb::Index>> const&,
661+
arangodb::aql::AstNode const*,
661662
arangodb::aql::Variable const*,
662663
size_t itemsInIndex, size_t& estimatedItems,
663664
double& estimatedCost) const {

arangod/Indexes/Index.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "Basics/AttributeNameParser.h"
2929
#include "Basics/Exceptions.h"
3030
#include "Basics/Result.h"
31+
#include "Basics/StringRef.h"
3132
#include "VocBase/LocalDocumentId.h"
3233
#include "VocBase/voc-types.h"
3334
#include "VocBase/vocbase.h"
@@ -42,7 +43,6 @@ class LocalTaskQueue;
4243
class IndexIterator;
4344
class LogicalCollection;
4445
class ManagedDocumentResult;
45-
class StringRef;
4646
struct IndexIteratorOptions;
4747

4848
namespace velocypack {
@@ -252,7 +252,7 @@ class Index {
252252
/// The extra StringRef is only used in the edge index as direction
253253
/// attribute attribute, a Slice would be more flexible.
254254
virtual double selectivityEstimate(
255-
arangodb::StringRef const* extra = nullptr) const;
255+
arangodb::StringRef const& extra = arangodb::StringRef()) const;
256256

257257
/// @brief update the cluster selectivity estimate
258258
virtual void updateClusterSelectivityEstimate(double /*estimate*/) {
@@ -330,7 +330,8 @@ class Index {
330330

331331
virtual bool hasBatchInsert() const;
332332

333-
virtual bool supportsFilterCondition(arangodb::aql::AstNode const*,
333+
virtual bool supportsFilterCondition(std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
334+
arangodb::aql::AstNode const*,
334335
arangodb::aql::Variable const*, size_t,
335336
size_t&, double&) const;
336337

arangod/Indexes/PersistentIndexAttributeMatcher.cpp

Lines changed: 4 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ void PersistentIndexAttributeMatcher::matchAttributes(
5959
}
6060

6161
bool PersistentIndexAttributeMatcher::supportsFilterCondition(
62+
std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
6263
arangodb::Index const* idx, arangodb::aql::AstNode const* node,
6364
arangodb::aql::Variable const* reference, size_t itemsInIndex,
6465
size_t& estimatedItems, double& estimatedCost) {
@@ -68,128 +69,9 @@ bool PersistentIndexAttributeMatcher::supportsFilterCondition(
6869
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
6970
}
7071
}
71-
72-
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
73-
std::unordered_set<std::string> nonNullAttributes;
74-
size_t values = 0;
75-
matchAttributes(idx, node, reference, found, values, nonNullAttributes,
76-
false);
77-
78-
bool lastContainsEquality = true;
79-
size_t attributesCovered = 0;
80-
size_t attributesCoveredByEquality = 0;
81-
double equalityReductionFactor = 20.0;
82-
estimatedCost = static_cast<double>(itemsInIndex);
83-
84-
for (size_t i = 0; i < idx->fields().size(); ++i) {
85-
auto it = found.find(i);
86-
87-
if (it == found.end()) {
88-
// index attribute not covered by condition
89-
break;
90-
}
91-
92-
// check if the current condition contains an equality condition
93-
auto const& nodes = (*it).second;
94-
bool containsEquality = false;
95-
for (size_t j = 0; j < nodes.size(); ++j) {
96-
if (nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ ||
97-
nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
98-
containsEquality = true;
99-
break;
100-
}
101-
}
102-
103-
if (!lastContainsEquality) {
104-
// unsupported condition. must abort
105-
break;
106-
}
107-
108-
++attributesCovered;
109-
if (containsEquality) {
110-
++attributesCoveredByEquality;
111-
estimatedCost /= equalityReductionFactor;
112-
113-
// decrease the effect of the equality reduction factor
114-
equalityReductionFactor *= 0.25;
115-
if (equalityReductionFactor < 2.0) {
116-
// equalityReductionFactor shouldn't get too low
117-
equalityReductionFactor = 2.0;
118-
}
119-
} else {
120-
// quick estimate for the potential reductions caused by the conditions
121-
if (nodes.size() >= 2) {
122-
// at least two (non-equality) conditions. probably a range with lower
123-
// and upper bound defined
124-
estimatedCost /= 7.5;
125-
} else {
126-
// one (non-equality). this is either a lower or a higher bound
127-
estimatedCost /= 2.0;
128-
}
129-
}
130-
131-
lastContainsEquality = containsEquality;
132-
}
133-
134-
if (values == 0) {
135-
values = 1;
136-
}
137-
138-
if (attributesCoveredByEquality == idx->fields().size() && idx->unique()) {
139-
// index is unique and condition covers all attributes by equality
140-
if (itemsInIndex == 0) {
141-
estimatedItems = 0;
142-
estimatedCost = 0.0;
143-
return true;
144-
}
145-
146-
estimatedItems = values;
147-
estimatedCost = static_cast<double>(estimatedItems * values);
148-
// cost is already low... now slightly prioritize unique indexes
149-
estimatedCost *= 0.995 - 0.05 * (idx->fields().size() - 1);
150-
return true;
151-
}
152-
153-
if (attributesCovered > 0 &&
154-
(!idx->sparse() || attributesCovered == idx->fields().size())) {
155-
// if the condition contains at least one index attribute and is not
156-
// sparse,
157-
// or the index is sparse and all attributes are covered by the condition,
158-
// then it can be used (note: additional checks for condition parts in
159-
// sparse indexes are contained in Index::canUseConditionPart)
160-
estimatedItems = static_cast<size_t>((std::max)(
161-
static_cast<size_t>(estimatedCost * values), static_cast<size_t>(1)));
162-
163-
// check if the index has a selectivity estimate ready
164-
if (idx->hasSelectivityEstimate() &&
165-
attributesCoveredByEquality == idx->fields().size()) {
166-
StringRef ignore;
167-
double estimate = idx->selectivityEstimate(&ignore);
168-
if (estimate > 0.0) {
169-
estimatedItems = static_cast<size_t>(1.0 / estimate);
170-
}
171-
}
172-
if (itemsInIndex == 0) {
173-
estimatedCost = 0.0;
174-
} else {
175-
// simon: neither RocksDBVPackIndex nor MMFilesPersistentIndex have caches
176-
/*if (useCache()) {
177-
estimatedCost = static_cast<double>(estimatedItems * values) -
178-
(_fields.size() - 1) * 0.01;
179-
} else {*/
180-
estimatedCost =
181-
(std::max)(static_cast<double>(1),
182-
std::log2(static_cast<double>(itemsInIndex)) * values) -
183-
(idx->fields().size() - 1) * 0.01;
184-
//}
185-
}
186-
return true;
187-
}
188-
189-
// index does not help for this condition
190-
estimatedItems = itemsInIndex;
191-
estimatedCost = static_cast<double>(estimatedItems);
192-
return false;
72+
73+
// just forward to reference implementation
74+
return SkiplistIndexAttributeMatcher::supportsFilterCondition(allIndexes, idx, node, reference, itemsInIndex, estimatedItems, estimatedCost);
19375
}
19476

19577
bool PersistentIndexAttributeMatcher::supportsSortCondition(

arangod/Indexes/PersistentIndexAttributeMatcher.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ class Index;
4040
/// rocksdb based storage
4141
namespace PersistentIndexAttributeMatcher {
4242

43-
bool supportsFilterCondition(arangodb::Index const*,
43+
bool supportsFilterCondition(std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
44+
arangodb::Index const*,
4445
arangodb::aql::AstNode const* node,
4546
arangodb::aql::Variable const* reference,
4647
size_t itemsInIndex, size_t& estimatedItems,

arangod/Indexes/SimpleAttributeEqualityMatcher.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ void SimpleAttributeEqualityMatcher::calculateIndexCosts(
323323
if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
324324
att = StringRef(attribute->getStringValue(), attribute->getStringLength());
325325
}
326-
double estimate = index->selectivityEstimate(&att);
326+
double estimate = index->selectivityEstimate(att);
327327
if (estimate <= 0.0) {
328328
// prevent division by zero
329329
estimatedItems = itemsInIndex;

0 commit comments

Comments
 (0)
0