8000 Restrict hybrid disjoint sg directions (#16214) · rnshah9/arangodb@34ff094 · GitHub
[go: up one dir, main page]

Skip to content

Commit 34ff094

Browse files
authored
Restrict hybrid disjoint sg directions (arangodb#16214)
1 parent eeddd7a commit 34ff094

14 files changed

+451
-127
lines changed

CHANGELOG

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
11
devel
22
-----
33

4+
5+
* Enterprise only: Restricted behaviour of Hybrid Disjoint Smart Graphs. Within a single
6+
traversal or path query we now restrict that you can only switch between
7+
Smart and Satellite sharding once; all queries where more than one switch
8+
is (in theory) possible will be rejected, e.g.:
9+
```
10+
FOR v IN 2 OUTBOUND @start smartToSatEdges, satToSmartEdges
11+
```
12+
will be rejected (we can go smart -> sat -> smart, so two switches)
13+
```
14+
FOR v1 IN 1 OUTBOUND @start smartToSatEdges
15+
FOR v2 IN 1 OUTBOUND v1 satToSmartEdges
16+
```
17+
will still be allowed, as each statement only switches once.
18+
We have decided to impose this restriction because, especially for ShortestPath
19+
queries, the results are not well-defined. If you have a use-case where
20+
this restriction hits you, please contact us.
21+
422
* Change default value of `--rocksdb.block-cache-shard-bits` to an automatic
523
default value that allows data blocks of at least 128MiB to be stored in each
624
cache shard if the block cache's strict capacity limit is used. The strict

arangod/Aql/AqlFunctionFeature.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,13 @@ void AqlFunctionFeature::addMiscFunctions() {
573573
FF::CanRunOnDBServerCluster,
574574
FF::CanRunOnDBServerOneShard),
575575
&Functions::MakeDistributeGraphInput});
576+
#ifdef USE_ENTERPRISE
577+
add({"SELECT_SMART_DISTRIBUTE_GRAPH_INPUT", ".,.",
578+
Function::makeFlags(FF::Deterministic, FF::Cacheable, FF::Internal,
579+
FF::CanRunOnDBServerCluster,
580+
FF::CanRunOnDBServerOneShard),
581+
&Functions::SelectSmartDistributeGraphInput});
582+
#endif
576583

577584
// this is an internal function that is only here for testing. it cannot
578585
// be invoked by end users, because referring to internal functions from user

arangod/Aql/Functions.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@
7979
#include "VocBase/Methods/Collections.h"
8080
#include "VocBase/Validators.h"
8181

82+
#ifdef USE_ENTERPRISE
83+
#include "Enterprise/VocBase/SmartGraphSchema.h"
84+
#endif
85+
8286
#include "analysis/token_attributes.hpp"
8387
#include "utils/levenshtein_utils.hpp"
8488
#include "utils/ngram_match_utils.hpp"
@@ -9228,6 +9232,41 @@ AqlValue Functions::MakeDistributeGraphInput(
92289232
return AqlValue{input};
92299233
}
92309234

9235+
#ifdef USE_ENTERPRISE
9236+
/// @brief internal AQL function SELECT_SMART_DISTRIBUTE_GRAPH_INPUT (enterprise
/// builds only).
/// Picks which of its two parameters is returned: parameter 0 is returned when
/// it is an object whose `_id` string passes
/// SmartGraphValidationHelper::SmartValidationResult::validateVertexId,
/// otherwise parameter 1 is returned unchanged.
/// @throws TRI_ERROR_QUERY_PARSE if parameter 0 is not an object carrying a
/// string `_id` attribute.
AqlValue Functions::SelectSmartDistributeGraphInput(
9237+
arangodb::aql::ExpressionContext* expressionContext, AstNode const&,
9238+
VPackFunctionParametersView parameters) {
9239+
AqlValue const& from = extractFunctionParameterValue(parameters, 0);
9240+
VPackSlice input = from.slice(); // will throw when wrong type
9241+
if (ADB_UNLIKELY(!input.isObject() ||
9242+
!input.hasKey(StaticStrings::IdString) ||
9243+
!input.get(StaticStrings::IdString).isString())) {
9244+
// This is an internal use function, so this error condition should never be
9245+
// hit. It is just a protection against users typing this method by hand.
// NOTE(review): the message below mentions plain `_id` strings, but the
// check above only accepts objects - presumably the input was normalized
// earlier; confirm against the caller.
9246+
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
9247+
"invalid start vertex. Must either be "
9248+
"an _id string value or an object with _id. "
9249+
"Instead got: " +
9250+
input.toJson());
9251+
}
9252+
auto fromId = input.get(StaticStrings::IdString).stringView();
9253+
auto res =
9254+
SmartGraphValidationHelper::SmartValidationResult::validateVertexId(
9255+
fromId);
9256+
if (res.ok()) {
9257+
// The first vertex has a valid smart vertex id, so we hand it back as is.
return AqlValue{input};
9258+
}
9259+
// From vertex is not smart. Use the other side.
9260+
9261+
// It does not matter if the other side is actually smart.
9262+
// Validity will be checked before (MAKE_DISTRIBUTE INPUT) and after
9263+
// (Distribute/PathQuery)
9264+
// If this vertex is Smart we shard by it.
9265+
// If not, we assume it to be satellite, so it can be sent anywhere.
9266+
return extractFunctionParameterValue(parameters, 1);
9267+
}
9268+
#endif
9269+
92319270
template<typename F>
92329271
AqlValue decayFuncImpl(arangodb::aql::ExpressionContext* expressionContext,
92339272
AstNode const& node,

arangod/Aql/Functions.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,12 @@ struct Functions {
543543
AstNode const&,
544544
VPackFunctionParametersView);
545545

546+
#ifdef USE_ENTERPRISE
547+
static AqlValue SelectSmartDistributeGraphInput(
548+
arangodb::aql::ExpressionContext*, AstNode const&,
549+
VPackFunctionParametersView);
550+
#endif
551+
546552
static AqlValue DecayGauss(arangodb::aql::ExpressionContext*, AstNode const&,
547553
VPackFunctionParametersView);
548554

arangod/Aql/GraphNode.cpp

Lines changed: 112 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,86 @@ using namespace arangodb::traverser;
6161

6262
namespace {
6363

64+
/// @brief Stateful checker that rejects a second Smart <-> Satellite switch
/// within one set of edge collections.
/// The first SmartToSat / SatToSmart edge collection passed to
/// isCollectionAllowed() fixes the only allowed switch direction. Any later
/// collection that implies the opposite direction, or any such collection
/// requested with direction ANY, yields an error Result.
struct DisjointSmartToSatelliteTester {
65+
/// @brief checks one edge collection together with its traversal direction.
/// @param collection the edge collection to test (dereferenced without a
/// null check)
/// @param dir the direction in which this collection is traversed
/// @return TRI_ERROR_NO_ERROR if the collection is allowed, otherwise
/// TRI_ERROR_UNSUPPORTED_CHANGE_IN_SMART_TO_SATELLITE_DISJOINT_EDGE_DIRECTION
/// with a message naming the offending collection(s).
Result isCollectionAllowed(
66+
std::shared_ptr<LogicalCollection> const& collection,
67+
TRI_edge_direction_e dir) {
68+
// We only need to check Sat -> Smart or Smart -> Sat collection, nothing
69+
// else
70+
if (collection->isSmartToSatEdgeCollection() ||
71+
collection->isSatToSmartEdgeCollection()) {
72+
if (dir == TRI_EDGE_ANY) {
73+
// ANY is always forbidden
74+
return {
75+
TRI_ERROR_UNSUPPORTED_CHANGE_IN_SMART_TO_SATELLITE_DISJOINT_EDGE_DIRECTION,
76+
"Using direction 'ANY' on collection: '" + collection->name() +
77+
"' could switch from Smart to Satellite and back. This "
78+
"violates the isDisjoint feature and is forbidden."};
79+
}
80+
// Unify Edge storage, and edge read direction.
81+
// The smartToSatDir defines the direction in which we attempt to
82+
// walk from Smart to Satellite collections. (OUT: Smart -> Sat, IN: Sat
83+
// -> Smart)
84+
bool isOut = dir == TRI_EDGE_OUT;
85+
auto smartToSatDir = isOut == collection->isSmartToSatEdgeCollection()
86+
? TRI_EDGE_OUT
87+
: TRI_EDGE_IN;
88+
if (_disjointSmartToSatDirection == TRI_EDGE_ANY) {
89+
// We have not defined the direction yet, store it, this now defines the
90+
// only allowed switch
91+
_disjointSmartToSatDirection = smartToSatDir;
92+
TRI_ASSERT(_conflictingCollection == nullptr);
93+
_conflictingCollection = collection;
94+
_conflictingDirection = dir;
95+
} else if (_disjointSmartToSatDirection != smartToSatDir) {
96+
// We try to switch again! This is disallowed. Let us report.
97+
std::stringstream errorMessage;
98+
errorMessage << "Using direction ";
99+
if (dir == TRI_EDGE_OUT) {
100+
errorMessage << "OUTBOUND";
101+
} else {
102+
errorMessage << "INBOUND";
103+
}
104+
// Appends "'<name>' switching from <X> to <Y>" for the given collection
// and read direction to the error message.
auto printCollection = [&errorMessage](LogicalCollection const& col,
105+
bool isOut) {
106+
errorMessage << "'" << col.name() << "' switching from ";
107+
if (isOut == col.isSmartToSatEdgeCollection()) {
108+
// Hits OUTBOUND on SmartToSat and INBOUND on SatToSmart
109+
errorMessage << "Smart to Satellite";
110+
} else {
111+
// Hits INBOUND on SmartToSat and OUTBOUND on SatToSmart
112+
errorMessage << "Satellite to Smart";
113+
}
114+
};
115+
errorMessage << " on collection: ";
116+
printCollection(*collection, isOut);
117+
118+
// Second half of the message: the collection that fixed the direction
// earlier.
errorMessage << ". Conflicting with: ";
119+
if (_conflictingDirection == TRI_EDGE_OUT) {
120+
errorMessage << "OUTBOUND";
121+
} else {
122+
errorMessage << "INBOUND";
123+
}
124+
errorMessage << " ";
125+
bool conflictingIsOut = _conflictingDirection == TRI_EDGE_OUT;
126+
TRI_ASSERT(_conflictingCollection != nullptr);
127+
printCollection(*_conflictingCollection, conflictingIsOut);
128+
errorMessage
129+
<< ". This violates the isDisjoint feature and is forbidden.";
130+
return {
131+
TRI_ERROR_UNSUPPORTED_CHANGE_IN_SMART_TO_SATELLITE_DISJOINT_EDGE_DIRECTION,
132+
errorMessage.str()};
133+
}
134+
}
135+
// Either not a Smart <-> Satellite edge collection, or it agrees with the
// direction that is already fixed.
return TRI_ERROR_NO_ERROR;
136+
}
137+
138+
private:
139+
// Smart -> Satellite walking direction fixed by the first checked hybrid
// edge collection. TRI_EDGE_ANY means "not decided yet".
TRI_edge_direction_e _disjointSmartToSatDirection{TRI_EDGE_ANY};
140+
// The collection that fixed the direction above; only used to build the
// error message.
std::shared_ptr<LogicalCollection> _conflictingCollection{nullptr};
141+
// The traversal direction that was requested for _conflictingCollection.
TRI_edge_direction_e _conflictingDirection{TRI_EDGE_ANY};
142+
};
143+
64144
TRI_edge_direction_e uint64ToDirection(uint64_t dirNum) {
65145
switch (dirNum) {
66146
case 0:
@@ -114,7 +194,7 @@ GraphNode::GraphNode(ExecutionPlan* plan, ExecutionNodeId id,
114194
TRI_ASSERT(direction != nullptr);
115195
TRI_ASSERT(graph != nullptr);
116196

117-
auto& ci = _vocbase->server().getFeature<ClusterFeature>().clusterInfo();
197+
DisjointSmartToSatelliteTester disjointTest{};
118198

119199
if (graph->type == NODE_TYPE_COLLECTION_LIST) {
120200
size_t edgeCollectionCount = graph->numMembers();
@@ -123,36 +203,7 @@ GraphNode::GraphNode(ExecutionPlan* plan, ExecutionNodeId id,
123203
_edgeColls.reserve(edgeCollectionCount);
124204
_directions.reserve(edgeCollectionCount);
125205

126-
// First determine whether all edge collections are smart and sharded
127-
// like a common collection:
128-
if (ServerState::instance()->isRunningInCluster()) {
129-
_isSmart = true;
130-
_isDisjoint = true;
131-
std::string distributeShardsLike;
132-
for (size_t i = 0; i < edgeCollectionCount; ++i) {
133-
auto col = graph->getMember(i);
134-
if (col->type == NODE_TYPE_DIRECTION) {
135-
col = col->getMember(1); // The first member always is the collection
136-
}
137-
std::string n = col->getString();
138-
auto c = ci.getCollection(_vocbase->name(), n);
139-
if (c->isSmart() && !c->isDisjoint()) {
140-
_isDisjoint = false;
141-
}
142-
if (!c->isSmart() || c->distributeShardsLike().empty()) {
143-
_isSmart = false;
144-
_isDisjoint = false;
145-
break;
146-
}
147-
if (distributeShardsLike.empty()) {
148-
distributeShardsLike = c->distributeShardsLike();
149-
} else if (distributeShardsLike != c->distributeShardsLike()) {
150-
_isSmart = false;
151-
_isDisjoint = false;
152-
break;
153-
}
154-
}
155-
}
206+
determineEnterpriseFlags(graph);
156207

157208
std::unordered_map<std::string, TRI_edge_direction_e> seenCollections;
158209
CollectionNameResolver const& resolver = plan->getAst()->query().resolver();
@@ -199,20 +250,27 @@ GraphNode::GraphNode(ExecutionPlan* plan, ExecutionNodeId id,
199250
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_COLLECTION_TYPE_INVALID,
200251
msg);
201252
}
253+
if (_isDisjoint) {
254+
// TODO: Alternative to "THROW" we could run a community based Query
255+
// here, instead of a Disjoint one.
256+
auto res = disjointTest.isCollectionAllowed(collection, dir);
257+
if (res.fail()) {
258+
THROW_ARANGO_EXCEPTION_MESSAGE(res.errorNumber(), res.errorMessage());
259+
}
260+
}
202261

203262
auto& collections = plan->getAst()->query().collections();
204263

205264
_graphInfo.add(VPackValue(eColName));
206265
if (ServerState::instance()->isRunningInCluster()) {
207-
auto c = ci.getCollection(_vocbase->name(), eColName);
208-
if (!c->isSmart()) {
266+
if (!collection->isSmart()) {
209267
addEdgeCollection(collections, eColName, dir);
210268
} else {
211269
std::vector<std::string> names;
212270
if (_isSmart) {
213-
names = c->realNames();
271+
names = collection->realNames();
214272
} else {
215-
names = c->realNamesForRead();
273+
names = collection->realNamesForRead();
216274
}
217275
for (auto const& name : names) {
218276
addEdgeCollection(collections, name, dir);
@@ -242,22 +300,30 @@ GraphNode::GraphNode(ExecutionPlan* plan, ExecutionNodeId id,
242300
THROW_ARANGO_EXCEPTION(TRI_ERROR_GRAPH_EMPTY);
243301
}
244302

245-
// First determine whether all edge collections are smart and sharded
246-
// like a common collection:
303+
// Just use the Graph Object information
247304
if (ServerState::instance()->isRunningInCluster()) {
248305
_isSmart = _graphObj->isSmart();
249306
_isDisjoint = _graphObj->isDisjoint();
250307
}
251-
308+
auto& ci = _vocbase->server().getFeature<ClusterFeature>().clusterInfo();
309+
auto& collections = plan->getAst()->query().collections();
252310
for (const auto& n : eColls) {
253311
if (_options->shouldExcludeEdgeCollection(n)) {
254312
// excluded edge collection
255313
continue;
256314
}
257315

258-
auto& collections = plan->getAst()->query().collections();
259316
if (ServerState::instance()->isRunningInCluster()) {
260317
auto c = ci.getCollection(_vocbase->name(), n);
318+
if (_isDisjoint) {
319+
// TODO: Alternative to "THROW" we could run a community based Query
320+
// here, instead of a Disjoint one.
321+
auto res = disjointTest.isCollectionAllowed(c, _defaultDirection);
322+
if (res.fail()) {
323+
THROW_ARANGO_EXCEPTION_MESSAGE(res.errorNumber(),
324+
res.errorMessage());
325+
}
326+
}
261327
if (!c->isSmart()) {
262328
addEdgeCollection(collections, n, _defaultDirection);
263329
} else {
@@ -276,7 +342,6 @@ GraphNode::GraphNode(ExecutionPlan* plan, ExecutionNodeId id,
276342
}
277343
}
278344

279-
auto& collections = plan->getAst()->query().collections();
280345
auto vColls = _graphObj->vertexCollections();
281346
length = vColls.size();
282347
if (length == 0) {
@@ -473,6 +538,13 @@ GraphNode::GraphNode(ExecutionPlan* plan, ExecutionNodeId id,
473538
setGraphInfoAndCopyColls(edgeColls, vertexColls);
474539
}
475540

541+
#ifndef USE_ENTERPRISE
542+
/// @brief variant compiled only when USE_ENTERPRISE is not defined: the edge
/// collection list is ignored and both the smart and the disjoint flag are
/// cleared.
void GraphNode::determineEnterpriseFlags(AstNode const*) {
543+
_isSmart = false;
544+
_isDisjoint = false;
545+
}
546+
#endif
547+
476548
void GraphNode::setGraphInfoAndCopyColls(
477549
std::vector<Collection*> const& edgeColls,
478550
std::vector<Collection*> const& vertexColls) {

arangod/Aql/GraphNode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ class GraphNode : public ExecutionNode {
228228

229229
Collection const* getShardingPrototype() const;
230230

231+
void determineEnterpriseFlags(AstNode const* edgeCollectionList);
232+
231233
protected:
232234
/// @brief the database
233235
TRI_vocbase_t* _vocbase;

0 commit comments

Comments
 (0)
0