Improve efficiency of array/object serialization in query plans (#17902) · cloudhub-js/arangodb@c9bb77c · GitHub
[go: up one dir, main page]

Skip to content

Commit c9bb77c

Browse files
authored
Improve efficiency of array/object serialization in query plans (arangodb#17902)
1 parent 89fa5af commit c9bb77c

23 files changed

+1195
-223
lines changed

CHANGELOG

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
devel
22
-----
33

4+
* Use more compact and efficient representation for arrays and objects during
5+
AQL AST serialization and deserialization. This can help to reduce the size
6+
of messages exchanged between coordinator and database servers during query
7+
setup, and also reduce the time needed for parsing these messages. This
8+
especially helps when there are large bind parameter values that are arrays
9+
or objects.
10+
The more efficient format is used also inside an AQL query's "explain" and
11+
"profile" methods, and thus any callers that process the return values of
12+
explain and profile operations may now receive the new format. All callers
13+
inside the ArangoDB code have been adjusted, but any external callers that
14+
process the JSON response values of AQL query explain or profile operations
15+
may need to be adjusted to handle the new format.
16+
417
* Allow cluster database servers to start even when there are existing databases
518
that would violate the settings `--cluster.min-replication-factor` or
619
`--cluster.max-replication-factor`.
@@ -16,7 +29,7 @@ devel
1629
distribution was contained in the "optimizing plan" stage, which was
1730
misleading.
1831

19-
* Remove constant values for query variables from query plan serialization
32+
* Removed constant values for query variables from query plan serialization
2033
in cases where they were not needed. Previously, constant values of query variables
2134
were always serialized for all occurrences of a variable in a query plan.
2235
If the constant values were large, this contributed to higher serialization

arangod/Aql/AqlCallList.cpp

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,24 @@
2424
#include "AqlCallList.h"
2525

2626
#include "Basics/StaticStrings.h"
27+
#include "Basics/debugging.h"
2728
#include "Basics/voc-errors.h"
2829
#include "Containers/Enumerate.h"
2930
#include "Logger/LogMacros.h"
3031
#include "Logger/Logger.h"
3132

3233
#include <velocypack/Builder.h>
33-
#include <velocypack/Collection.h>
34+
#include <velocypack/Iterator.h>
3435
#include <velocypack/Slice.h>
3536

37+
#include <algorithm>
38+
#include <array>
3639
#include <iostream>
37-
#include <map>
3840
#include <string_view>
3941

4042
using namespace arangodb;
4143
using namespace arangodb::aql;
4244

43-
namespace {
44-
// hack for MSVC
45-
auto getStringView(VPackSlice slice) -> std::string_view {
46-
std::string_view ref = slice.stringView();
47-
return std::string_view(ref.data(), ref.size());
48-
}
49-
} // namespace
50-
5145
AqlCallList::AqlCallList(AqlCall const& call) : _specificCalls{call} {
5246
// We can never create a new CallList with existing skipCounter
5347
TRI_ASSERT(call.getSkipCount() == 0);
@@ -66,7 +60,7 @@ AqlCallList::AqlCallList(AqlCall const& specificCall,
6660
if (!_specificCalls.empty()) {
6761
// We only implemented for a single given call.
6862
TRI_ASSERT(_specificCalls.size() == 1);
69-
auto res = _specificCalls.back();
63+
auto res = std::move(_specificCalls.back());
7064
_specificCalls.pop_back();
7165
return res;
7266
}
@@ -121,9 +115,13 @@ auto AqlCallList::fromVelocyPack(VPackSlice slice) -> ResultT<AqlCallList> {
121115
slice.typeName());
122116
}
123117

124-
auto expectedPropertiesFound = std::map<std::string_view, bool>{};
125-
expectedPropertiesFound.emplace(StaticStrings::AqlCallListSpecific, false);
126-
expectedPropertiesFound.emplace(StaticStrings::AqlCallListDefault, false);
118+
// we only have 2 different keys to check. using an std::map requires
119+
// dynamic memory allocation and would be wasteful. instead, use a simple
120+
// std::array here to get rid of any allocations
121+
auto expectedPropertiesFound =
122+
std::array<std::pair<std::string_view, bool>, 2>{
123+
{{StaticStrings::AqlCallListSpecific, false},
124+
{StaticStrings::AqlCallListDefault, false}}};
127125

128126
auto const readSpecific =
129127
[](velocypack::Slice slice) -> ResultT<std::vector<AqlCall>> {
@@ -137,7 +135,7 @@ auto AqlCallList::fromVelocyPack(VPackSlice slice) -> ResultT<AqlCallList> {
137135
}
138136
std::vector<AqlCall> res;
139137
res.reserve(slice.length());
140-
for (VPackSlice const c : VPackArrayIterator(slice)) {
138+
for (VPackSlice c : VPackArrayIterator(slice)) {
141139
auto maybeAqlCall = AqlCall::fromVelocyPack(c);
142140
if (ADB_UNLIKELY(maybeAqlCall.fail())) {
143141
auto message = std::string{"When deserializing AqlCallList: entry "};
@@ -146,14 +144,17 @@ auto AqlCallList::fromVelocyPack(VPackSlice slice) -> ResultT<AqlCallList> {
146144
message += maybeAqlCall.errorMessage();
147145
return Result(TRI_ERROR_TYPE_ERROR, std::move(message));
148146
}
149-
res.emplace_back(maybeAqlCall.get());
147+
res.emplace_back(std::move(maybeAqlCall.get()));
150148
}
151149
return res;
152150
};
153151

154152
auto const readDefault =
155153
[](velocypack::Slice slice) -> ResultT<std::optional<AqlCall>> {
156-
if (ADB_UNLIKELY(!slice.isObject() && !slice.isNull())) {
154+
if (slice.isNull()) {
155+
return {std::nullopt};
156+
}
157+
if (ADB_UNLIKELY(!slice.isObject())) {
157158
auto message =
158159
std::string{"When deserializating AqlCallList: When reading " +
159160
StaticStrings::AqlCallListDefault +
@@ -162,9 +163,6 @@ auto AqlCallList::fromVelocyPack(VPackSlice slice) -> ResultT<AqlCallList> {
162163
message += slice.typeName();
163164
return Result(TRI_ERROR_TYPE_ERROR, std::move(message));
164165
}
165-
if (slice.isNull()) {
166-
return {std::nullopt};
167-
}
168166
auto maybeAqlCall = AqlCall::fromVelocyPack(slice);
169167
if (ADB_UNLIKELY(maybeAqlCall.fail())) {
170168
auto message = std::string{"When deserializing AqlCallList: default "};
@@ -177,20 +175,13 @@ auto AqlCallList::fromVelocyPack(VPackSlice slice) -> ResultT<AqlCallList> {
177175
AqlCallList result{AqlCall{}};
178176

179177
for (auto const it : velocypack::ObjectIterator(slice)) {
180-
auto const keySlice = it.key;
181-
if (ADB_UNLIKELY(!keySlice.isString())) {
182-
return Result(TRI_ERROR_TYPE_ERROR,
183-
"When deserializating AqlCallList: Key is not a string");
184-
}
185-
auto const key = getStringView(keySlice);
178+
auto key = it.key.stringView();
186179

187-
if (auto propIt = expectedPropertiesFound.find(key);
180+
if (auto propIt = std::find_if(
181+
expectedPropertiesFound.begin(), expectedPropertiesFound.end(),
182+
[&key](auto const& epf) { return epf.first == key; });
188183
ADB_LIKELY(propIt != expectedPropertiesFound.end())) {
189-
if (ADB_UNLIKELY(propIt->second)) {
190-
return Result(
191-
TRI_ERROR_TYPE_ERROR,
192-
"When deserializating AqlCallList: Encountered duplicate key");
193-
}
184+
TRI_ASSERT(!propIt->second);
194185
propIt->second = true;
195186
}
196187

arangod/Aql/AqlCallStack.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ void AqlCallStack::pushCall(AqlCallList const& call) {
8181
_operations.emplace_back(call);
8282
}
8383

84-
auto AqlCallStack::fromVelocyPack(velocypack::Slice const slice)
84+
auto AqlCallStack::fromVelocyPack(velocypack::Slice slice)
8585
-> ResultT<AqlCallStack> {
8686
if (ADB_UNLIKELY(!slice.isArray())) {
8787
using namespace std::string_literals;
@@ -95,25 +95,22 @@ auto AqlCallStack::fromVelocyPack(velocypack::Slice const slice)
9595
}
9696

9797
auto stack = std::vector<AqlCallList>{};
98-
auto i = std::size_t{0};
9998
stack.reserve(slice.length());
100-
for (auto const entry : VPackArrayIterator(slice)) {
99+
for (auto entry : VPackArrayIterator(slice)) {
101100
auto maybeAqlCall = AqlCallList::fromVelocyPack(entry);
102101

103102
if (ADB_UNLIKELY(maybeAqlCall.fail())) {
104103
auto message = std::string{"When deserializing AqlCallStack: entry "};
105-
message += std::to_string(i);
104+
message += std::to_string(stack.size());
106105
message += ": ";
107106
message += std::move(maybeAqlCall).errorMessage();
108107
return Result(TRI_ERROR_TYPE_ERROR, std::move(message));
109108
}
110109

111-
stack.emplace_back(maybeAqlCall.get());
112-
113-
++i;
110+
stack.emplace_back(std::move(maybeAqlCall.get()));
114111
}
115112

116-
TRI_ASSERT(i > 0);
113+
TRI_ASSERT(!stack.empty());
117114

118115
return AqlCallStack{std::move(stack)};
119116
}

arangod/Aql/Ast.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -382,10 +382,8 @@ void Ast::clearMost() noexcept { _resources.clearMost(); }
382382

383383
/// @brief convert the AST into VelocyPack
384384
void Ast::toVelocyPack(VPackBuilder& builder, bool verbose) const {
385-
{
386-
VPackArrayBuilder guard(&builder);
387-
_root->toVelocyPack(builder, verbose);
388-
}
385+
VPackArrayBuilder guard(&builder);
386+
_root->toVelocyPack(builder, verbose);
389387
}
390388

391389
/// @brief add an operation to the AST
@@ -3422,7 +3420,7 @@ AstNode* Ast::optimizeBinaryOperatorRelational(
34223420
bool const lhsIsConst = lhs->isConstant();
34233421

34243422
if (!lhsIsConst) {
3425-
if (rhs->numMembers() >= AstNode::SortNumberThreshold &&
3423+
if (rhs->numMembers() >= AstNode::kSortNumberThreshold &&
34263424
rhs->type == NODE_TYPE_ARRAY &&
34273425
(node->type == NODE_TYPE_OPERATOR_BINARY_IN ||
34283426
node->type == NODE_TYPE_OPERATOR_BINARY_NIN)) {

0 commit comments

Comments
 (0)
0