improve query plan serialization time by jsteemann · Pull Request #17897 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

improve query plan serialization time #17897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
improve query plan serialization time
* Added new stage "instantiating executors" to the query profiling output.
  The time spent in "instantiating executors" is the time needed to create
  the query executors from the final query execution plan. In cluster mode,
  this stage also includes the time needed for physically distributing the
  query snippets to the participating database servers.
  Previously, the time spent for instantiating executors and the physical
  distribution was contained in the "optimizing plan" stage, which was
  misleading.

* Remove constant values for query variables from query plan serialization
  in cases where they were not needed. Previously, constant values of query variables
  were always serialized for all occurrences of a variable in a query plan.
  If the constant values were large, this contributed to higher serialization
  and thus query setup times. Now the constant values are only serialized
  for relevant parts of query execution plans.
  • Loading branch information
jsteemann committed Jan 6, 2023
commit 0c9487683d588194366905c8d6938ceef510dca1
16 changes: 16 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
devel
-----

* Added new stage "instantiating executors" to the query profiling output.
The time spent in "instantiating executors" is the time needed to create
the query executors from the final query execution plan. In cluster mode,
this stage also includes the time needed for physically distributing the
query snippets to the participating database servers.
Previously, the time spent for instantiating executors and the physical
distribution was contained in the "optimizing plan" stage, which was
misleading.

* Remove constant values for query variables from query plan serialization
in cases where they were not needed. Previously, constant values of query variables
were always serialized for all occurrences of a variable in a query plan.
If the constant values were large, this contributed to higher serialization
and thus query setup times. Now the constant values are only serialized
for relevant parts of query execution plans.

* BTS-199: remove check for environment variable `GLIBCXX_FORCE_NEW` from
server start, and remove setting this variable from startup scripts.
The reason is that the environment variable only controls the behavior of
Expand Down
29 changes: 7 additions & 22 deletions arangod/Aql/Ast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1346,10 +1346,7 @@ AstNode* Ast::createNodeValueInt(int64_t value) {
AstNode* node = createNode(NODE_TYPE_VALUE);
node->setValueType(VALUE_TYPE_INT);
node->setIntValue(value);
node->setFlag(DETERMINED_CONSTANT, VALUE_CONSTANT);
node->setFlag(DETERMINED_SIMPLE, VALUE_SIMPLE);
node->setFlag(DETERMINED_RUNONDBSERVER, VALUE_RUNONDBSERVER);

node->setConstantFlags();
return node;
}

Expand All @@ -1367,10 +1364,7 @@ AstNode* Ast::createNodeValueDouble(double value) {
AstNode* node = createNode(NODE_TYPE_VALUE);
node->setValueType(VALUE_TYPE_DOUBLE);
node->setDoubleValue(value);
node->setFlag(DETERMINED_CONSTANT, VALUE_CONSTANT);
node->setFlag(DETERMINED_SIMPLE, VALUE_SIMPLE);
node->setFlag(DETERMINED_RUNONDBSERVER, VALUE_RUNONDBSERVER);

node->setConstantFlags();
return node;
}

Expand Down Expand Up @@ -1403,10 +1397,7 @@ AstNode* Ast::createNodeValueString(char const* value, size_t length) {
AstNode* node = createNode(NODE_TYPE_VALUE);
node->setValueType(VALUE_TYPE_STRING);
node->setStringValue(value, length);
node->setFlag(DETERMINED_CONSTANT, VALUE_CONSTANT);
node->setFlag(DETERMINED_SIMPLE, VALUE_SIMPLE);
node->setFlag(DETERMINED_RUNONDBSERVER, VALUE_RUNONDBSERVER);

node->setConstantFlags();
return node;
}

Expand Down Expand Up @@ -3966,7 +3957,7 @@ AstNode* Ast::optimizeObject(AstNode* node) {
/// sure then that string values are valid through the query lifetime.
AstNode* Ast::nodeFromVPack(VPackSlice slice, bool copyStringValues) {
if (slice.isBoolean()) {
return createNodeValueBool(slice.getBoolean());
return createNodeValueBool(slice.isTrue());
}

if (slice.isNumber()) {
Expand Down Expand Up @@ -4008,16 +3999,12 @@ AstNode* Ast::nodeFromVPack(VPackSlice slice, bool copyStringValues) {
it.next();
}

node->setFlag(DETERMINED_CONSTANT, VALUE_CONSTANT);
node->setFlag(DETERMINED_SIMPLE, VALUE_SIMPLE);
node->setFlag(DETERMINED_RUNONDBSERVER, VALUE_RUNONDBSERVER);

node->setConstantFlags();
return node;
}

if (slice.isObject()) {
VPackObjectIterator it(slice, true);

auto node = createNodeObject();
node->members.reserve(static_cast<size_t>(it.size()));

Expand All @@ -4037,10 +4024,7 @@ AstNode* Ast::nodeFromVPack(VPackSlice slice, bool copyStringValues) {
it.next();
}

node->setFlag(DETERMINED_CONSTANT, VALUE_CONSTANT);
node->setFlag(DETERMINED_SIMPLE, VALUE_SIMPLE);
node->setFlag(DETERMINED_RUNONDBSERVER, VALUE_RUNONDBSERVER);

node->setConstantFlags();
return node;
}

Expand Down Expand Up @@ -4369,6 +4353,7 @@ AstNode* Ast::endSubQuery() {
}

/// @brief returns true when _queries holds more than one entry
bool Ast::isInSubQuery() const { return (_queries.size() > 1); }

/// @brief returns a copy of the _bindParameters member as a
/// std::unordered_set<std::string>; the caller gets its own set by value
std::unordered_set<std::string> Ast::bindParameters() const {
return std::unordered_set<std::string>(_bindParameters);
}
Expand Down
63 changes: 31 additions & 32 deletions arangod/Aql/AstNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,21 @@
#include "Basics/Utf8Helper.h"
#include "Basics/VelocyPackHelper.h"
#include "Basics/fasthash.h"
#include "Containers/FlatHashSet.h"
#include "Transaction/Methods.h"

#include <array>
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
#include <iostream>
#endif
#include <unordered_map>

#include <velocypack/Builder.h>
#include <velocypack/Dumper.h>
#include <velocypack/Iterator.h>
#include <velocypack/Sink.h>
#include <velocypack/Slice.h>
#include <velocypack/ValueType.h>
#include <array>

using namespace arangodb;
using namespace arangodb::aql;
Expand Down Expand Up @@ -521,17 +523,11 @@ AstNode::AstNode(Ast* ast, arangodb::velocypack::Slice slice)
case NODE_TYPE_OPERATOR_BINARY_NIN:
case NODE_TYPE_OPERATOR_BINARY_ARRAY_IN:
case NODE_TYPE_OPERATOR_BINARY_ARRAY_NIN: {
bool sorted = false;
VPackSlice v = slice.get("sorted");
if (v.isBoolean()) {
sorted = v.getBoolean();
}
setBoolValue(sorted);
setBoolValue(slice.get("sorted").isTrue());
break;
}
case NODE_TYPE_ARRAY: {
VPackSlice v = slice.get("sorted");
if (v.isBoolean() && v.getBoolean()) {
if (VPackSlice v = slice.get("sorted"); v.isTrue()) {
setFlag(DETERMINED_SORTED, VALUE_SORTED);
}
break;
Expand All @@ -544,12 +540,7 @@ AstNode::AstNode(Ast* ast, arangodb::velocypack::Slice slice)
case NODE_TYPE_OPERATOR_BINARY_EQ:
case NODE_TYPE_OPERATOR_BINARY_LT:
case NODE_TYPE_OPERATOR_BINARY_LE: {
bool excludesNull = false;
VPackSlice v = slice.get("excludesNull");
if (v.isBoolean()) {
excludesNull = v.getBoolean();
}
setExcludesNull(excludesNull);
setExcludesNull(slice.get("excludesNull").isTrue());
break;
}
case NODE_TYPE_OBJECT:
Expand Down Expand Up @@ -614,9 +605,7 @@ AstNode::AstNode(Ast* ast, arangodb::velocypack::Slice slice)
break;
}

VPackSlice subNodes = slice.get("subNodes");

if (subNodes.isArray()) {
if (VPackSlice subNodes = slice.get("subNodes"); subNodes.isArray()) {
members.reserve(subNodes.length());

try {
Expand Down Expand Up @@ -893,13 +882,18 @@ void AstNode::validateValueType(int type) {
}

/// @brief fetch a node's type from VPack
AstNodeType AstNode::getNodeTypeFromVPack(
arangodb::velocypack::Slice const& slice) {
AstNodeType AstNode::getNodeTypeFromVPack(arangodb::velocypack::Slice slice) {
int type = slice.get("typeID").getNumericValue<int>();
validateType(type);
return static_cast<AstNodeType>(type);
}

/// @brief sets the DETERMINED_*/VALUE_* flag pairs for CONSTANT, SIMPLE and
/// RUNONDBSERVER on this node in a single call. This bundles the three
/// setFlag() calls that value-node creation code would otherwise repeat.
void AstNode::setConstantFlags() noexcept {
setFlag(DETERMINED_CONSTANT, VALUE_CONSTANT);
setFlag(DETERMINED_SIMPLE, VALUE_SIMPLE);
setFlag(DETERMINED_RUNONDBSERVER, VALUE_RUNONDBSERVER);
}

bool AstNode::valueHasVelocyPackRepresentation() const {
switch (type) {
case NODE_TYPE_VALUE:
Expand Down Expand Up @@ -973,7 +967,7 @@ void AstNode::toVelocyPackValue(VPackBuilder& builder) const {
return;
}
if (type == NODE_TYPE_ARRAY) {
builder.openArray(false);
builder.openArray(/*allowUnindexed*/ false);
size_t const n = numMembers();
for (size_t i = 0; i < n; ++i) {
auto member = getMemberUnchecked(i);
Expand All @@ -988,15 +982,22 @@ void AstNode::toVelocyPackValue(VPackBuilder& builder) const {
if (type == NODE_TYPE_OBJECT) {
builder.openObject();

std::unordered_set<std::string_view> keys;
containers::FlatHashSet<std::string_view> keys;
size_t const n = numMembers();

// only check for duplicate keys if we have more than a single attribute
bool checkUniqueness = (n > 1);
if (checkUniqueness && hasFlag(DETERMINED_CHECKUNIQUENESS)) {
// turn off duplicate keys checking if everything was already checked
checkUniqueness = hasFlag(VALUE_CHECKUNIQUENESS);
}

for (size_t i = 0; i < n; ++i) {
auto member = getMemberUnchecked(i);
if (member != nullptr) {
std::string_view key(member->getStringView());

if (n > 1 && !keys.emplace(key).second) {
if (checkUniqueness && !keys.emplace(key).second) {
// duplicate key, skip it
continue;
}
Expand All @@ -1016,7 +1017,7 @@ void AstNode::toVelocyPackValue(VPackBuilder& builder) const {

VPackSlice slice = tmp.slice();
if (slice.isObject()) {
slice = slice.get(getString());
slice = slice.get(getStringView());
if (!slice.isNone()) {
builder.add(slice);
return;
Expand All @@ -1037,6 +1038,7 @@ void AstNode::toVelocyPack(VPackBuilder& builder, bool verbose) const {
if (verbose) {
builder.add("typeID", VPackValue(static_cast<int>(type)));
}

if (type == NODE_TYPE_COLLECTION || type == NODE_TYPE_VIEW ||
type == NODE_TYPE_PARAMETER || type == NODE_TYPE_PARAMETER_DATASOURCE ||
type == NODE_TYPE_ATTRIBUTE_ACCESS || type == NODE_TYPE_OBJECT_ELEMENT ||
Expand Down Expand Up @@ -1100,10 +1102,9 @@ void AstNode::toVelocyPack(VPackBuilder& builder, bool verbose) const {
}

// dump sub-nodes
size_t const n = members.size();
if (n > 0) {
if (size_t const n = members.size(); n > 0) {
builder.add(VPackValue("subNodes"));
builder.openArray(false);
builder.openArray(/*allowUnindexed*/ true);
for (size_t i = 0; i < n; ++i) {
AstNode* member = getMemberUnchecked(i);
if (member != nullptr) {
Expand Down Expand Up @@ -1790,18 +1791,16 @@ bool AstNode::mustCheckUniqueness() const {
bool mustCheck = false;

// check the actual key members now
size_t const n = numMembers();

if (n >= 2) {
std::unordered_set<std::string> keys;
if (size_t const n = numMembers(); n >= 2) {
containers::FlatHashSet<std::string_view> keys;

// only useful to check when there are 2 or more keys
for (size_t i = 0; i < n; ++i) {
auto member = getMemberUnchecked(i);

if (member->type == NODE_TYPE_OBJECT_ELEMENT) {
// constant key
if (!keys.emplace(member->getString()).second) {
if (!keys.emplace(member->getStringView()).second) {
// duplicate key
mustCheck = true;
break;
Expand Down
14 changes: 5 additions & 9 deletions arangod/Aql/AstNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,11 @@ struct AstNode {
static void validateValueType(int type);

/// @brief fetch a node's type from VPack
static AstNodeType getNodeTypeFromVPack(
arangodb::velocypack::Slice const& slice);

/**
* @brief Helper class to check if this node can be represented as VelocyPack
* If this method returns FALSE a call to "toVelocyPackValue" will yield
* no change in the handed in builder.
* On TRUE it is guaranteed that the handed in Builder was modified.
*/
static AstNodeType getNodeTypeFromVPack(arangodb::velocypack::Slice slice);

void setConstantFlags() noexcept;

/// @brief function to check if this node can be represented as VelocyPack.
bool valueHasVelocyPackRepresentation() const;

/// @brief build a VelocyPack representation of the node value
Expand Down
5 changes: 3 additions & 2 deletions arangod/Aql/EngineInfoContainerDBServerServerBased.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ EngineInfoContainerDBServerServerBased::buildSetupRequest(
network::RequestOptions const& options) const {
TRI_ASSERT(!server.starts_with("server:"));

VPackBuffer<uint8_t> buffer(infoSlice.byteSize());
buffer.append(infoSlice.begin(), infoSlice.byteSize());
auto byteSize = infoSlice.byteSize();
VPackBuffer<uint8_t> buffer(byteSize);
buffer.append(infoSlice.begin(), byteSize);

// add the transaction ID header
network::Headers headers;
Expand Down
2 changes: 1 addition & 1 deletion arangod/Aql/Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9539,7 +9539,7 @@ AqlValue DistanceImpl(aql::ExpressionContext* expressionContext,
F&& distanceFunc) {
auto calculateDistance = [distanceFunc = std::forward<F>(distanceFunc),
expressionContext,
&node](const VPackSlice lhs, const VPackSlice rhs) {
&node](VPackSlice lhs, VPackSlice rhs) {
TRI_ASSERT(lhs.isArray());
TRI_ASSERT(rhs.isArray());
auto lhsLength = lhs.length();
Expand Down
2 changes: 2 additions & 0 deletions arangod/Aql/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ void Query::prepareQuery(SerializationFormat format) {
}
}

enterState(QueryExecutionState::ValueType::PHYSICAL_INSTANTIATION);

// simon: assumption is _queryString is empty for DBServer snippets
bool const planRegisters = !_queryString.empty();
ExecutionEngine::instantiateFromPlan(*this, *plan, planRegisters, format);
Expand Down
21 changes: 11 additions & 10 deletions arangod/Aql/QueryExecutionState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,17 @@ using namespace arangodb::aql;

/// @brief names of query phases / states
static std::string const StateNames[] = {
"initializing", // INITIALIZATION
"parsing", // PARSING
"optimizing ast", // AST_OPTIMIZATION
"loading collections", // LOADING_COLLECTIONS
"instantiating plan", // PLAN_INSTANTIATION
"optimizing plan", // PLAN_OPTIMIZATION
"executing", // EXECUTION
"finalizing", // FINALIZATION
"finished", // FINISHED
"killed", // KILLED
"initializing", // INITIALIZATION
"parsing", // PARSING
"optimizing ast", // AST_OPTIMIZATION
"loading collections", // LOADING_COLLECTIONS
"instantiating plan", // PLAN_INSTANTIATION
"optimizing plan", // PLAN_OPTIMIZATION
"instantiating executors", // PHYSICAL_INSTANTIATION
"executing", // EXECUTION
"finalizing", // FINALIZATION
"finished", // FINISHED
"killed", // KILLED

"invalid" // INVALID
};
Expand Down
9 changes: 3 additions & 6 deletions arangod/Aql/QueryExecutionState.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@
#include <iosfwd>
#include <string>

namespace arangodb {
namespace aql {
namespace QueryExecutionState {
namespace arangodb::aql::QueryExecutionState {

/// @brief execution states
enum class ValueType {
Expand All @@ -40,6 +38,7 @@ enum class ValueType {
LOADING_COLLECTIONS,
PLAN_INSTANTIATION,
PLAN_OPTIMIZATION,
PHYSICAL_INSTANTIATION,
EXECUTION,
FINALIZATION,
FINISHED,
Expand All @@ -52,9 +51,7 @@ size_t toNumber(QueryExecutionState::ValueType value);
std::string const& toString(QueryExecutionState::ValueType state);
std::string toStringWithPrefix(QueryExecutionState::ValueType state);

} // namespace QueryExecutionState
} // namespace aql
} // namespace arangodb
} // namespace arangodb::aql::QueryExecutionState

std::ostream& operator<<(std::ostream&,
arangodb::aql::QueryExecutionState::ValueType);
2 changes: 1 addition & 1 deletion arangod/Aql/QueryProfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ static_assert(
static_cast<int>(QueryExecutionState::ValueType::INITIALIZATION) == 0,
"unexpected min QueryExecutionState enum value");
static_assert(static_cast<int>(QueryExecutionState::ValueType::INVALID_STATE) <
11,
12,
"unexpected max QueryExecutionState enum value");

} // namespace aql
Expand Down
Loading
0