8000 APM-786 heap sort with stored values by Dronplane · Pull Request #19551 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

APM-786 heap sort with stored values #19551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
07b591f
wip
Dronplane Apr 25, 2023
899331c
wip
Dronplane Apr 26, 2023
a03e71e
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Jul 18, 2023
a1ea685
wip
Dronplane Jul 18, 2023
b1ba920
wip
Dronplane Aug 1, 2023
8e392ce
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Aug 1, 2023
4584827
wip
Dronplane Aug 1, 2023
13d90f3
wip
Dronplane Aug 1, 2023
f62a7d5
wip
Dronplane Aug 3, 2023
dc44497
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Aug 3, 2023
48442bc
wip
Dronplane Aug 7, 2023
c2a0b23
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Aug 7, 2023
6995111
wip
Dronplane Aug 8, 2023
3227d21
wip
Dronplane Aug 8, 2023
de064a2
wip
Dronplane Aug 9, 2023
022d9da
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Aug 9, 2023
9599662
wip
Dronplane Aug 9, 2023
8f8acff
fixex for postfix case
Dronplane Aug 9, 2023
8000
e53f502
cleanup
Dronplane Aug 9, 2023
e7589a9
fix build
Dronplane Aug 10, 2023
7227454
fix
Dronplane Aug 10, 2023
0b91f37
fix typename
Dronplane Aug 10, 2023
0cdf93b
clang-format
Dronplane Aug 10, 2023
a47eb93
add explicit inits
Dronplane Aug 10, 2023
b56a4ec
fix
Dronplane Aug 10, 2023
e4fc95a
review comments
Dronplane Aug 10, 2023
34b8946
get rid of values mapping
Dronplane Aug 11, 2023
3504ef2
remove redundant inits
Dronplane Aug 11, 2023
9b8f84f
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Aug 11, 2023
9db2616
fix empty field case
Dronplane Aug 11, 2023
ec8b7b1
fix tests
Dronplane Aug 11, 2023
40dbd9e
Merge remote-tracking branch 'origin/devel' into feature/APM-786-heap…
Dronplane Aug 11, 2023
b1c9e09
fix asan build + fix slice iteration
Dronplane Aug 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
8000
Diff view
Prev Previous commit
Next Next commit
review comments
  • Loading branch information
Dronplane committed Aug 10, 2023
commit e4fc95a1b02a4ffb86a02e7bb81b702fab3f50ac
16 changes: 9 additions & 7 deletions arangod/Aql/IResearchViewExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include <formats/formats.hpp>
#include <index/heap_iterator.hpp>
#include <utils/empty.hpp>

#include <utility>
#include <variant>
Expand Down Expand Up @@ -379,6 +380,10 @@ class IndexReadBuffer {
// indexed access for setting values
_storedValuesBuffer.resize(atMost * stored);
_heapOnlyStoredValuesBuffer.resize(_heapOnlyColumnsCount * atMost);
if (!scores) {
// save ourselves 1 "if" during pushing values
_scoreBuffer.push_back(irs::score_t{});
}
} else {
_storedValuesBuffer.reserve(atMost * stored);
}
Expand Down Expand Up @@ -444,9 +449,7 @@ class IndexReadBuffer {
// FIXME(gnusi): compile time
std::vector<iresearch::SearchDoc> _searchDocs;
std::vector<irs::score_t> _scoreBuffer;
StoredValuesContainer _storedValuesBuffer; // FIXME: ensure atMost rows are
// allocated from the beginning!

StoredValuesContainer _storedValuesBuffer;
// Heap Sort facilities
// maps stored column to index in _storedValuesBuffer
containers::FlatHashMap<ptrdiff_t, size_t> _heapUsedStoredColumns;
Expand All @@ -456,10 +459,9 @@ class IndexReadBuffer {
std::vector<ColumnIterator> _heapOnlyStoredValuesReaders;
// <num heap only values>
StoredValuesContainer _currentDocumentBuffer;
using DocumentSlicesContainer =
typename std::conditional<copyStored, bool,
std::vector<velocypack::Slice>>::type;
DocumentSlicesContainer _currentDocumentSlices;
IRS_NO_UNIQUE_ADDRESS
irs::utils::Need<copyStored, std::vector<velocypack::Slice>>
_currentDocumentSlices;
std::vector<HeapSortValue> _heapSortValues;
std::span<iresearch::HeapSortElement const> _heapSort;
size_t _heapOnlyColumnsCount{0};
Expand Down
54 changes: 22 additions & 32 deletions arangod/Aql/IResearchViewExecutor.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ lookupCollection(arangodb::transaction::Methods& trx, DataSourceId cid,
}
}

constexpr int kSortMultiplier[]{-1, 1};
inline constexpr int kSortMultiplier[]{-1, 1};

template<typename ValueType>
class BufferHeapSortContext {
Expand Down Expand Up @@ -204,25 +204,23 @@ class BufferHeapSortContext {
velocypack::Slice getStoredValue(irs::bytes_view storedValue,
HeapSortElement const& sort) {
TRI_ASSERT(!sort.isScore());
[[maybe_unused]] auto const totalSize = storedValue.size();
VPackSlice slice{storedValue.data()};
if (slice.isNull()) {
return slice;
}
size_t size = 0;
size_t i = 0;
while (i < sort.fieldNumber) {
size += slice.byteSize();
TRI_ASSERT(size <= totalSize);
slice = VPackSlice{slice.end()};
++i;
auto* start = storedValue.data();
[[maybe_unused]] auto* end = start + storedValue.size();
velocypack::Slice slice{start};
for (size_t i = 0; i != sort.fieldNumber; ++i) {
start += slice.byteSize();
TRI_ASSERT(start < end);
slice = velocypack::Slice{start};
}
TRI_ASSERT(!slice.isNone());
if (sort.postfix.empty()) {
return slice;
} else {
return slice.get(sort.postfix);
}
if (!slice.isObject()) {
return velocypack::Slice::nullSlice();
}
auto value = slice.get(sort.postfix);
return !value.isNone() ? value : velocypack::Slice::nullSlice();
}

} // namespace
Expand Down Expand Up @@ -476,9 +474,8 @@ velocypack::Slice IndexReadBuffer<ValueType, copySorted>::readHeapSortColumn(
if constexpr (copySorted) {
return getStoredValue(_currentDocumentBuffer.back(), cmp);
} else {
_currentDocumentSlices.push_back(
return _currentDocumentSlices.emplace_back(
getStoredValue(_currentDocumentBuffer.back(), cmp));
return _currentDocumentSlices.back();
}
}

Expand Down Expand Up @@ -569,8 +566,7 @@ void IndexReadBuffer<ValueType, copySorted>::pushSortedValue(
sortContext(_heapSortValues, _heapSort);
size_t readerSlot{0};
if (sortContext.compareInput(_rows.front(), scores.data(),
[this, &value, &readerSlot, &columnReader](
iresearch::HeapSortElement const& cmp) {
[&](iresearch::HeapSortElement const& cmp) {
return readHeapSortColumn(
cmp, value.irsDocId(), columnReader,
readerSlot++);
Expand Down Expand Up @@ -598,9 +594,7 @@ void IndexReadBuffer<ValueType, copySorted>::pushSortedValue(
TRI_ASSERT(!sortContext.descScore() ||
threshold <= _scoreBuffer[_rows.front() * _numScoreRegisters]);
#endif
if (!scores.empty()) {
threshold = _scoreBuffer[_rows.front() * _numScoreRegisters];
}
threshold = _scoreBuffer[_rows.front() * _numScoreRegisters];
score.Min(threshold);
} else {
finalizeHeapSortDocument<false>(_rows.size(), value.irsDocId(), scores,
Expand Down Expand Up @@ -1105,15 +1099,15 @@ template<typename Impl, typename ExecutionTraits>
bool IResearchViewExecutorBase<Impl, ExecutionTraits>::getStoredValuesReaders(
irs::SubReader const& segmentReader, size_t storedValuesIndex /*= 0*/) {
auto const& columnsFieldsRegs = _infos.getOutNonMaterializedViewRegs();
constexpr bool HeapSort =
std::is_same_v<HeapSortExecutorValue,
typename arangodb::aql::IResearchViewExecutorBase<
Impl, ExecutionTraits>::Traits::IndexBufferValueType>;
if (!columnsFieldsRegs.empty()) {
auto columnFieldsRegs = columnsFieldsRegs.cbegin();
auto index = storedValuesIndex * columnsFieldsRegs.size();
if (IResearchViewNode::kSortColumnNumber == columnFieldsRegs->first) {
if (!std::is_same_v<
HeapSortExecutorValue,
typename arangodb::aql::IResearchViewExecutorBase<
Impl, ExecutionTraits>::Traits::IndexBufferValueType> ||
!_storedColumnsMask.contains(columnFieldsRegs->first)) {
if (!HeapSort || !_storedColumnsMask.contains(columnFieldsRegs->first)) {
auto sortReader = ::sortColumn(segmentReader);
if (ADB_UNLIKELY(!sortReader)) {
LOG_TOPIC("bc5bd", WARN, arangodb::iresearch::TOPIC)
Expand All @@ -1132,11 +1126,7 @@ bool IResearchViewExecutorBase<Impl, ExecutionTraits>::getStoredValuesReaders(
for (; columnFieldsRegs != columnsFieldsRegs.cend(); ++columnFieldsRegs) {
TRI_ASSERT(IResearchViewNode::kSortColumnNumber <
columnFieldsRegs->first);
if constexpr (std::is_same_v<
HeapSortExecutorValue,
typename arangodb::aql::IResearchViewExecutorBase<
Impl,
ExecutionTraits>::Traits::IndexBufferValueType>) {
if constexpr (HeapSort) {
if (_storedColumnsMask.contains(columnFieldsRegs->first)) {
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion arangod/Aql/IResearchViewNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1416,7 +1416,7 @@ IResearchViewNode::IResearchViewNode(aql::ExecutionPlan& plan,
}
auto index = scorersSortElement.get(kNodeViewScorersSortIndex);
auto asc = scorersSortElement.get(kNodeViewScorersSortAsc);
if (index.isNumber() && asc.isBoolean()) {
if (index.isNumber<size_t>() && asc.isBoolean()) {
auto indexVal = index.getNumber<size_t>();
if (indexVal >= _scorers.size()) {
THROW_ARANGO_EXCEPTION_MESSAGE(
Expand Down
40 changes: 19 additions & 21 deletions arangod/Aql/IResearchViewOptimizerRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,28 +334,26 @@ bool optimizeScoreSort(IResearchViewNode& viewNode, ExecutionPlan* plan) {
return false;
}
switch (astCalcNode->type) {
case AstNodeType::NODE_TYPE_REFERENCE:
case AstNodeType::NODE_TYPE_REFERENCE: {
// something produced by during search function replacement.
// e.g. it is expected to be LET sortVar = scorerVar;
{
auto sortVariable =
reinterpret_cast<Variable const*>(astCalcNode->getData());
TRI_ASSERT(sortVariable);
auto const s = std::find_if(
std::begin(scorers), std::end(scorers),
[sortVariableId = sortVariable->id](auto const& t) noexcept {
return t.var->id == sortVariableId;
});
if (s == std::end(scorers)) {
return false;
}
heapSort.push_back(HeapSortElement{
.postfix = "",
.source = std::distance(scorers.begin(), s),
.fieldNumber = std::numeric_limits<size_t>::max(),
.ascending = sort.ascending});
auto sortVariable =
reinterpret_cast<Variable const*>(astCalcNode->getData());
TRI_ASSERT(sortVariable);
auto const s = std::find_if(
std::begin(scorers), std::end(scorers),
[sortVariableId = sortVariable->id](auto const& t) noexcept {
return t.var->id == sortVariableId;
});
if (s == std::end(scorers)) {
return false;
}
break;
heapSort.push_back(HeapSortElement{
.postfix = "",
.source = std::distance(scorers.begin(), s),
.fieldNumber = std::numeric_limits<size_t>::max(),
.ascending = sort.ascending});
} break;
case AstNodeType::NODE_TYPE_ATTRIBUTE_ACCESS:
if (checkAttributeAccess(astCalcNode, viewVariable, false)) {
// direct access to view variable
Expand Down Expand Up @@ -442,8 +440,8 @@ bool optimizeScoreSort(IResearchViewNode& viewNode, ExecutionPlan* plan) {
return false;
}
}
if (heapSort.front().isScore() && heapSort.front().source != 0) {
auto idx = heapSort.front().source;
if (auto& front = heapSort.front(); front.isScore() && front.source != 0) {
auto idx = front.source;
std::swap(scorers.front(), scorers[idx]);
for (auto it = heapSort.begin(); it != heapSort.end(); ++it) {
if (it->isScore() && it->source == 0) {
Expand Down
0