30
30
#include " Aql/AqlFunctionsInternalCache.h"
31
31
#include " Aql/RegisterInfos.h"
32
32
#include " Aql/VarInfoMap.h"
33
+ #include " Containers/FlatHashSet.h"
33
34
#include " IResearch/ExpressionFilter.h"
34
35
#include " IResearch/IResearchFilterFactory.h"
35
36
#include " IResearch/IResearchExpressionContext.h"
45
46
46
47
#include < formats/formats.hpp>
47
48
#include < index/heap_iterator.hpp>
49
+ #include < utils/empty.hpp>
48
50
49
51
#include < utility>
50
52
#include < variant>
@@ -88,8 +90,8 @@ class IResearchViewExecutorInfos {
88
90
iresearch::IResearchViewNode::ViewValuesRegisters&&
89
91
outNonMaterializedViewRegs,
90
92
iresearch::CountApproximate, iresearch::FilterOptimization,
91
- std::vector<std::pair< size_t , bool >> scorersSort, size_t scorersSortLimit ,
92
- iresearch::SearchMeta const * meta);
93
+ std::vector<iresearch::HeapSortElement> const & heapSort ,
94
+ size_t heapSortLimit, iresearch::SearchMeta const * meta);
93
95
94
96
auto getDocumentRegister () const noexcept -> RegisterId;
95
97
@@ -132,9 +134,9 @@ class IResearchViewExecutorInfos {
132
134
133
135
iresearch::IResearchViewStoredValues const & storedValues () const noexcept ;
134
136
135
- size_t scoresSortLimit () const noexcept { return _scorersSortLimit ; }
137
+ size_t heapSortLimit () const noexcept { return _heapSortLimit ; }
136
138
137
- auto scoresSort () const noexcept { return std::span{_scorersSort }; }
139
+ auto heapSort () const noexcept { return std::span{_heapSort }; }
138
140
139
141
size_t scoreRegistersCount () const noexcept { return _scoreRegistersCount; }
140
142
@@ -160,8 +162,8 @@ class IResearchViewExecutorInfos {
160
162
iresearch::IResearchViewNode::ViewValuesRegisters _outNonMaterializedViewRegs;
161
163
iresearch::CountApproximate _countApproximate;
162
164
iresearch::FilterOptimization _filterOptimization;
163
- std::vector<std::pair< size_t , bool >> _scorersSort ;
164
- size_t _scorersSortLimit ;
165
+ std::vector<iresearch::HeapSortElement> const & _heapSort ;
166
+ size_t _heapSortLimit ;
165
167
iresearch::SearchMeta const * _meta;
166
168
int const _depth;
167
169
bool _filterConditionIsEmpty;
@@ -246,6 +248,11 @@ class ScoreIterator {
246
248
size_t _numScores;
247
249
};
248
250
251
+ union HeapSortValue {
252
+ irs::score_t score;
253
+ velocypack::Slice slice;
254
+ };
255
+
249
256
// Holds and encapsulates the data read from the iresearch index.
250
257
template <typename ValueType, bool copyStored>
251
258
class IndexReadBuffer {
@@ -274,8 +281,35 @@ class IndexReadBuffer {
274
281
275
282
ScoreIterator getScores (IndexReadBufferEntry bufferEntry) noexcept ;
276
283
277
- void setScoresSort (std::span<std::pair<size_t , bool > const > s) noexcept {
278
- _scoresSort = s;
284
+ void setHeapSort (std::span<iresearch::HeapSortElement const > s,
285
+ iresearch::IResearchViewNode::ViewValuesRegisters const &
286
+ storedValues) noexcept {
287
+ TRI_ASSERT (_heapSort.empty ());
288
+ TRI_ASSERT (_heapOnlyColumnsCount == 0 );
289
+ auto const storedValuesCount = storedValues.size ();
290
+ size_t j = 0 ;
291
+ for (auto const & c : s) {
292
+ auto & sort = _heapSort.emplace_back (BufferHeapSortElement{c});
293
+ if (c.isScore ()) {
294
+ continue ;
295
+ }
296
+ auto it = storedValues.find (c.source );
297
+ if (it != storedValues.end ()) {
298
+ sort.container = &_storedValuesBuffer;
299
+ sort.multiplier = storedValuesCount;
300
+ sort.offset =
301
+ static_cast <size_t >(std::distance (storedValues.begin (), it));
302
+ } else {
303
+ sort.container = &_heapOnlyStoredValuesBuffer;
304
+ sort.offset = j++;
305
+ ++_heapOnlyColumnsCount;
306
+ }
307
+ }
308
+ for (auto & c : _heapSort) {
309
+ if (c.container == &_heapOnlyStoredValuesBuffer) {
310
+ c.multiplier = _heapOnlyColumnsCount;
311
+ }
312
+ }
279
313
}
280
314
281
315
irs::score_t * pushNoneScores (size_t count);
@@ -288,7 +322,9 @@ class IndexReadBuffer {
288
322
_searchDocs.emplace_back (segment, docId);
289
323
}
290
324
291
- void pushSortedValue (StorageSnapshot const & snapshot, ValueType&& value,
325
+ template <typename ColumnReaderProvider>
326
+ void pushSortedValue (ColumnReaderProvider& columnReader,
327
+ StorageSnapshot const & snapshot, ValueType&& value,
292
328
std::span<float_t const > scores, irs::score& score,
293
329
irs::score_t & threshold);
294
330
@@ -315,14 +351,22 @@ class IndexReadBuffer {
315
351
// before and after.
316
352
void assertSizeCoherence () const noexcept ;
317
353
318
- size_t memoryUsage (size_t maxSize) const noexcept {
354
+ size_t memoryUsage (size_t maxSize, size_t scores,
355
+ size_t stored) const noexcept {
319
356
auto res =
320
357
maxSize * sizeof (typename decltype (_keyBuffer)::value_type) +
321
- maxSize * sizeof (typename decltype (_scoreBuffer)::value_type) +
358
+ scores * maxSize * sizeof (typename decltype (_scoreBuffer)::value_type) +
322
359
maxSize * sizeof (typename decltype (_searchDocs)::value_type) +
323
- maxSize * sizeof (typename decltype (_storedValuesBuffer)::value_type);
324
- if (!_scoresSort.empty ()) {
360
+ stored * maxSize *
361
+ sizeof (typename decltype (_storedValuesBuffer)::value_type);
362
+ if (!_heapSort.empty ()) {
325
363
res += maxSize * sizeof (typename decltype (_rows)::value_type);
364
+ res += _heapOnlyColumnsCount * maxSize *
365
+ sizeof (typename decltype (_heapOnlyStoredValuesBuffer)::value_type);
366
+ res += _heapOnlyColumnsCount *
367
+ sizeof (typename decltype (_currentDocumentBuffer)::value_type);
368
+ res += (maxSize * _heapSort.size ()) *
369
+ sizeof (typename decltype (_heapSortValues)::value_type);
326
370
}
327
371
return res;
328
372
}
@@ -331,17 +375,27 @@ class IndexReadBuffer {
331
375
size_t stored) {
332
376
TRI_ASSERT (_storedValuesBuffer.empty ());
333
377
if (_keyBuffer.capacity () < atMost) {
334
- auto newMemoryUsage = memoryUsage (atMost);
378
+ auto newMemoryUsage = memoryUsage (atMost, scores, stored );
335
379
auto tracked = _memoryTracker.tracked ();
336
380
if (newMemoryUsage > tracked) {
337
381
_memoryTracker.increase (newMemoryUsage - tracked);
338
382
}
339
383
_keyBuffer.reserve (atMost);
340
384
_searchDocs.reserve (atMost);
341
385
_scoreBuffer.reserve (atMost * scores);
342
- _storedValuesBuffer.reserve (atMost * stored);
343
- if (!_scoresSort.empty ()) {
386
+ if (!_heapSort.empty ()) {
344
387
_rows.reserve (atMost);
388
+ _currentDocumentBuffer.reserve (_heapSort.size ());
389
+ // resize is important here as we want
390
+ // indexed access for setting values
391
+ _storedValuesBuffer.resize (atMost * stored);
392
+ _heapOnlyStoredValuesBuffer.resize (_heapOnlyColumnsCount * atMost);
393
+ if (!scores) {
394
+ // save ourselves 1 "if" during pushing values
395
+ _scoreBuffer.push_back (irs::score_t {});
396
+ }
397
+ } else {
398
+ _storedValuesBuffer.reserve (atMost * stored);
345
399
}
346
400
}
347
401
_maxSize = atMost;
@@ -370,6 +424,12 @@ class IndexReadBuffer {
370
424
371
425
StoredValuesContainer const & getStoredValues () const noexcept ;
372
426
427
+ struct BufferHeapSortElement : iresearch::HeapSortElement {
428
+ StoredValuesContainer* container{};
429
+ size_t offset{};
430
+ size_t multiplier{};
431
+ };
432
+
373
433
private:
374
434
// _keyBuffer, _scoreBuffer, _storedValuesBuffer together hold all the
375
435
// information read from the iresearch index.
@@ -390,14 +450,39 @@ class IndexReadBuffer {
390
450
StorageSnapshot const * snapshot;
391
451
ValueType value;
392
452
};
453
+
454
+ template <typename ColumnReaderProvider>
455
+ velocypack::Slice readHeapSortColumn (iresearch::HeapSortElement const & cmp,
456
+ irs::doc_id_t doc,
457
+ ColumnReaderProvider& readerProvider,
458
+ size_t readerSlot);
459
+ template <bool fullHeap, typename ColumnReaderProvider>
460
+ void finalizeHeapSortDocument (size_t idx, irs::doc_id_t ,
461
+ std::span<float_t const > scores,
462
+ ColumnReaderProvider& readerProvider);
463
+
393
464
std::vector<BufferValueType> _keyBuffer;
394
465
// FIXME(gnusi): compile time
395
466
std::vector<iresearch::SearchDoc> _searchDocs;
396
467
std::vector<irs::score_t > _scoreBuffer;
397
468
StoredValuesContainer _storedValuesBuffer;
469
+ // Heap Sort facilities
470
+ // atMost * <num heap only values>
471
+ StoredValuesContainer _heapOnlyStoredValuesBuffer;
472
+ // <num heap only values>
473
+ std::vector<ColumnIterator> _heapOnlyStoredValuesReaders;
474
+ // <num heap only values>
475
+ StoredValuesContainer _currentDocumentBuffer;
476
+ IRS_NO_UNIQUE_ADDRESS
477
+ irs::utils::Need<!copyStored, std::vector<velocypack::Slice>>
478
+
A2F4
_currentDocumentSlices;
479
+ std::vector<HeapSortValue> _heapSortValues;
480
+ std::vector<BufferHeapSortElement> _heapSort;
481
+ size_t _heapOnlyColumnsCount{0 };
482
+ size_t _currentReaderOffset{std::numeric_limits<size_t >::max ()};
483
+
398
484
size_t _numScoreRegisters;
399
485
size_t _keyBaseIdx;
400
- std::span<std::pair<size_t , bool > const > _scoresSort;
401
486
std::vector<size_t > _rows;
402
487
size_t _maxSize;
403
488
size_t _heapSizeLeft;
@@ -585,6 +670,7 @@ class IResearchViewExecutorBase {
585
670
irs::Scorers _scorers;
586
671
irs::WandContext _wand;
587
672
std::vector<ColumnIterator> _storedValuesReaders;
673
+ containers::FlatHashSet<ptrdiff_t > _storedColumnsMask;
588
674
std::array<char , arangodb::iresearch::kSearchDocBufSize > _buf;
589
675
bool _isInitialized;
590
676
// new mangling only:
0 commit comments