SEARCH-279 snapshot for inverted index by Dronplane · Pull Request #18128 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

SEARCH-279 snapshot for inverted index #18128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
cc68456
make inverted index read docs from custom snapshot
Dronplane Jan 24, 2023
25f6498
wip - adding late materialization snapshot
Dronplane Jan 27, 2023
df30a6f
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 6, 2023
6ad9c5c
wip
Dronplane Feb 8, 2023
9618cf5
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 8, 2023
c381636
wip
Dronplane Feb 8, 2023
64b2af9
switch inverted index to viewsnapshot
Dronplane Feb 10, 2023
c59db0d
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 10, 2023
f26ed9e
fix
Dronplane Feb 10, 2023
d871f9c
wip
Dronplane Feb 13, 2023
754d9b5
wip
Dronplane Feb 13, 2023
82257e2
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 13, 2023
cb6e02c
try to fix mac build
Dronplane Feb 14, 2023
7a2b17c
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 14, 2023
d24f85d
fix tests build
Dronplane Feb 14, 2023
a28e2c5
add explicit instantiations
Dronplane Feb 14, 2023
04b2f29
add test
Dronplane Feb 14, 2023
f73d376
fix issue
Dronplane Feb 15, 2023
41754b9
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 15, 2023
29629f6
add js tests
Dronplane Feb 16, 2023
833c507
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 16, 2023
b38c3e8
clang-format
Dronplane Feb 16, 2023
419f3b3
cleanup
Dronplane Feb 16, 2023
d6e4e49
adress review comments
Dronplane Feb 16, 2023
76d2de6
fix for cluster
Dronplane Feb 16, 2023
a4958ce
Merge remote-tracking branch 'origin/devel' into feature/SEARCH-279-s…
Dronplane Feb 16, 2023
ee277a9
review comments
Dronplane Feb 16, 2023
9306ddb
jslint
Dronplane Feb 16, 2023
0ed94e3
Merge branch 'devel' into feature/SEARCH-279-snapshot-for-inverted-index
Dronplane Feb 17, 2023
94bf50c
cleanup
Dronplane Feb 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
clang-format
  • Loading branch information
Dronplane committed Feb 16, 2023
commit b38c3e8d30213d0f144d7cffbc593f94f13cd6db
1 change: 0 additions & 1 deletion arangod/Aql/DocumentProducingHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,6 @@ RegisterId DocumentProducingFunctionContext::getOutputRegister()
return _outputRegister;
}


ReadOwnWrites DocumentProducingFunctionContext::getReadOwnWrites()
const noexcept {
return _readOwnWrites;
Expand Down
33 changes: 14 additions & 19 deletions arangod/Aql/ExecutionNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2932,9 +2932,8 @@ std::unique_ptr<ExecutionBlock> MaterializeMultiNode::createBlock(
auto registerInfos = createRegisterInfos(std::move(readableInputRegisters),
std::move(writableOutputRegisters));

auto executorInfos =
MaterializerExecutorInfos<void>(inNmDocIdRegId,
outDocumentRegId, engine.getQuery());
auto executorInfos = MaterializerExecutorInfos<void>(
inNmDocIdRegId, outDocumentRegId, engine.getQuery());

return std::make_unique<ExecutionBlockImpl<MaterializeExecutor<void, false>>>(
&engine, this, std::move(registerInfos), std::move(executorInfos));
Expand All @@ -2960,11 +2959,9 @@ ExecutionNode* MaterializeMultiNode::clone(ExecutionPlan* plan,
}

template<bool localDocumentId>
MaterializeSingleNode<localDocumentId>::MaterializeSingleNode(ExecutionPlan* plan,
ExecutionNodeId id,
aql::Collection const* collection,
aql::Variable const& inDocId,
aql::Variable const& outVariable)
MaterializeSingleNode<localDocumentId>::MaterializeSingleNode(
ExecutionPlan* plan, ExecutionNodeId id, aql::Collection const* collection,
aql::Variable const& inDocId, aql::Variable const& outVariable)
: MaterializeNode(plan, id, inDocId, outVariable),
CollectionAccessingNode(collection) {}

Expand All @@ -2974,8 +2971,8 @@ MaterializeSingleNode<localDocumentId>::MaterializeSingleNode(
: MaterializeNode(plan, base), CollectionAccessingNode(plan, base) {}

template<bool localDocumentId>
void MaterializeSingleNode<localDocumentId>::doToVelocyPack(velocypack::Builder& nodes,
unsigned flags) const {
void MaterializeSingleNode<localDocumentId>::doToVelocyPack(
velocypack::Builder& nodes, unsigned flags) const {
// call base class method
MaterializeNode::doToVelocyPack(nodes, flags);

Expand All @@ -2985,7 +2982,8 @@ void MaterializeSingleNode<localDocumentId>::doToVelocyPack(velocypack::Builder&
}

template<bool localDocumentId>
std::unique_ptr<ExecutionBlock> MaterializeSingleNode<localDocumentId>::createBlock(
std::unique_ptr<ExecutionBlock>
MaterializeSingleNode<localDocumentId>::createBlock(
ExecutionEngine& engine,
std::unordered_map<ExecutionNode*, ExecutionBlock*> const&) const {
ExecutionNode const* previousNode = getFirstDependency();
Expand All @@ -2998,7 +2996,7 @@ std::unique_ptr<ExecutionBlock> MaterializeSingleNode<localDocumentId>::createBl
}
auto const& name = collection()->name();
RegisterId inNmDocIdRegId;
{
{
auto it = getRegisterPlan()->varInfo.find(_inNonMaterializedDocId->id);
TRI_ASSERT(it != getRegisterPlan()->varInfo.end());
inNmDocIdRegId = it->second.registerId;
Expand All @@ -3013,17 +3011,15 @@ std::unique_ptr<ExecutionBlock> MaterializeSingleNode<localDocumentId>::createBl
std::move(writableOutputRegisters));

auto executorInfos = MaterializerExecutorInfos<decltype(name)>(
inNmDocIdRegId, outDocumentRegId, engine.getQuery(),
name);
inNmDocIdRegId, outDocumentRegId, engine.getQuery(), name);
return std::make_unique<
ExecutionBlockImpl<MaterializeExecutor<decltype(name), localDocumentId>>>(
&engine, this, std::move(registerInfos), std::move(executorInfos));
}

template<bool localDocumentId>
ExecutionNode* MaterializeSingleNode<localDocumentId>::clone(ExecutionPlan* plan,
bool withDependencies,
bool withProperties) const {
ExecutionNode* MaterializeSingleNode<localDocumentId>::clone(
ExecutionPlan* plan, bool withDependencies, bool withProperties) const {
TRI_ASSERT(plan);

auto* outVariable = _outVariable;
Expand All @@ -3036,8 +3032,7 @@ ExecutionNode* MaterializeSingleNode<localDocumentId>::clone(ExecutionPlan* plan
}

auto c = std::make_unique<MaterializeSingleNode<localDocumentId>>(
plan, _id, collection(), *inNonMaterializedDocId,
*outVariable);
plan, _id, collection(), *inNonMaterializedDocId, *outVariable);
CollectionAccessingNode::cloneInto(*c);
return cloneHelper(std::move(c), withDependencies, withProperties);
}
Expand Down
2 changes: 1 addition & 1 deletion arangod/Aql/ExecutionNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -1225,7 +1225,7 @@ class MaterializeSingleNode : public MaterializeNode,
ExecutionNode* clone(ExecutionPlan* plan, bool withDependencies,
bool withProperties) const override final;

protected:
protected:
/// @brief export to VelocyPack
void doToVelocyPack(arangodb::velocypack::Builder& nodes,
unsigned flags) const override final;
Expand Down
11 changes: 6 additions & 5 deletions arangod/Aql/IndexExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,13 @@ IndexIterator::CoveringCallback getCallback(
IndexNode::IndexValuesVars const& outNonMaterializedIndVars,
IndexNode::IndexValuesRegisters const& outNonMaterializedIndRegs) {
auto impl = [&context, &index, &outNonMaterializedIndVars,
&outNonMaterializedIndRegs]<typename TokenType>(TokenType&& token,
IndexIteratorCoveringData& covering) {
constexpr bool isLocalDocumentId = std::is_same_v <LocalDocumentId,
std::decay_t<TokenType>>;
&outNonMaterializedIndRegs]<typename TokenType>(
TokenType&& token, IndexIteratorCoveringData& covering) {
constexpr bool isLocalDocumentId =
std::is_same_v<LocalDocumentId, std::decay_t<TokenType>>;
// can't be a static_assert as this implementation is still possible
// just can't be used right now as for restriction in late materialization rule
// just can't be used right now as for restriction in late materialization
// rule
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
if constexpr (checkUniqueness && !isLocalDocumentId) {
TRI_ASSERT(false);
Expand Down
1 change: 0 additions & 1 deletion arangod/Aql/IndexNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ std::unique_ptr<ExecutionBlock> IndexNode::createBlock(
auto const outVariable =
isLateMaterialized() ? _outNonMaterializedDocId : _outVariable;


auto const outRegister = variableToRegisterId(outVariable);

auto numIndVarsRegisters =
Expand Down
16 changes: 7 additions & 9 deletions arangod/Aql/IndexNodeOptimizerRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ void arangodb::aql::lateDocumentMaterializationRule(
auto& indexes = indexNode->getIndexes();
TRI_ASSERT(!indexes.empty());
if (indexes.size() != 1) {
// When enabling this please consider how inverted index would
// When enabling this please consider how inverted index would
// operate together with persistent as first produces
// SearchDocs but latter LocalDocumentIds. Usage of
// two separate variables might be the simplest solution
Expand Down Expand Up @@ -338,15 +338,13 @@ void arangodb::aql::lateDocumentMaterializationRule(
// insert a materialize node
auto makeMaterializer = [&]() -> std::unique_ptr<ExecutionNode> {
if (index->type() == Index::TRI_IDX_TYPE_INVERTED_INDEX) {
return std::make_unique <materialize::MaterializeSingleNode<false>>(
plan.get(), plan->nextId(),
indexNode->collection(),
*localDocIdTmp, *var);
return std::make_unique<materialize::MaterializeSingleNode<false>>(
plan.get(), plan->nextId(), indexNode->collection(),
*localDocIdTmp, *var);
}
return std::make_unique <materialize::MaterializeSingleNode<true>>(
plan.get(), plan->nextId(),
indexNode->collection(), *localDocIdTmp,
*var);
return std::make_unique<materialize::MaterializeSingleNode<true>>(
plan.get(), plan->nextId(), indexNode->collection(),
*localDocIdTmp, *var);
};
auto* materializeNode = plan->registerNode(makeMaterializer());
TRI_ASSERT(materializeNode);
Expand Down
14 changes: 8 additions & 6 deletions arangod/Aql/MaterializeExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ using namespace arangodb::aql;

template<typename T, bool localDocumentId>
arangodb::IndexIterator::DocumentCallback
MaterializeExecutor<T, localDocumentId>::ReadContext::copyDocumentCallback(ReadContext& ctx) {
MaterializeExecutor<T, localDocumentId>::ReadContext::copyDocumentCallback(
ReadContext& ctx) {
typedef std::function<arangodb::IndexIterator::DocumentCallback(ReadContext&)>
CallbackFactory;
static CallbackFactory const callbackFactory{[](ReadContext& ctx) {
Expand Down Expand Up @@ -74,7 +75,8 @@ MaterializeExecutor<T, localDocumentId>::MaterializeExecutor(
}

template<typename T, bool localDocumentId>
void MaterializeExecutor<T, localDocumentId>::fillBuffer(AqlItemBlockInputRange& inputRange) {
void MaterializeExecutor<T, localDocumentId>::fillBuffer(
AqlItemBlockInputRange& inputRange) {
TRI_ASSERT(!localDocumentId);
_bufferedDocs.clear();
auto const block = inputRange.getBlock();
Expand Down Expand Up @@ -191,8 +193,8 @@ void MaterializeExecutor<T, localDocumentId>::fillBuffer(AqlItemBlockInputRange&

template<typename T, bool localDocumentId>
std::tuple<ExecutorState, MaterializeStats, AqlCall>
MaterializeExecutor<T, localDocumentId>::produceRows(AqlItemBlockInputRange& inputRange,
OutputAqlItemRow& output) {
MaterializeExecutor<T, localDocumentId>::produceRows(
AqlItemBlockInputRange& inputRange, OutputAqlItemRow& output) {
MaterializeStats stats;

AqlCall upstreamCall{};
Expand Down Expand Up @@ -277,8 +279,8 @@ MaterializeExecutor<T, localDocumentId>::produceRows(AqlItemBlockInputRange& inp

template<typename T, bool localDocumentId>
std::tuple<ExecutorState, MaterializeStats, size_t, AqlCall>
MaterializeExecutor<T, localDocumentId>::skipRowsRange(AqlItemBlockInputRange& inputRange,
AqlCall& call) {
MaterializeExecutor<T, localDocumentId>::skipRowsRange(
AqlItemBlockInputRange& inputRange, AqlCall& call) {
size_t skipped = 0;

// hasDataRow may only occur during fullCount due to previous overfetching
Expand Down
34 changes: 17 additions & 17 deletions arangod/IResearch/IResearchInvertedIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,11 +455,13 @@ template<bool emitLocalDocumentId>
class IResearchInvertedIndexIterator final
: public IResearchInvertedIndexIteratorBase {
public:
IResearchInvertedIndexIterator(
ResourceMonitor& monitor, LogicalCollection* collection,
ViewSnapshot& state, transaction::Methods* trx,
aql::AstNode const* condition, IResearchInvertedIndexMeta const* meta,
aql::Variable const* variable, int mutableConditionIdx)
IResearchInvertedIndexIterator(ResourceMonitor& monitor,
LogicalCollection* collection,
ViewSnapshot& state, transaction::Methods* trx,
aql::AstNode const* condition,
IResearchInvertedIndexMeta const* meta,
aql::Variable const* variable,
int mutableConditionIdx)
: IResearchInvertedIndexIteratorBase(collection, state, trx, condition,
meta, variable, mutableConditionIdx),
_projections(*meta) {}
Expand All @@ -484,20 +486,19 @@ class IResearchInvertedIndexIterator final
nextImplInternal<decltype(skipped), false, false>(skipped, count);
}

bool nextDocumentImpl(DocumentCallback const& cb,
uint64_t limit) override {
bool nextDocumentImpl(DocumentCallback const& cb, uint64_t limit) override {
return nextImpl(
[this, &cb](LocalDocumentId const& token) {
// we use here just first snapshot as they are all the same here.
// iterator operates only one iresearch datastore
return _collection->getPhysical()
->readFromSnapshot(_trx, token, cb, canReadOwnWrites(), _snapshot.snapshot(0))
->readFromSnapshot(_trx, token, cb, canReadOwnWrites(),
_snapshot.snapshot(0))
.ok();
},
limit);
}


// FIXME: Evaluate buffering iresearch reads
template<typename Callback, bool withCovering, bool produce>
bool nextImplInternal(Callback const& callback, uint64_t limit) {
Expand Down Expand Up @@ -668,18 +669,16 @@ class IResearchInvertedIndexMergeIterator final
// Otherwise we read it only if required.
constexpr bool needReadLocalDocumentId =
!withCovering || emitLocalDocumentId;
if (!needReadLocalDocumentId || segment.doc->value ==
segment.pkDocItr->seek(segment.doc->value)) {
if (!needReadLocalDocumentId ||
segment.doc->value == segment.pkDocItr->seek(segment.doc->value)) {
LocalDocumentId documentId;
bool const readSuccess =
!needReadLocalDocumentId ||
!needReadLocalDocumentId ||
DocumentPrimaryKey::read(documentId, segment.pkValue->value);
if (readSuccess) {
if constexpr (withCovering) {
segment.projections.seek(segment.doc->value);
SearchDoc doc(
_snapshot.segment(currentIdx),
segment.doc->value);
SearchDoc doc(_snapshot.segment(currentIdx), segment.doc->value);
TRI_ASSERT(documentId.isSet() == emitLocalDocumentId);
bool emitRes{false};
if constexpr (emitLocalDocumentId) {
Expand Down Expand Up @@ -991,7 +990,8 @@ std::unique_ptr<IndexIterator> IResearchInvertedIndex::iteratorForCondition(
if (!selfLock) {
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL,
absl::StrCat("Failed to lock datastore for index '", index().name(), "'"));
absl::StrCat("Failed to lock datastore for index '", index().name(),
"'"));
}
// TODO(MBkkt) Move it to optimization stage
if (opts.waitForSync &&
Expand All @@ -1004,7 +1004,7 @@ std::unique_ptr<IndexIterator> IResearchInvertedIndex::iteratorForCondition(
ViewSnapshot::Links links;
links.push_back(std::move(selfLock));
ctx = makeViewSnapshot(*trx, key, opts.waitForSync, index().name(),
std::move(links));
std::move(links));
}
return *ctx;
}();
Expand Down
2 changes: 1 addition & 1 deletion arangod/IResearch/IResearchInvertedIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class IResearchInvertedIndex : public IResearchDataStore {
bool covers(aql::Projections& projections) const;

std::vector<std::vector<basics::AttributeName>> const& coveredFields()
const noexcept {
const noexcept {
return _coveredFields;
}

Expand Down
18 changes: 7 additions & 11 deletions arangod/Indexes/IndexIterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,34 +150,30 @@ class IndexIterator {
velocypack::ValueLength _storedValuesLength;
};


// TODO: Move to iresearch
template<typename... Funcs>
class CallbackImpl : private fu2::function<Funcs...> {
using Base = fu2::function<Funcs...>;

struct DummyRetval {
template<typename T>
operator T() const{
operator T() const {
TRI_ASSERT(false);
throw std::bad_function_call{};
}
};

public:
using Base::operator();
using Base::operator bool;

bool operator==(std::nullptr_t) {
return !bool(*this);
}
bool operator==(std::nullptr_t) { return !bool(*this); }

bool operator!=(std::nullptr_t) {
return bool(*this);
}
bool operator!=(std::nullptr_t) { return bool(*this); }

CallbackImpl() noexcept = default;

template<typename...Fs>
template<typename... Fs>
CallbackImpl(Fs&&... fs)
: Base{fu2::overload(std::forward<Fs>(fs)...,
[](auto&&...) { return DummyRetval{}; })} {}
Expand All @@ -186,8 +182,8 @@ class IndexIterator {
using LocalDocumentIdCallback =
CallbackImpl<bool(LocalDocumentId const& token) const>;

using DocumentCallback =
CallbackImpl<bool(LocalDocumentId const& token, velocypack::Slice doc) const>;
using DocumentCallback = CallbackImpl<bool(LocalDocumentId const& token,
velocypack::Slice doc) const>;

using CoveringCallback =
CallbackImpl<bool(LocalDocumentId const& token,
Expand Down
17 changes: 8 additions & 9 deletions tests/IResearch/IResearchInvertedIndexIteratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,7 @@ class IResearchInvertedIndexIteratorTestBase
std::function<void(arangodb::IndexIterator* it)> const& test,
std::string_view refName = "d",
std::shared_ptr<arangodb::velocypack::Builder> bindVars = nullptr,
int mutableConditionIdx = -1,
bool forLateMaterialization = false) {
int mutableConditionIdx = -1, bool forLateMaterialization = false) {
SCOPED_TRACE(testing::Message("ExecuteIteratorTest failed for query ")
<< queryString);
auto ctx =
Expand Down Expand Up @@ -406,7 +405,8 @@ TEST_F(IResearchInvertedIndexIteratorTest, test_skip_nextCovering) {
executeIteratorTest(queryString, test);
}

TEST_F(IResearchInvertedIndexIteratorTest, test_skip_nextCovering_LateMaterialized) {
TEST_F(IResearchInvertedIndexIteratorTest,
test_skip_nextCovering_LateMaterialized) {
std::string queryString{
R"(FOR d IN col FILTER d.a == "1" OR d.b == "2" RETURN d)"};
auto test = [this](arangodb::IndexIterator* iterator) {
Expand All @@ -423,12 +423,11 @@ TEST_F(IResearchInvertedIndexIteratorTest, test_skip_nextCovering_LateMaterializ
ASSERT_TRUE(iterator->hasMore());
unsigned docs{0};

auto docCallback = [&docs](
arangodb::aql::AqlValue&& token,
arangodb::IndexIteratorCoveringData& data) {
auto docCallback = [&docs](arangodb::aql::AqlValue&& token,
arangodb::IndexIteratorCoveringData& data) {
++docs;
auto searchDoc = arangodb::iresearch::SearchDoc::decode(
token.slice().stringView());
auto searchDoc =
arangodb::iresearch::SearchDoc::decode(token.slice().stringView());
token.destroy();
EXPECT_TRUE(searchDoc.isValid());
EXPECT_TRUE(data.isArray());
Expand All @@ -454,7 +453,7 @@ TEST_F(IResearchInvertedIndexIteratorTest, test_skip_nextCovering_LateMaterializ
ASSERT_EQ(docs, 2);
ASSERT_FALSE(iterator->hasMore());
};
executeIteratorTest(queryString, test, "d", nullptr, - 1, true);
executeIteratorTest(queryString, test, "d", nullptr, -1, true);
}

TEST_F(IResearchInvertedIndexIteratorTest, test_next_array) {
Expand Down