From 6c38d4c772e3206f78f2f02e6fd5d6c1fffe11d7 Mon Sep 17 00:00:00 2001 From: Andrei Lobov Date: Mon, 30 Aug 2021 18:06:26 +0300 Subject: [PATCH 1/4] add test for concurrent access to the typed analyzer inside FieldIterator --- tests/IResearch/IResearchDocument-test.cpp | 157 +++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/tests/IResearch/IResearchDocument-test.cpp b/tests/IResearch/IResearchDocument-test.cpp index eadc8e535ba9..24e947219a46 100644 --- a/tests/IResearch/IResearchDocument-test.cpp +++ b/tests/IResearch/IResearchDocument-test.cpp @@ -334,6 +334,72 @@ class TypedAnalyzer : public irs::analysis::analyzer { REGISTER_ANALYZER_VPACK(TypedAnalyzer, TypedAnalyzer::make, TypedAnalyzer::normalize); +class TypedArrayAnalyzer : public irs::analysis::analyzer { + public: + static constexpr irs::string_ref type_name() noexcept { + return "iresearch-document-typed-array"; + } + + static ptr make(irs::string_ref const& args) { + PTR_NAMED(TypedArrayAnalyzer, ptr, args); + return ptr; + } + + static bool normalize(irs::string_ref const& args, std::string& out) { + out.assign(args.c_str(), args.size()); + return true; + } + + explicit TypedArrayAnalyzer(irs::string_ref const&) : irs::analysis::analyzer(irs::type::get()) { + _returnType.value = arangodb::iresearch::AnalyzerValueType::Number; + } + + __declspec(noinline) virtual bool reset(irs::string_ref const& data) override { + auto value = std::string(data); + _values.clear(); + _current = 0; + for (double d = 1; d < std::atoi(value.c_str()); ++d) { + _values.push_back(d); + } + return true; + } + + __declspec(noinline) virtual bool next() override { + if (_current < _values.size()) { + _typedValue = arangodb::aql::AqlValue( + arangodb::aql::AqlValueHintDouble(_values[_current++])); + _vpackTerm.value = _typedValue.slice(); + return true; + } else { + return false; + } + } + + virtual irs::attribute* get_mutable(irs::type_info::type_id type) noexcept override { + if (type == irs::type::id()) { + return &_inc; + } + if (type == irs::type::id()) { + return &_returnType; + } + if (type == irs::type::id()) { + return &_vpackTerm; + } + return nullptr; + } + + private: + arangodb::iresearch::VPackTermAttribute _vpackTerm; + irs::increment _inc; + std::vector _values; + size_t _current{0}; + arangodb::iresearch::AnalyzerValueTypeAttribute _returnType; + arangodb::aql::AqlValue _typedValue; +}; + +REGISTER_ANALYZER_VPACK(TypedArrayAnalyzer, TypedArrayAnalyzer::make, TypedArrayAnalyzer::normalize); + + } // namespace namespace std { @@ -426,6 +492,14 @@ class IResearchDocumentTest arangodb::velocypack::Parser::fromJson("{ \"type\": \"string\" }")->slice(), arangodb::iresearch::Features{}); EXPECT_TRUE(res.ok()); + + res = analyzers.emplace( + result, + arangodb::StaticStrings::SystemDatabase + "::iresearch-document-number-array", + "iresearch-document-typed-array", + arangodb::velocypack::Parser::fromJson("{ \"type\": \"number\" }")->slice(), + arangodb::iresearch::Features{}); + EXPECT_TRUE(res.ok()); } }; @@ -2679,3 +2753,86 @@ TEST_F(IResearchDocumentTest, FieldIterator_dbServer_index_id_attr) { ASSERT_TRUE(id_indexed); } } + +TEST_F(IResearchDocumentTest, FieldIterator_concurrent_use_typed_analyzer) { + auto& sysDatabase = server.getFeature(); + auto sysVocbase = sysDatabase.use(); + auto& analyzers = server.template getFeature(); + arangodb::iresearch::IResearchLinkMeta linkMeta; + linkMeta._analyzers.clear(); + linkMeta._analyzers.emplace_back(arangodb::iresearch::FieldMeta::Analyzer( + analyzers.get(arangodb::StaticStrings::SystemDatabase + "::iresearch-document-number-array", + arangodb::QueryAnalyzerRevisions::QUERY_LATEST), + "iresearch-document-number-array")); // add analyzer + ASSERT_TRUE(linkMeta._analyzers.front()._pool); + linkMeta._includeAllFields = true; // include all fields + linkMeta._primitiveOffset = linkMeta._analyzers.size(); + std::string error; + std::vector EMPTY; + arangodb::transaction::Methods trx(arangodb::transaction::StandaloneContext::Create(*sysVocbase), + EMPTY, EMPTY, EMPTY, + arangodb::transaction::Options()); + arangodb::iresearch::FieldIterator it1(trx, irs::string_ref::EMPTY, arangodb::IndexId(0)); + + auto json = arangodb::velocypack::Parser::fromJson("{\"value\":\"3\"}"); + auto json2 = arangodb::velocypack::Parser::fromJson("{\"value\":\"4\"}"); + + it1.reset(json->slice(), linkMeta); + ASSERT_TRUE(it1.valid()); + + arangodb::iresearch::FieldIterator it2(trx, irs::string_ref::EMPTY, arangodb::IndexId(0)); + it2.reset(json2->slice(), linkMeta); + ASSERT_TRUE(it2.valid()); + + // exhaust first array member (1) of it1 + { + irs::numeric_token_stream expected_tokens; expected_tokens.reset(1.); + auto& actual_tokens = (*it1).get_tokens(); + auto actual_value = irs::get(actual_tokens); + auto expected_value = irs::get(expected_tokens); + while (actual_tokens.next()) { + ASSERT_TRUE(expected_tokens.next()); + ASSERT_EQ(actual_value->value, expected_value->value); + } + ASSERT_FALSE(expected_tokens.next()); + } + // exhaust first array member (1) of it2 + { + irs::numeric_token_stream expected_tokens; expected_tokens.reset(1.); + auto& actual_tokens = (*it2).get_tokens(); + auto actual_value = irs::get(actual_tokens); + auto expected_value = irs::get(expected_tokens); + while (actual_tokens.next()) { + ASSERT_TRUE(expected_tokens.next()); + ASSERT_EQ(actual_value->value, expected_value->value); + } + ASSERT_FALSE(expected_tokens.next()); + } + ++it1; // now it1 should have it`s typed iterator pointing to 2 + ++it2; // now it2 should have it`s typed iterator pointing to 2 (not to 3!) + ASSERT_TRUE(it1.valid()); + { + irs::numeric_token_stream expected_tokens; expected_tokens.reset(2.); + auto& actual_tokens = (*it1).get_tokens(); + auto actual_value = irs::get(actual_tokens); + auto expected_value = irs::get(expected_tokens); + while (actual_tokens.next()) { + ASSERT_TRUE(expected_tokens.next()); + ASSERT_EQ(actual_value->value, expected_value->value); + } + ASSERT_FALSE(expected_tokens.next()); + } + ASSERT_TRUE(it2.valid()); + { + irs::numeric_token_stream expected_tokens; expected_tokens.reset(2.); + auto& actual_tokens = (*it2).get_tokens(); + auto actual_value = irs::get(actual_tokens); + auto expected_value = irs::get(expected_tokens); + while (actual_tokens.next()) { + ASSERT_TRUE(expected_tokens.next()); + ASSERT_EQ(actual_value->value, expected_value->value); + } + ASSERT_FALSE(expected_tokens.next()); + } +} + From ef8059f655f0fd18ad6bbf9334464851730d16c1 Mon Sep 17 00:00:00 2001 From: Andrei Lobov Date: Mon, 30 Aug 2021 18:09:13 +0300 Subject: [PATCH 2/4] Update CHANGELOG --- CHANGELOG | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 529d8d06d7a4..1438a49e87db 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ devel ----- +* Fix rare case of invalid data could be inserted in the index if + several clientc concurrently inserting data and use custom analyzer + with non-string return type + * Fix a rare shutdown race in RocksDBShaCalculatorThread. * Added "Analyzers" view to web UI to let manage ArangoSearch analyzers From c1c5c3b92b1a6ba03dce5eff115af00fd18dbe6d Mon Sep 17 00:00:00 2001 From: Andrei Lobov Date: Mon, 30 Aug 2021 18:09:52 +0300 Subject: [PATCH 3/4] Update CHANGELOG --- CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 1438a49e87db..606cd4349c74 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,8 @@ devel ----- -* Fix rare case of invalid data could be inserted in the index if - several clientc concurrently inserting data and use custom analyzer +* Fix rare case of invalid data could be inserted in the ArangoSearch index if + several clients concurrently inserts data and use custom analyzer with non-string return type * Fix a rare shutdown race in RocksDBShaCalculatorThread. From dd94d66ef73d9db72d952ed6bbc84eaa72d9ca62 Mon Sep 17 00:00:00 2001 From: Andrei Lobov Date: Mon, 30 Aug 2021 18:30:55 +0300 Subject: [PATCH 4/4] fix build --- tests/IResearch/IResearchDocument-test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/IResearch/IResearchDocument-test.cpp b/tests/IResearch/IResearchDocument-test.cpp index 24e947219a46..168ce5c50f70 100644 --- a/tests/IResearch/IResearchDocument-test.cpp +++ b/tests/IResearch/IResearchDocument-test.cpp @@ -354,7 +354,7 @@ class TypedArrayAnalyzer : public irs::analysis::analyzer { _returnType.value = arangodb::iresearch::AnalyzerValueType::Number; } - __declspec(noinline) virtual bool reset(irs::string_ref const& data) override { + virtual bool reset(irs::string_ref const& data) override { auto value = std::string(data); _values.clear(); _current = 0; @@ -364,7 +364,7 @@ class TypedArrayAnalyzer : public irs::analysis::analyzer { return true; } - __declspec(noinline) virtual bool next() override { + virtual bool next() override { if (_current < _values.size()) { _typedValue = arangodb::aql::AqlValue( arangodb::aql::AqlValueHintDouble(_values[_current++]));