Feature/remove shared ptrs from analyzers (#14694) · arangodb/arangodb@dfbe4b9 · GitHub
[go: up one dir, main page]

Skip to content

Commit dfbe4b9

Browse files
authored
Feature/remove shared ptrs from analyzers (#14694)
* update iresearch * get rid of shared ptrs * guidelines fix * fix build * fix build * moar uniques * fix text cmake * fix build * fix build * fix storeFunc execution * fix sharing typed analyzer * fix tests
1 parent 59274d2 commit dfbe4b9

File tree

77 files changed

+101718
-100751
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+101718
-100751
lines changed

3rdParty/iresearch/CHANGELOG

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ master
44
v1.1 (2021-08-18)
55
-------------------------
66

7+
* Rework Analyzer API to return `std::unique_ptr` instead of `std::shared_ptr`.
8+
9+
* Derive `boolean_token_stream`, `string_token_stream`, `numeric_token_stream` and `null_token_stream`
10+
from `analysis::analyzer`.
11+
12+
* Rework iterators API to reduce number of heap allocations.
13+
714
* Add new analyzer `collation` capable of producing tokens honoring language
815
specific sorting.
916

3rdParty/iresearch/core/analysis/analyzer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace analysis {
3131

3232
class IRESEARCH_API analyzer : public token_stream {
3333
public:
34-
using ptr = std::shared_ptr<analyzer>;
34+
using ptr = std::unique_ptr<analyzer>;
3535

3636
explicit analyzer(const type_info& type) noexcept;
3737

3rdParty/iresearch/core/analysis/collation_token_stream.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ bool parse_vpack_options(
9090
analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
9191
analysis::collation_token_stream::options_t options;
9292
if (parse_vpack_options(slice, options)) {
93-
return memory::make_shared<analysis::collation_token_stream>(std::move(options));
93+
return memory::make_unique<analysis::collation_token_stream>(std::move(options));
9494
} else {
9595
return nullptr;
9696
}
@@ -146,7 +146,7 @@ analysis::analyzer::ptr make_text(const string_ref& args) {
146146
analysis::collation_token_stream::options_t options;
147147

148148
if (locale_utils::icu_locale(args, options.locale)) {// interpret 'args' as a locale name
149-
return memory::make_shared<analysis::collation_token_stream>(
149+
return memory::make_unique<analysis::collation_token_stream>(
150150
std::move(options));
151151
}
152152
} catch (...) {

3rdParty/iresearch/core/analysis/delimited_token_stream.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,9 +231,7 @@ bool normalize_json_config(const irs::string_ref& args, std::string& definition)
231231
/// @brief args is a delimiter to use for tokenization
232232
////////////////////////////////////////////////////////////////////////////////
233233
irs::analysis::analyzer::ptr make_text(const irs::string_ref& args) {
234-
return irs::memory::make_shared<irs::analysis::delimited_token_stream>(
235-
args
236-
);
234+
return irs::memory::make_unique<irs::analysis::delimited_token_stream>(args);
237235
}
238236

239237
bool normalize_text_config(const irs::string_ref& delimiter, std::string& definition) {

3rdParty/iresearch/core/analysis/ngram_token_stream.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,8 @@ namespace analysis {
319319

320320
template<irs::analysis::ngram_token_stream_base::InputType StreamType>
321321
/*static*/ analyzer::ptr ngram_token_stream<StreamType>::make(
322-
const ngram_token_stream_base::Options& options
323-
) {
324-
return std::make_shared<ngram_token_stream<StreamType>>(options);
322+
const ngram_token_stream_base::Options& options) {
323+
return std::make_unique<ngram_token_stream<StreamType>>(options);
325324
}
326325

327326
/*static*/ void ngram_token_stream_base::init() {

3rdParty/iresearch/core/analysis/pipeline_token_stream.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,7 @@ class empty_analyzer final
4848
virtual bool reset(const irs::string_ref&) override { return false; }
4949
};
5050

51-
empty_analyzer EMPTY_ANALYZER;
52-
53-
using options_normalize_t = std::vector<std::pair<std::string, std::string>>;
51+
using options_normalize_t = std::vector<std::pair<std::string, std::string>>;
5452

5553
template<typename T>
5654
bool parse_vpack_options(const VPackSlice slice, T& options) {
@@ -224,7 +222,8 @@ bool normalize_vpack_config(const irs::string_ref& args, std::string& config) {
224222
irs::analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
225223
irs::analysis::pipeline_token_stream::options_t options;
226224
if (parse_vpack_options(slice, options)) {
227-
return std::make_shared<irs::analysis::pipeline_token_stream>(std::move(options));
225+
return irs::memory::make_unique<irs::analysis::pipeline_token_stream>(
226+
std::move(options));
228227
} else {
229228
return nullptr;
230229
}
@@ -318,7 +317,7 @@ pipeline_token_stream::pipeline_token_stream(pipeline_token_stream::options_t&&
318317
} {
319318
const auto track_offset = irs::get<offset>(*this) != nullptr;
320319
pipeline_.reserve(options.size());
321-
for (auto p : options) {
320+
for (auto& p : options) {
322321
assert(p);
323322
pipeline_.emplace_back(std::move(p), track_offset);
324323
}
@@ -405,19 +404,19 @@ bool pipeline_token_stream::reset(const string_ref& data) {
405404
}
406405

407406
pipeline_token_stream::sub_analyzer_t::sub_analyzer_t(
408-
const irs::analysis::analyzer::ptr& a,
407+
irs::analysis::analyzer::ptr a,
409408
bool track_offset)
410409
: term(irs::get<irs::term_attribute>(*a)),
411410
inc(irs::get<irs::increment>(*a)),
412411
offs(track_offset ? irs::get<irs::offset>(*a) : &NO_OFFSET),
413-
analyzer(a) {
412+
analyzer(std::move(a)) {
414413
assert(inc);
415414
assert(term);
416415
}
417416

418417
pipeline_token_stream::sub_analyzer_t::sub_analyzer_t()
419418
: term(nullptr), inc(nullptr), offs(nullptr),
420-
analyzer(irs::analysis::analyzer::ptr(), &EMPTY_ANALYZER) { }
419+
analyzer(memory::make_unique<empty_analyzer>()) { }
421420

422421
} // namespace analysis
423422
} // namespace iresearch

3rdParty/iresearch/core/analysis/pipeline_token_stream.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ class pipeline_token_stream final
9898

9999
private:
100100
struct sub_analyzer_t {
101-
explicit sub_analyzer_t(const irs::analysis::analyzer::ptr& a, bool track_offset);
101+
explicit sub_analyzer_t(irs::analysis::analyzer::ptr a, bool track_offset);
102102
sub_analyzer_t();
103103

104104
bool reset(uint32_t start, uint32_t end, const string_ref& data) {

3rdParty/iresearch/core/analysis/text_token_normalizing_stream.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ bool parse_vpack_options(
191191
analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
192192
analysis::text_token_normalizing_stream::options_t options;
193193
if (parse_vpack_options(slice, options)) {
194-
return memory::make_shared<
195-
analysis::text_token_normalizing_stream>(std::move(options));
194+
return memory::make_unique<analysis::text_token_normalizing_stream>(
195+
std::move(options));
196196
} else {
197197
return nullptr;
198198
}
@@ -266,8 +266,8 @@ analysis::analyzer::ptr make_text(const string_ref& args) {
266266
analysis::text_token_normalizing_stream::options_t options;
267267

268268
if (locale_utils::icu_locale(args, options.locale)) {// interpret 'args' as a locale name
269-
return memory::make_shared<analysis::text_token_normalizing_stream>(
270-
std::move(options) );
269+
return memory::make_unique<analysis::text_token_normalizing_stream>(
270+
std::move(options));
271271
}
272272
} catch (...) {
273273
std::string err_msg = static_cast<std::string>(args);

3rdParty/iresearch/core/analysis/text_token_stemming_stream.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ bool parse_vpack_options(const VPackSlice slice, std::locale& locale) {
8484
analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
8585
std::locale locale;
8686
if (parse_vpack_options(slice, locale)) {
87-
return memory::make_shared<analysis::text_token_stemming_stream>(locale);
87+
return memory::make_unique<analysis::text_token_stemming_stream>(locale);
8888
} else {
8989
return nullptr;
9090
}
@@ -177,7 +177,7 @@ analysis::analyzer::ptr make_text(const string_ref& args) {
177177
try {
178178
std::locale locale;
179179
if (locale_utils::icu_locale(args, locale)) {
180-
return memory::make_shared<analysis::text_token_stemming_stream>(locale);
180+
return memory::make_unique<analysis::text_token_stemming_stream>(locale);
181181
}
182182
} catch (...) {
183183
std::string err_msg = static_cast<std::string>(args);

3rdParty/iresearch/core/analysis/text_token_stream.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,9 @@ bool build_stopwords(const irs::analysis::text_token_stream::options_t& options,
296296
/// @brief create an analyzer based on the supplied cache_key and options
297297
////////////////////////////////////////////////////////////////////////////////
298298
irs::analysis::analyzer::ptr construct(
299-
const irs::string_ref& cache_key,
300-
irs::analysis::text_token_stream::options_t&& options,
301-
irs::analysis::text_token_stream::stopwords_t&& stopwords
302-
) {
299+
const irs::string_ref& cache_key,
300+
irs::analysis::text_token_stream::options_t&& options,
301+
irs::analysis::text_token_stream::stopwords_t&& stopwords) {
303302
static auto generator = [](
304303
const irs::hashed_string_ref& key,
305304
cached_options_t& value
@@ -328,9 +327,8 @@ irs::analysis::analyzer::ptr construct(
328327
}
329328

330329
return irs::memory::make_unique<irs::analysis::text_token_stream>(
331-
*options_ptr,
332-
options_ptr->stopwords_
333-
);
330+
*options_ptr,
331+
options_ptr->stopwords_);
334332
}
335333

336334
////////////////////////////////////////////////////////////////////////////////
@@ -342,14 +340,12 @@ irs::analysis::analyzer::ptr construct(
342340
{
343341
auto lock = irs::make_lock_guard(mutex);
344342
auto itr = cached_state_by_key.find(
345-
irs::make_hashed_ref(irs::string_ref(cache_key))
346-
);
343+
irs::make_hashed_ref(irs::string_ref(cache_key)));
347344

348345
if (itr != cached_state_by_key.end()) {
349346
return irs::memory::make_unique<irs::analysis::text_token_stream>(
350-
itr->second,
351-
itr->second.stopwords_
352-
);
347+
itr->second,
348+
itr->second.stopwords_);
353349
}
354350
}
355351

3rdParty/iresearch/core/analysis/token_stopwords_stream.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ irs::analysis::analyzer::ptr construct(const VPackArrayIterator& mask, bool hex)
102102
return nullptr; // hex-decoding failed
103103
}
104104
}
105-
return irs::memory::make_shared<irs::analysis::token_stopwords_stream>(
105+
return irs::memory::make_unique<irs::analysis::token_stopwords_stream>(
106106
std::move(tokens));
107107
}
108108

3rdParty/iresearch/core/analysis/token_streams.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace iresearch {
3232
// -----------------------------------------------------------------------------
3333

3434
boolean_token_stream::boolean_token_stream(bool value /*= false*/) noexcept
35-
: in_use_(false),
35+
: basic_token_stream(irs::type<boolean_token_stream>::get()), in_use_(false),
3636
value_(value) {
3737
}
3838

@@ -48,7 +48,7 @@ bool boolean_token_stream::next() noexcept {
4848
// -----------------------------------------------------------------------------
4949

5050
string_token_stream::string_token_stream() noexcept
51-
: in_use_(false) {
51+
: analysis::analyzer(irs::type<string_token_stream>::get()), in_use_(false) {
5252
}
5353

5454
bool string_token_stream::next() noexcept {

3rdParty/iresearch/core/analysis/token_streams.hpp

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
////////////////////////////////////////////////////////////////////////////////
1+
////////////////////////////////////////////////////////////////////////////////
22
/// DISCLAIMER
33
///
44
/// Copyright 2016 by EMC Corporation, All Rights Reserved
@@ -23,7 +23,7 @@
2323
#ifndef IRESEARCH_TOKEN_STREAMS_H
2424
#define IRESEARCH_TOKEN_STREAMS_H
2525

26-
#include "token_stream.hpp"
26+
#include "analyzer.hpp"
2727
#include "token_attributes.hpp"
2828
#include "utils/frozen_attributes.hpp"
2929
#include "utils/numeric_utils.hpp"
@@ -35,12 +35,19 @@ namespace iresearch {
3535
/// @brief convenient helper implementation providing access to "increment"
3636
/// and "term_attributes" attributes
3737
//////////////////////////////////////////////////////////////////////////////
38-
class IRESEARCH_API basic_token_stream : public token_stream {
38+
class IRESEARCH_API basic_token_stream : public analysis::analyzer {
3939
public:
40+
41+
explicit basic_token_stream(const type_info& type) : analysis::analyzer(type) {}
42+
4043
virtual attribute* get_mutable(irs::type_info::type_id type) noexcept override final {
4144
return irs::get_mutable(attrs_, type);
4245
}
4346

47+
bool reset(const string_ref&) override {
48+
return false;
49+
}
50+
4451
protected:
4552
std::tuple<term_attribute, increment> attrs_;
4653
}; // basic_token_stream
@@ -53,6 +60,7 @@ class IRESEARCH_API boolean_token_stream final
5360
: public basic_token_stream,
5461
private util::noncopyable {
5562
public:
63+
5664
static constexpr string_ref value_true() noexcept {
5765
return { "\xFF", 1 };
5866
}
@@ -74,7 +82,13 @@ class IRESEARCH_API boolean_token_stream final
7482
in_use_ = false;
7583
}
7684

85+
static constexpr irs::string_ref type_name() noexcept {
86+
return "boolean_token_stream";
87+
}
88+
7789
private:
90+
using basic_token_stream::reset;
91+
7892
bool in_use_;
7993
bool value_;
8094
}; // boolean_token_stream
@@ -86,7 +100,7 @@ class IRESEARCH_API boolean_token_stream final
86100
/// on initial string length
87101
//////////////////////////////////////////////////////////////////////////////
88102
class IRESEARCH_API string_token_stream final
89-
: public token_stream,
103+
: public analysis::analyzer,
90104
private util::noncopyable {
91105
public:
92106
string_token_stream() noexcept;
@@ -102,9 +116,14 @@ class IRESEARCH_API string_token_stream final
102116
in_use_ = false;
103117
}
104118

105-
void reset(const string_ref& value) noexcept {
119+
bool reset(const string_ref& value) noexcept override {
106120
value_ = ref_cast<byte_type>(value);
107121
in_use_ = false;
122+
return true;
123+
}
124+
125+
static constexpr irs::string_ref type_name() noexcept {
126+
return "string_token_stream";
108127
}
109128

110129
private:
@@ -123,6 +142,10 @@ class IRESEARCH_API numeric_token_stream final
123142
: public basic_token_stream,
124143
private util::noncopyable {
125144
public:
145+
146+
explicit numeric_token_stream()
147+
: basic_token_stream(irs::type<numeric_token_stream>::get()) {}
148+
126149
static constexpr uint32_t PRECISION_STEP_DEF = 16;
127150
static constexpr uint32_t PRECISION_STEP_32 = 8;
128151

@@ -145,7 +168,13 @@ class IRESEARCH_API numeric_token_stream final
145168

146169
static bytes_ref value(bstring& buf, double_t value);
147170

171+
static constexpr irs::string_ref type_name() noexcept {
172+
return "numeric_token_stream";
173+
}
174+
148175
private:
176+
using basic_token_stream::reset;
177+
149178
//////////////////////////////////////////////////////////////////////////////
150179
/// @class numeric_term
151180
/// @brief term_attribute implementation for numeric_token_stream
@@ -254,6 +283,10 @@ class IRESEARCH_API null_token_stream final
254283
: public basic_token_stream,
255284
private util::noncopyable {
256285
public:
286+
287+
explicit null_token_stream()
288+
: basic_token_stream(irs::type<null_token_stream>::get()) {}
289+
257290
static constexpr string_ref value_null() noexcept {
258291
// data pointer != nullptr or assert failure in bytes_hash::insert(...)
259292
return { "\x00", 0 };
@@ -265,7 +298,13 @@ class IRESEARCH_API null_token_stream final
265298
in_use_ = false;
266299
}
267300

301+
static constexpr irs::string_ref type_name() noexcept {
302+
return "null_token_stream";
303+
}
304+
268305
private:
306+
using basic_token_stream::reset;
307+
269308
bool in_use_{false};
270309
}; // null_token_stream
271310

3rdParty/iresearch/core/formats/columnstore.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ class index_block {
337337
const auto stats = encode::avg::encode(offsets_, offset_);
338338
const auto bits = encode::avg::write_block(
339339
&format_traits::pack64,
340-
out, stats.first, stats.second,
340+
out, std::get<0>(stats), std::get<1>(stats),
341341
offsets_, block_size, buf);
342342

343343
if (0 == offsets_[0] && bitpack::rl(bits)) {

0 commit comments

Comments
 (0)
0