8000 Feature/remove shared ptrs from analyzers by Dronplane · Pull Request #14694 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Feature/remove shared ptrs from analyzers #14694

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions 3rdParty/iresearch/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ master
v1.1 (2021-08-18)
-------------------------

* Rework Analyzer API to return `std::unique_ptr` instead of `std::shared_ptr`.

* Derive `null_token_stream`, `string_token_stream`, `numeric_token_stream` and `boolean_token_stream`
from `analysis::analyzer`.

* Rework iterators API to reduce number of heap allocations.

* Add new analyzer `collation` capable of producing tokens honoring language
specific sorting.

Expand Down
2 changes: 1 addition & 1 deletion 3rdParty/iresearch/core/analysis/analyzer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace analysis {

class IRESEARCH_API analyzer : public token_stream {
public:
using ptr = std::shared_ptr<analyzer>;
using ptr = std::unique_ptr<analyzer>;

explicit analyzer(const type_info& type) noexcept;

Expand Down
4 changes: 2 additions & 2 deletions 3rdParty/iresearch/core/analysis/collation_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ bool parse_vpack_options(
analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
analysis::collation_token_stream::options_t options;
if (parse_vpack_options(slice, options)) {
return memory::make_shared<analysis::collation_token_stream>(std::move(options));
return memory::make_unique<analysis::collation_token_stream>(std::move(options));
} else {
return nullptr;
}
Expand Down Expand Up @@ -146,7 +146,7 @@ analysis::analyzer::ptr make_text(const string_ref& args) {
analysis::collation_token_stream::options_t options;

if (locale_utils::icu_locale(args, options.locale)) {// interpret 'args' as a locale name
return memory::make_shared<analysis::collation_token_stream>(
return memory::make_unique<analysis::collation_token_stream>(
std::move(options));
}
} catch (...) {
Expand Down
4 changes: 1 addition & 3 deletions 3rdParty/iresearch/core/analysis/delimited_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,7 @@ bool normalize_json_config(const irs::string_ref& args, std::string& definition)
/// @brief args is a delimiter to use for tokenization
////////////////////////////////////////////////////////////////////////////////
irs::analysis::analyzer::ptr make_text(const irs::string_ref& args) {
return irs::memory::make_shared<irs::analysis::delimited_token_stream>(
args
);
return irs::memory::make_unique<irs::analysis::delimited_token_stream>(args);
}

bool normalize_text_config(const irs::string_ref& delimiter, std::string& definition) {
Expand Down
5 changes: 2 additions & 3 deletions 3rdParty/iresearch/core/analysis/ngram_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,8 @@ namespace analysis {

template<irs::analysis::ngram_token_stream_base::InputType StreamType>
/*static*/ analyzer::ptr ngram_token_stream<StreamType>::make(
const ngram_token_stream_base::Options& options
) {
return std::make_shared<ngram_token_stream<StreamType>>(options);
const ngram_token_stream_base::Options& options) {
return std::make_unique<ngram_token_stream<StreamType>>(options);
}

/*static*/ void ngram_token_stream_base::init() {
Expand Down
15 changes: 7 additions & 8 deletions 3rdParty/iresearch/core/analysis/pipeline_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ class empty_analyzer final
virtual bool reset(const irs::string_ref&) override { return false; }
};

empty_analyzer EMPTY_ANALYZER;

using options_normalize_t = std::vector<std::pair<std::string, std::string>>;
using options_normalize_t = std::vector<std::pair<std::string, std::string>>;

template<typename T>
bool parse_vpack_options(const VPackSlice slice, T& options) {
Expand Down Expand Up @@ -224,7 +222,8 @@ bool normalize_vpack_config(const irs::string_ref& args, std::string& config) {
irs::analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
irs::analysis::pipeline_token_stream::options_t options;
if (parse_vpack_options(slice, options)) {
return std::make_shared<irs::analysis::pipeline_token_stream>(std::move(options));
return irs::memory::make_unique<irs::analysis::pipeline_token_stream>(
std::move(options));
} else {
return nullptr;
}
Expand Down Expand Up @@ -318,7 +317,7 @@ pipeline_token_stream::pipeline_token_stream(pipeline_token_stream::options_t&&
} {
const auto track_offset = irs::get<offset>(*this) != nullptr;
pipeline_.reserve(options.size());
for (auto p : options) {
for (auto& p : options) {
assert(p);
pipeline_.emplace_back(std::move(p), track_offset);
}
Expand Down Expand Up @@ -405,19 +404,19 @@ bool pipeline_token_stream::reset(const string_ref& data) {
}

pipeline_token_stream::sub_analyzer_t::sub_analyzer_t(
const irs::analysis::analyzer::ptr& a,
irs::analysis::analyzer::ptr a,
bool track_offset)
: term(irs::get<irs::term_attribute>(*a)),
inc(irs::get<irs::increment>(*a)),
offs(track_offset ? irs::get<irs::offset>(*a) : &NO_OFFSET),
analyzer(a) {
analyzer(std::move(a)) {
assert(inc);
assert(term);
}

pipeline_token_stream::sub_analyzer_t::sub_analyzer_t()
: term(nullptr), inc(nullptr), offs(nullptr),
analyzer(irs::analysis::analyzer::ptr(), &EMPTY_ANALYZER) { }
analyzer(memory::make_unique<empty_analyzer>()) { }

} // namespace analysis
} // namespace iresearch
2 changes: 1 addition & 1 deletion 3rdParty/iresearch/core/analysis/pipeline_token_stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class pipeline_token_stream final

private:
struct sub_analyzer_t {
explicit sub_analyzer_t(const irs::analysis::analyzer::ptr& a, bool track_offset);
explicit sub_analyzer_t(irs::analysis::analyzer::ptr a, bool track_offset);
sub_analyzer_t();

bool reset(uint32_t start, uint32_t end, const string_ref& data) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ bool parse_vpack_options(
analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
analysis::text_token_normalizing_stream::options_t options;
if (parse_vpack_options(slice, options)) {
return memory::make_shared<
analysis::text_token_normalizing_stream>(std::move(options));
return memory::make_unique<analysis::text_token_normalizing_stream>(
std::move(options));
} else {
return nullptr;
}
Expand Down Expand Up @@ -266,8 +266,8 @@ analysis::analyzer::ptr make_text(const string_ref& args) {
analysis::text_token_normalizing_stream::options_t options;

if (locale_utils::icu_locale(args, options.locale)) {// interpret 'args' as a locale name
return memory::make_shared<analysis::text_token_normalizing_stream>(
std::move(options) );
return memory::make_unique<analysis::text_token_normalizing_stream>(
std::move(options));
}
} catch (...) {
std::string err_msg = static_cast<std::string>(args);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ bool parse_vpack_options(const VPackSlice slice, std::locale& locale) {
analysis::analyzer::ptr make_vpack(const VPackSlice slice) {
std::locale locale;
if (parse_vpack_options(slice, locale)) {
return memory::make_shared<analysis::text_token_stemming_stream>(locale);
return memory::make_unique<analysis::text_token_stemming_stream>(locale);
} else {
return nullptr;
}
Expand Down Expand Up @@ -177,7 +177,7 @@ analysis::analyzer::ptr make_text(const string_ref& args) {
try {
std::locale locale;
if (locale_utils::icu_locale(args, locale)) {
return memory::make_shared<analysis::text_token_stemming_stream>(locale);
return memory::make_unique<analysis::text_token_stemming_stream>(locale);
}
} catch (...) {
std::string err_msg = static_cast<std::string>(args);
Expand Down
20 changes: 8 additions & 12 deletions 3rdParty/iresearch/core/analysis/text_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,10 +296,9 @@ bool build_stopwords(const irs::analysis::text_token_stream::options_t& options,
/// @brief create an analyzer based on the supplied cache_key and options
////////////////////////////////////////////////////////////////////////////////
irs::analysis::analyzer::ptr construct(
const irs::string_ref& cache_key,
irs::analysis::text_token_stream::options_t&& options,
irs::analysis::text_token_stream::stopwords_t&& stopwords
) {
const irs::string_ref& cache_key,
irs::analysis::text_token_stream::options_t&& options,
irs::analysis::text_token_stream::stopwords_t&& stopwords) {
static auto generator = [](
const irs::hashed_string_ref& key,
cached_options_t& value
Expand Down Expand Up @@ -328,9 +327,8 @@ irs::analysis::analyzer::ptr construct(
}

return irs::memory::make_unique<irs::analysis::text_token_stream>(
*options_ptr,
options_ptr->stopwords_
);
*options_ptr,
options_ptr->stopwords_);
}

////////////////////////////////////////////////////////////////////////////////
Expand All @@ -342,14 +340,12 @@ irs::analysis::analyzer::ptr construct(
{
auto lock = irs::make_lock_guard(mutex);
auto itr = cached_state_by_key.find(
irs::make_hashed_ref(irs::string_ref(cache_key))
);
irs::make_hashed_ref(irs::string_ref(cache_key)));

if (itr != cached_state_by_key.end()) {
return irs::memory::make_unique<irs::analysis::text_token_stream>(
itr->second,
itr->second.stopwords_
);
itr->second,
itr->second.stopwords_);
}
}

Expand Down
Diff line change
Original file line number Diff line number
Expand Up @@ -102,7 +102,7 @@ irs::analysis::analyzer::ptr construct(const VPackArrayIterator& mask, bool hex)
return nullptr; // hex-decoding failed
}
}
return irs::memory::make_shared<irs::analysis::token_stopwords_stream>(
return irs::memory::make_unique<irs::analysis::token_stopwords_stream>(
std::move(tokens));
}

Expand Down
4 changes: 2 additions & 2 deletions 3rdParty/iresearch/core/analysis/token_streams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace iresearch {
// -----------------------------------------------------------------------------

boolean_token_stream::boolean_token_stream(bool value /*= false*/) noexcept
: in_use_(false),
: basic_token_stream(irs::type<boolean_token_stream>::get()), in_use_(false),
value_(value) {
}

Expand All @@ -48,7 +48,7 @@ bool boolean_token_stream::next() noexcept {
// -----------------------------------------------------------------------------

string_token_stream::string_token_stream() noexcept
: in_use_(false) {
: analysis::analyzer(irs::type<string_token_stream>::get()), in_use_(false) {
}

bool string_token_stream::next() noexcept {
Expand Down
49 changes: 44 additions & 5 deletions 3rdParty/iresearch/core/analysis/token_streams.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 by EMC Corporation, All Rights Reserved
Expand All @@ -23,7 +23,7 @@
#ifndef IRESEARCH_TOKEN_STREAMS_H
#define IRESEARCH_TOKEN_STREAMS_H

#include "token_stream.hpp"
#include "analyzer.hpp"
#include "token_attributes.hpp"
#include "utils/frozen_attributes.hpp"
#include "utils/numeric_utils.hpp"
Expand All @@ -35,12 +35,19 @@ namespace iresearch {
/// @brief convenient helper implementation providing access to "increment"
/// and "term_attributes" attributes
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API basic_token_stream : public token_stream {
class IRESEARCH_API basic_token_stream : public analysis::analyzer {
public:

explicit basic_token_stream(const type_info& type) : analysis::analyzer(type) {}

virtual attribute* get_mutable(irs::type_info::type_id type) noexcept override final {
return irs::get_mutable(attrs_, type);
}

bool reset(const string_ref&) override {
return false;
}

protected:
std::tuple<term_attribute, increment> attrs_;
}; // basic_token_stream
Expand All @@ -53,6 +60,7 @@ class IRESEARCH_API boolean_token_stream final
: public basic_token_stream,
private util::noncopyable {
public:

static constexpr string_ref value_true() noexcept {
return { "\xFF", 1 };
}
Expand All @@ -74,7 +82,13 @@ class IRESEARCH_API boolean_token_stream final
in_use_ = false;
}

static constexpr irs::string_ref type_name() noexcept {
return "boolean_token_stream";
}

private:
using basic_token_stream::reset;

bool in_use_;
bool value_;
}; // boolean_token_stream
Expand All @@ -86,7 +100,7 @@ class IRESEARCH_API boolean_token_stream final
/// on initial string length
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API string_token_stream final
: public token_stream,
: public analysis::analyzer,
private util::noncopyable {
public:
string_token_stream() noexcept;
Expand All @@ -102,9 +116,14 @@ class IRESEARCH_API string_token_stream final
in_use_ = false;
}

void reset(const string_ref& value) noexcept {
bool reset(const string_ref& value) noexcept override {
value_ = ref_cast<byte_type>(value);
in_use_ = false;
return true;
}

static constexpr irs::string_ref type_name() noexcept {
return "string_token_stream";
}

private:
Expand All @@ -123,6 +142,10 @@ class IRESEARCH_API numeric_token_stream final
: public basic_token_stream,
private util::noncopyable {
public:

explicit numeric_token_stream()
: basic_token_stream(irs::type<numeric_token_stream>::get()) {}

static constexpr uint32_t PRECISION_STEP_DEF = 16;
static constexpr uint32_t PRECISION_STEP_32 = 8;

Expand All @@ -145,7 +168,13 @@ class IRESEARCH_API numeric_token_stream final

static bytes_ref value(bstring& buf, double_t value);

static constexpr irs::string_ref type_name() noexcept {
return "numeric_token_stream";
}

private:
using basic_token_stream::reset;

//////////////////////////////////////////////////////////////////////////////
/// @class numeric_term
/// @brief term_attribute implementation for numeric_token_stream
Expand Down Expand Up @@ -254,6 +283,10 @@ class IRESEARCH_API null_token_stream final
: public basic_token_stream,
private util::noncopyable {
public:

explicit null_token_stream()
: basic_token_stream(irs::type<null_token_stream>::get()) {}

static constexpr string_ref value_null() noexcept {
// data pointer != nullptr or assert failure in bytes_hash::insert(...)
return { "\x00", 0 };
Expand All @@ -265,7 +298,13 @@ class IRESEARCH_API null_token_stream final
in_use_ = false;
}

static constexpr irs::string_ref type_name() noexcept {
return "null_token_stream";
}

private:
using basic_token_stream::reset;

bool in_use_{false};
}; // null_token_stream

Expand Down
2 changes: 1 addition & 1 deletion 3rdParty/iresearch/core/formats/columnstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ class index_block {
const auto stats = encode::avg::encode(offsets_, offset_);
const auto bits = encode::avg::write_block(
&format_traits::pack64,
out, stats.first, stats.second,
out, std::get<0>(stats), std::get<1>(stats),
offsets_, block_size, buf);

if (0 == offsets_[0] && bitpack::rl(bits)) {
Expand Down
Loading
0