Feature/arangosearch speedup removals (#7134) · sita1999/arangodb@937a7ee · GitHub
[go: up one dir, main page]

Skip to content

Commit 937a7ee

Browse files
author
Andrey Abramov
authored
Feature/arangosearch speedup removals (arangodb#7134)
* speedup document removals and optimize data model
* fix invalid constexpr
1 parent 8f44afb commit 937a7ee

File tree

8 files changed

+219
-131
lines changed

8 files changed

+219
-131
lines changed

3rdParty/iresearch/core/search/filter.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class IRESEARCH_API filter {
8787
//////////////////////////////////////////////////////////////////////////////
8888
class IRESEARCH_API prepared: public util::attribute_store_provider {
8989
public:
90-
DECLARE_SHARED_PTR(prepared);
90+
DECLARE_SHARED_PTR(const prepared);
9191
DEFINE_FACTORY_INLINE(prepared);
9292

9393
static prepared::ptr empty();

arangod/IResearch/IResearchDocument.cpp

Lines changed: 41 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ static_assert(
6565
);
6666

6767
irs::string_ref const CID_FIELD("@_CID");
68-
irs::string_ref const RID_FIELD("@_REV");
6968
irs::string_ref const PK_COLUMN("@_PK");
7069

7170
// wrapper for use objects with the IResearch unbounded_object_pool
@@ -370,19 +369,6 @@ bool setStringValue(
370369
return true;
371370
}
372371

373-
void setIdValue(
374-
uint64_t& value,
375-
irs::token_stream& analyzer
376-
) {
377-
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
378-
auto& sstream = dynamic_cast<irs::string_token_stream&>(analyzer);
379-
#else
380-
auto& sstream = static_cast<irs::string_token_stream&>(analyzer);
381-
#endif
382-
383-
sstream.reset(arangodb::iresearch::DocumentPrimaryKey::encode(value));
384-
}
385-
386372
NS_END
387373

388374
NS_BEGIN(arangodb)
@@ -392,36 +378,62 @@ NS_BEGIN(iresearch)
392378
// --SECTION-- Field implementation
393379
// ----------------------------------------------------------------------------
394380

395-
/*static*/ void Field::setCidValue(Field& field, TRI_voc_cid_t& cid) {
381+
/*static*/ void Field::setCidValue(
382+
Field& field,
383+
TRI_voc_cid_t const& cid
384+
) {
385+
TRI_ASSERT(field._analyzer);
386+
387+
irs::bytes_ref const cidRef(
388+
reinterpret_cast<irs::byte_type const*>(&cid),
389+
sizeof(TRI_voc_cid_t)
390+
);
391+
396392
field._name = CID_FIELD;
397-
setIdValue(cid, *field._analyzer);
398393
field._features = &irs::flags::empty_instance();
394+
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
395+
auto& sstream = dynamic_cast<irs::string_token_stream&>(*field._analyzer);
396+
#else
397+
auto& sstream = static_cast<irs::string_token_stream&>(*field._analyzer);
398+
#endif
399+
sstream.reset(cidRef);
399400
}
400401

401402
/*static*/ void Field::setCidValue(
402403
Field& field,
403-
TRI_voc_cid_t& cid,
404+
TRI_voc_cid_t const& cid,
404405
Field::init_stream_t
405406
) {
406407
field._analyzer = StringStreamPool.emplace().release(); // FIXME don't use shared_ptr
407408
setCidValue(field, cid);
408409
}
409410

410-
/*static*/ void Field::setRidValue(Field& field, TRI_voc_rid_t& rid) {
411-
field._name = RID_FIELD;
412-
setIdValue(rid, *field._analyzer);
411+
/*static*/ void Field::setPkValue(
412+
Field& field,
413+
DocumentPrimaryKey const& pk
414+
) {
415+
field._name = PK_COLUMN;
413416
field._features = &irs::flags::empty_instance();
417+
field._storeValues = ValueStorage::FULL;
418+
field._value = static_cast<irs::bytes_ref>(pk);
419+
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
420+
auto& sstream = dynamic_cast<irs::string_token_stream&>(*field._analyzer);
421+
#else
422+
auto& sstream = static_cast<irs::string_token_stream&>(*field._analyzer);
423+
#endif
424+
sstream.reset(field._value);
414425
}
415426

416-
/*static*/ void Field::setRidValue(
427+
/*static*/ void Field::setPkValue(
417428
Field& field,
418-
TRI_voc_rid_t& rid,
429+
DocumentPrimaryKey const& pk,
419430
Field::init_stream_t
420431
) {
421432
field._analyzer = StringStreamPool.emplace().release(); // FIXME don't use shared_ptr
422-
setRidValue(field, rid);
433+
setPkValue(field, pk);
423434
}
424435

436+
425437
Field::Field(Field&& rhs)
426438
: _features(rhs._features),
427439
_analyzer(std::move(rhs._analyzer)),
@@ -632,10 +644,6 @@ void FieldIterator::next() {
632644
return CID_FIELD;
633645
}
634646

635-
/* static */ irs::string_ref const& DocumentPrimaryKey::RID() {
636-
return RID_FIELD;
637-
}
638-
639647
/* static */ bool DocumentPrimaryKey::decode(
640648
uint64_t& buf, const irs::bytes_ref& value
641649
) {
@@ -671,6 +679,12 @@ DocumentPrimaryKey::DocumentPrimaryKey(
671679
) noexcept
672680
: _keys{ cid, rid } {
673681
static_assert(sizeof(_keys) == sizeof(cid) + sizeof(rid), "Invalid size");
682+
683+
// ensure little endian
684+
if (irs::numeric_utils::is_big_endian()) {
685+
_keys[0] = Swap8Bytes(_keys[0]);
686+
_keys[1] = Swap8Bytes(_keys[1]);
687+
}
674688
}
675689

676690
bool DocumentPrimaryKey::read(irs::bytes_ref const& in) noexcept {
@@ -683,15 +697,6 @@ bool DocumentPrimaryKey::read(irs::bytes_ref const& in) noexcept {
683697
return true;
684698
}
685699

686-
bool DocumentPrimaryKey::write(irs::data_output& out) const {
687-
out.write_bytes(
688-
reinterpret_cast<const irs::byte_type*>(_keys),
689-
sizeof(_keys)
690-
);
691-
692-
return true;
693-
}
694-
695700
bool appendKnownCollections(
696701
std::unordered_set<TRI_voc_cid_t>& set,
697702
const irs::index_reader& reader

arangod/IResearch/IResearchDocument.h

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "VelocyPackHelper.h"
3131

3232
#include "search/filter.hpp"
33+
#include "store/data_output.hpp"
3334

3435
NS_BEGIN(iresearch)
3536

@@ -80,17 +81,18 @@ char const NESTING_LIST_OFFSET_PREFIX = '[';
8081
char const NESTING_LIST_OFFSET_SUFFIX = ']';
8182

8283
struct IResearchViewMeta; // forward declaration
84+
class DocumentPrimaryKey; // forward declaration
8385

8486
////////////////////////////////////////////////////////////////////////////////
8587
/// @brief indexed/stored document field adapter for IResearch
8688
////////////////////////////////////////////////////////////////////////////////
8789
struct Field {
8890
struct init_stream_t{}; // initialize stream
8991

90-
static void setCidValue(Field& field, TRI_voc_cid_t& cid);
91-
static void setCidValue(Field& field, TRI_voc_cid_t& cid, init_stream_t);
92-
static void setRidValue(Field& field, TRI_voc_rid_t& rid);
93-
static void setRidValue(Field& field, TRI_voc_rid_t& rid, init_stream_t);
92+
static void setCidValue(Field& field, TRI_voc_cid_t const& cid);
93+
static void setCidValue(Field& field, TRI_voc_cid_t const& cid, init_stream_t);
94+
static void setPkValue(Field& field, DocumentPrimaryKey const& pk);
95+
static void setPkValue(Field& field, DocumentPrimaryKey const& pk, init_stream_t);
9496

9597
Field() = default;
9698
Field(Field&& rhs);
@@ -110,13 +112,18 @@ struct Field {
110112
return *_analyzer;
111113
}
112114

113-
bool write(irs::data_output&) const noexcept {
115+
bool write(irs::data_output& out) const noexcept {
116+
if (!_value.null()) {
117+
out.write_bytes(_value.c_str(), _value.size());
118+
}
119+
114120
return true;
115121
}
116122

117123
irs::flags const* _features{ &irs::flags::empty_instance() };
118124
std::shared_ptr<irs::token_stream> _analyzer;
119125
irs::string_ref _name;
126+
irs::bytes_ref _value;
120127
ValueStorage _storeValues;
121128
}; // Field
122129

@@ -239,7 +246,6 @@ class DocumentPrimaryKey {
239246
public:
240247
static irs::string_ref const& PK(); // stored primary key column
241248
static irs::string_ref const& CID(); // stored collection id column
242-
static irs::string_ref const& RID(); // stored revision id column
243249

244250
////////////////////////////////////////////////////////////////////////////////
245251
/// @brief decodes the specified value in a proper way into 'buf'
@@ -254,18 +260,22 @@ class DocumentPrimaryKey {
254260
////////////////////////////////////////////////////////////////////////////////
255261
static irs::bytes_ref encode(uint64_t& value);
256262

257-
DocumentPrimaryKey() = default;
263+
constexpr DocumentPrimaryKey() = default;
258264
DocumentPrimaryKey(TRI_voc_cid_t cid, TRI_voc_rid_t rid) noexcept;
259265

260-
irs::string_ref const& name() const noexcept { return PK(); }
261-
bool read(irs::bytes_ref const& in) noexcept;
262-
bool write(irs::data_output& out) const;
266+
// returning reference is important
267+
// (because of casting to 'irs::bytes_ref')
268+
constexpr TRI_voc_cid_t const& cid() const noexcept { return _keys[0]; }
269+
constexpr TRI_voc_rid_t const& rid() const noexcept { return _keys[1]; }
263270

264-
TRI_voc_cid_t cid() const noexcept { return _keys[0]; }
265-
void cid(TRI_voc_cid_t cid) noexcept { _keys[0] = cid; }
271+
explicit operator irs::bytes_ref() const noexcept {
272+
return {
273+
reinterpret_cast<irs::byte_type const*>(_keys),
274+
sizeof _keys
275+
};
276+
}
266277

267-
TRI_voc_rid_t rid() const noexcept { return _keys[1]; }
268-
void rid(TRI_voc_rid_t rid) noexcept { _keys[1] = rid; }
278+
bool read(irs::bytes_ref const& in) noexcept;
269279

270280
private:
271281
// FIXME: define storage format (LE or BE)

0 commit comments

Comments
 (0)
0