From e056a8f650032a6d8c80545a8ac58e0371e65a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 18 Mar 2021 08:22:43 +0100 Subject: [PATCH 1/4] Avoid interleave&transpose in getNextZValue() --- arangod/Indexes/IndexFactory.cpp | 2 +- arangod/Zkd/ZkdHelper.cpp | 108 +++++++++++++++++-------------- arangod/Zkd/ZkdHelper.h | 12 ++-- 3 files changed, 67 insertions(+), 55 deletions(-) diff --git a/arangod/Indexes/IndexFactory.cpp b/arangod/Indexes/IndexFactory.cpp index f3688940596b..e5b622724347 100644 --- a/arangod/Indexes/IndexFactory.cpp +++ b/arangod/Indexes/IndexFactory.cpp @@ -581,7 +581,7 @@ Result IndexFactory::enhanceJsonIndexFulltext(VPackSlice definition, return res; } -/// @brief enhances the json of a fulltext index +/// @brief enhances the json of a zkd index Result IndexFactory::enhanceJsonIndexZkd(VPackSlice definition, VPackBuilder& builder, bool create) { if (auto fieldValueTypes = definition.get("fieldValueTypes"); diff --git a/arangod/Zkd/ZkdHelper.cpp b/arangod/Zkd/ZkdHelper.cpp index 56c531c8ab92..e772a405c37b 100644 --- a/arangod/Zkd/ZkdHelper.cpp +++ b/arangod/Zkd/ZkdHelper.cpp @@ -143,31 +143,35 @@ void zkd::BitWriter::reserve(std::size_t amount) { _buffer.reserve(amount); } -zkd::RandomBitReader::RandomBitReader(byte_string_view ref) : ref(ref) {} +zkd::RandomBitReader::RandomBitReader(byte_string_view ref) : _ref(ref) {} -auto zkd::RandomBitReader::getBit(std::size_t index) -> Bit { +auto zkd::RandomBitReader::getBit(std::size_t index) const -> Bit { auto byte = index / 8; auto nibble = index % 8; - if (byte >= ref.size()) { + if (byte >= _ref.size()) { return Bit::ZERO; } - auto b = ref[byte] & (1_b << (7 - nibble)); + auto b = _ref[byte] & (1_b << (7 - nibble)); return b != 0_b ? Bit::ONE : Bit::ZERO; } -zkd::RandomBitManipulator::RandomBitManipulator(byte_string& ref) : ref(ref) {} +auto zkd::RandomBitReader::bits() const -> std::size_t { + return 8 * _ref.size(); +} + +zkd::RandomBitManipulator::RandomBitManipulator(byte_string& ref) : _ref(ref) {} -auto zkd::RandomBitManipulator::getBit(std::size_t index) -> Bit { +auto zkd::RandomBitManipulator::getBit(std::size_t index) const -> Bit { auto byte = index / 8; auto nibble = index % 8; - if (byte >= ref.size()) { + if (byte >= _ref.size()) { return Bit::ZERO; } - auto b = ref[byte] & (1_b << (7 - nibble)); + auto b = _ref[byte] & (1_b << (7 - nibble)); return b != 0_b ? Bit::ONE : Bit::ZERO; } @@ -175,11 +179,15 @@ auto zkd::RandomBitManipulator::setBit(std::size_t index, Bit value) -> void { auto byte = index / 8; auto nibble = index % 8; - if (byte >= ref.size()) { - ref.resize(byte + 1); + if (byte >= _ref.size()) { + _ref.resize(byte + 1); } auto bit = 1_b << (7 - nibble); - ref[byte] = (ref[byte] & ~bit) | (value == Bit::ONE ? bit : 0_b); + _ref[byte] = (_ref[byte] & ~bit) | (value == Bit::ONE ? bit : 0_b); +} + +auto zkd::RandomBitManipulator::bits() const -> std::size_t { + return 8 * _ref.size(); } auto zkd::interleave(std::vector const& vec) -> zkd::byte_string { @@ -187,7 +195,7 @@ auto zkd::interleave(std::vector const& vec) -> zkd::byte_stri std::vector reader; reader.reserve(vec.size()); - for (auto& str : vec) { + for (auto const& str : vec) { if (str.size() > max_size) { max_size = str.size(); } @@ -374,7 +382,7 @@ auto zkd::getNextZValue(byte_string_view cur, byte_string_view min, byte_string_ } return a.outStep < b.outStep; }); - assert(minOutstepIter->flag != 0); + TRI_ASSERT(minOutstepIter->flag != 0); auto const d = std::distance(cmpResult.begin(), minOutstepIter); RandomBitReader nisp(cur); @@ -382,7 +390,8 @@ auto zkd::getNextZValue(byte_string_view cur, byte_string_view min, byte_string_ std::size_t changeBP = dims * minOutstepIter->outStep + d; if (minOutstepIter->flag > 0) { - while (changeBP != 0) { + bool update_dims = false; + while (changeBP != 0 && !update_dims) { --changeBP; if (nisp.getBit(changeBP) == Bit::ZERO) { auto dim = changeBP % dims; @@ -390,68 +399,67 @@ auto zkd::getNextZValue(byte_string_view cur, byte_string_view min, byte_string_ if (cmpResult[dim].saveMax <= step) { cmpResult[dim].saveMin = step; cmpResult[dim].flag = 0; - goto update_dims; + update_dims = true; } } } - return std::nullopt; + if (!update_dims) { + return std::nullopt; + } } - update_dims: - RandomBitManipulator rbm(result); - assert(rbm.getBit(changeBP) == Bit::ZERO); - rbm.setBit(changeBP, Bit::ONE); - assert(rbm.getBit(changeBP) == Bit::ONE); + { + RandomBitManipulator rbm(result); + TRI_ASSERT(rbm.getBit(changeBP) == Bit::ZERO); + rbm.setBit(changeBP, Bit::ONE); + TRI_ASSERT(rbm.getBit(changeBP) == Bit::ONE); + } + auto resultManipulator = RandomBitManipulator{result}; + auto minReader = RandomBitReader{min}; auto min_trans = transpose(min, dims); auto next_v = transpose(result, dims); + // Calculate the next bit position in dimension `dim` (regarding dims) + // after `bitPos` + auto const nextGreaterBitInDim = [dims](std::size_t const bitPos, std::size_t const dim) { + auto const posRem = bitPos % dims; + auto const posFloor = bitPos - posRem; + auto const result = dim > posRem ? (posFloor + dim) : posFloor + dims + dim; + // result must be a bit of dimension `dim` + TRI_ASSERT(result % dims == dim); + // result must be strictly greater than bitPos + TRI_ASSERT(bitPos < result); + // and the lowest bit with the above properties + TRI_ASSERT(result <= bitPos + dims); + return result; + }; + for (unsigned dim = 0; dim < dims; dim++) { auto& cmpRes = cmpResult[dim]; if (cmpRes.flag >= 0) { auto bp = dims * cmpRes.saveMin + dim; if (changeBP >= bp) { // “set all bits of dim with bit positions > changeBP to 0” - BitReader br(next_v[dim]); - BitWriter bw; - size_t i = 0; - while (auto bit = br.next()) { - if (i * dims + dim > changeBP) { - break; - } - bw.append(bit.value()); - i++; + for (std::size_t i = nextGreaterBitInDim(changeBP, dim); i < resultManipulator.bits(); i += dims) { + resultManipulator.setBit(i, Bit::ZERO); } - next_v[dim] = std::move(bw).str(); } else { // “set all bits of dim with bit positions > changeBP to the minimum of the query box in this dim” - BitReader br(next_v[dim]); - BitWriter bw; - size_t i = 0; - while (auto bit = br.next()) { - if (i * dims + dim > changeBP) { - break; - } - bw.append(bit.value()); - i++; - } - BitReader br_min(min_trans[dim]); - for (size_t j = 0; j < i; ++j) { - br_min.next(); + for (std::size_t i = nextGreaterBitInDim(changeBP, dim); i < resultManipulator.bits(); i += dims) { + resultManipulator.setBit(i, minReader.getBit(i)); } - for (; auto bit = br_min.next(); ++i) { - bw.append(bit.value()); - } - next_v[dim] = std::move(bw).str(); } } else { // load the minimum for that dimension - next_v[dim] = min_trans[dim]; + for (std::size_t i = dim; i < resultManipulator.bits(); i += dims) { + resultManipulator.setBit(i, minReader.getBit(i)); + } } } - return interleave(next_v); + return result; } template diff --git a/arangod/Zkd/ZkdHelper.h b/arangod/Zkd/ZkdHelper.h index 569deff93dc1..61a845847a90 100644 --- a/arangod/Zkd/ZkdHelper.h +++ b/arangod/Zkd/ZkdHelper.h @@ -120,21 +120,25 @@ class BitWriter { struct RandomBitReader { explicit RandomBitReader(byte_string_view ref); - auto getBit(std::size_t index) -> Bit; + [[nodiscard]] auto getBit(std::size_t index) const -> Bit; + + [[nodiscard]] auto bits() const -> std::size_t; private: - byte_string_view ref; + byte_string_view _ref; }; struct RandomBitManipulator { explicit RandomBitManipulator(byte_string& ref); - auto getBit(std::size_t index) -> Bit; + [[nodiscard]] auto getBit(std::size_t index) const -> Bit; auto setBit(std::size_t index, Bit value) -> void; + [[nodiscard]] auto bits() const -> std::size_t; + private: - byte_string& ref; + byte_string& _ref; }; template From 3df773f0dd1d0aede1c6b57f87abac101bfd73f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 18 Mar 2021 08:56:04 +0100 Subject: [PATCH 2/4] Change order of actual/expected in test assertion --- tests/js/server/aql/aql-optimizer-zkdindex-multi.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/js/server/aql/aql-optimizer-zkdindex-multi.js b/tests/js/server/aql/aql-optimizer-zkdindex-multi.js index 85ce9db2846e..e789345a852d 100644 --- a/tests/js/server/aql/aql-optimizer-zkdindex-multi.js +++ b/tests/js/server/aql/aql-optimizer-zkdindex-multi.js @@ -144,9 +144,10 @@ function optimizerRuleZkd2dIndexTestSuite() { } const executeRes = AQL_EXECUTE(query); const res = executeRes.json; - res.sort(); const expected = productSet(x, y, z, w); - assertEqual(res, expected.sort()); + res.sort(); + expected.sort(); + assertEqual(expected, res, JSON.stringify({query})); }; } } From 0b66d63b0dec2f2ed43a4ff181acfe6d4d1003f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 18 Mar 2021 12:01:24 +0100 Subject: [PATCH 3/4] Minor improvements --- arangod/Zkd/ZkdHelper.cpp | 24 ++++++++++++++---------- arangod/Zkd/ZkdHelper.h | 19 ++----------------- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/arangod/Zkd/ZkdHelper.cpp b/arangod/Zkd/ZkdHelper.cpp index e772a405c37b..b800f54c2222 100644 --- a/arangod/Zkd/ZkdHelper.cpp +++ b/arangod/Zkd/ZkdHelper.cpp @@ -1,7 +1,9 @@ #include "ZkdHelper.h" -#include -#include +#include "Basics/ScopeGuard.h" +#include "Basics/debugging.h" +#include "Containers/SmallVector.h" + #include #include #include @@ -113,7 +115,7 @@ auto zkd::ByteReader::next() -> std::optional { void zkd::BitWriter::append(Bit bit) { if (bit == Bit::ONE) { - _value |= std::byte{1} << (7u - _nibble); + _value |= std::byte{1} << (7U - _nibble); } _nibble += 1; if (_nibble == 8) { @@ -263,9 +265,13 @@ void zkd::compareWithBoxInto(byte_string_view cur, byte_string_view min, byte_st BitReader cur_reader(cur); BitReader min_reader(min); BitReader max_reader(max); - ::arangodb::containers::SmallVector>::allocator_type::arena_type a; - ::arangodb::containers::SmallVector> isLargerLowerThanMinMax{a}; - isLargerLowerThanMinMax.resize(dimensions); + + auto const isLargerThanMin = [&result](auto const dim) { + return result[dim].saveMin != CompareResult::max; + }; + auto const isLowerThanMax = [&result](auto const dim) { + return result[dim].saveMax != CompareResult::max; + }; unsigned step = 0; unsigned dim = 0; @@ -279,22 +285,20 @@ void zkd::compareWithBoxInto(byte_string_view cur, byte_string_view min, byte_st auto max_bit = max_reader.next().value_or(Bit::ZERO); if (result[dim].flag == 0) { - if (!isLargerLowerThanMinMax[dim].first) { + if (!isLargerThanMin(dim)) { if (cur_bit == Bit::ZERO && min_bit == Bit::ONE) { result[dim].outStep = step; result[dim].flag = -1; } else if (cur_bit == Bit::ONE && min_bit == Bit::ZERO) { - isLargerLowerThanMinMax[dim].first = true; result[dim].saveMin = step; } } - if (!isLargerLowerThanMinMax[dim].second) { + if (!isLowerThanMax(dim)) { if (cur_bit == Bit::ONE && max_bit == Bit::ZERO) { result[dim].outStep = step; result[dim].flag = 1; } else if (cur_bit == Bit::ZERO && max_bit == Bit::ONE) { - isLargerLowerThanMinMax[dim].second = true; result[dim].saveMax = step; } } diff --git a/arangod/Zkd/ZkdHelper.h b/arangod/Zkd/ZkdHelper.h index 61a845847a90..d6fc255d15c0 100644 --- a/arangod/Zkd/ZkdHelper.h +++ b/arangod/Zkd/ZkdHelper.h @@ -13,22 +13,7 @@ namespace arangodb::zkd { inline static std::byte operator"" _b(unsigned long long b) { return std::byte{(unsigned char)b}; } -/* -struct byte_string : public std::basic_string { - using std::basic_string::basic_string; - using std::basic_string::operator=; - - //byte_string(std::basic_string str) : std::basic_string(std::move(str)) {} - - template - auto operator+(T&& other) -> byte_string { - return byte_string(static_cast&>(*this) + std::forward(other)); - } - - auto as_string_view() const -> std::string_view { - return std::string_view(reinterpret_cast(data()), size()); - } -};*/ + using byte_string = std::basic_string; using byte_string_view = std::basic_string_view; @@ -38,7 +23,7 @@ byte_string operator"" _bss(const char* str, std::size_t len); auto interleave(std::vector const& vec) -> byte_string; auto transpose(byte_string_view bs, std::size_t dimensions) -> std::vector; -struct CompareResult { +struct alignas(32) CompareResult { static constexpr auto max = std::numeric_limits::max(); signed flag = 0; From b6d7030015832cb25d2c9217bab0b00f6fc4bf3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 18 Mar 2021 12:16:02 +0100 Subject: [PATCH 4/4] Removed superfluous transpose calls --- arangod/Zkd/ZkdHelper.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/arangod/Zkd/ZkdHelper.cpp b/arangod/Zkd/ZkdHelper.cpp index b800f54c2222..ae59f07e82e1 100644 --- a/arangod/Zkd/ZkdHelper.cpp +++ b/arangod/Zkd/ZkdHelper.cpp @@ -422,9 +422,6 @@ auto zkd::getNextZValue(byte_string_view cur, byte_string_view min, byte_string_ auto resultManipulator = RandomBitManipulator{result}; auto minReader = RandomBitReader{min}; - auto min_trans = transpose(min, dims); - auto next_v = transpose(result, dims); - // Calculate the next bit position in dimension `dim` (regarding dims) // after `bitPos` auto const nextGreaterBitInDim = [dims](std::size_t const bitPos, std::size_t const dim) {