8000 Feature/zkd index speedup getnextzvalue by goedderz · Pull Request #13799 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Feature/zkd index speedup getnextzvalue #13799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arangod/Indexes/IndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ Result IndexFactory::enhanceJsonIndexFulltext(VPackSlice definition,
return res;
}

/// @brief enhances the json of a fulltext index
/// @brief enhances the json of a zkd index
Result IndexFactory::enhanceJsonIndexZkd(VPackSlice definition,
VPackBuilder& builder, bool create) {
if (auto fieldValueTypes = definition.get("fieldValueTypes");
Expand Down
135 changes: 72 additions & 63 deletions arangod/Zkd/ZkdHelper.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include "ZkdHelper.h"

#include <Basics/debugging.h>
#include <Containers/SmallVector.h>
#include "Basics/ScopeGuard.h"
#include "Basics/debugging.h"
#include "Containers/SmallVector.h"

#include <algorithm>
#include <cassert>
#include <cmath>
Expand Down Expand Up @@ -113,7 +115,7 @@ auto zkd::ByteReader::next() -> std::optional<std::byte> {

void zkd::BitWriter::append(Bit bit) {
if (bit == Bit::ONE) {
_value |= std::byte{1} << (7u - _nibble);
_value |= std::byte{1} << (7U - _nibble);
}
_nibble += 1;
if (_nibble == 8) {
Expand Down Expand Up @@ -143,51 +145,59 @@ void zkd::BitWriter::reserve(std::size_t amount) {
_buffer.reserve(amount);
}

zkd::RandomBitReader::RandomBitReader(byte_string_view ref) : ref(ref) {}
zkd::RandomBitReader::RandomBitReader(byte_string_view ref) : _ref(ref) {}

auto zkd::RandomBitReader::getBit(std::size_t index) -> Bit {
auto zkd::RandomBitReader::getBit(std::size_t index) const -> Bit {
auto byte = index / 8;
auto nibble = index % 8;

if (byte >= ref.size()) {
if (byte >= _ref.size()) {
return Bit::ZERO;
}

auto b = ref[byte] & (1_b << (7 - nibble));
auto b = _ref[byte] & (1_b << (7 - nibble));
return b != 0_b ? Bit::ONE : Bit::ZERO;
}

zkd::RandomBitManipulator::RandomBitManipulator(byte_string& ref) : ref(ref) {}
auto zkd::RandomBitReader::bits() const -> std::size_t {
return 8 * _ref.size();
}

zkd::RandomBitManipulator::RandomBitManipulator(byte_string& ref) : _ref(ref) {}

auto zkd::RandomBitManipulator::getBit(std::size_t index) -> Bit {
auto zkd::RandomBitManipulator::getBit(std::size_t index) const -> Bit {
auto byte = index / 8;
auto nibble = index % 8;

if (byte >= ref.size()) {
if (byte >= _ref.size()) {
return Bit::ZERO;
}

auto b = ref[byte] & (1_b << (7 - nibble));
auto b = _ref[byte] & (1_b << (7 - nibble));
return b != 0_b ? Bit::ONE : Bit::ZERO;
}

auto zkd::RandomBitManipulator::setBit(std::size_t index, Bit value) -> void {
auto byte = index / 8;
auto nibble = index % 8;

if (byte >= ref.size()) {
ref.resize(byte + 1);
if (byte >= _ref.size()) {
_ref.resize(byte + 1);
}
auto bit = 1_b << (7 - nibble);
ref[byte] = (ref[byte] & ~bit) | (value == Bit::ONE ? bit : 0_b);
_ref[byte] = (_ref[byte] & ~bit) | (value == Bit::ONE ? bit : 0_b);
}

auto zkd::RandomBitManipulator::bits() const -> std::size_t {
return 8 * _ref.size();
}

auto zkd::interleave(std::vector<zkd::byte_string> const& vec) -> zkd::byte_string {
std::size_t max_size = 0;
std::vector<BitReader> reader;
reader.reserve(vec.size());

for (auto& str : vec) {
for (auto const& str : vec) {
if (str.size() > max_size) {
max_size = str.size();
}
Expand Down Expand Up @@ -255,9 +265,13 @@ void zkd::compareWithBoxInto(byte_string_view cur, byte_string_view min, byte_st
BitReader cur_reader(cur);
BitReader min_reader(min);
BitReader max_reader(max);
::arangodb::containers::SmallVector<std::pair<bool, bool>>::allocator_type::arena_type a;
::arangodb::containers::SmallVector<std::pair<bool, bool>> isLargerLowerThanMinMax{a};
isLargerLowerThanMinMax.resize(dimensions);

auto const isLargerThanMin = [&result](auto const dim) {
return result[dim].saveMin != CompareResult::max;
};
auto const isLowerThanMax = [&result](auto const dim) {
return result[dim].saveMax != CompareResult::max;
};

unsigned step = 0;
unsigned dim = 0;
Expand All @@ -271,22 +285,20 @@ void zkd::compareWithBoxInto(byte_string_view cur, byte_string_view min, byte_st
auto max_bit = max_reader.next().value_or(Bit::ZERO);

if (result[dim].flag == 0) {
if (!isLargerLowerThanMinMax[dim].first) {
if (!isLargerThanMin(dim)) {
if (cur_bit == Bit::ZERO && min_bit == Bit::ONE) {
result[dim].outStep = step;
result[dim].flag = -1;
} else if (cur_bit == Bit::ONE && min_bit == Bit::ZERO) {
isLargerLowerThanMinMax[dim].first = true;
result[dim].saveMin = step;
}
}

if (!isLargerLowerThanMinMax[dim].second) {
if (!isLowerThanMax(dim)) {
if (cur_bit == Bit::ONE && max_bit == Bit::ZERO) {
result[dim].outStep = step;
result[dim].flag = 1;
} else if (cur_bit == Bit::ZERO && max_bit == Bit::ONE) {
isLargerLowerThanMinMax[dim].second = true;
result[dim].saveMax = step;
}
}
Expand Down Expand Up @@ -374,84 +386,81 @@ auto zkd::getNextZValue(byte_string_view cur, byte_string_view min, byte_string_
}
return a.outStep < b.outStep;
});
assert(minOutstepIter->flag != 0);
TRI_ASSERT(minOutstepIter->flag != 0);
auto const d = std::distance(cmpResult.begin(), minOutstepIter);

RandomBitReader nisp(cur);

std::size_t changeBP = dims * minOutstepIter->outStep + d;

if (minOutstepIter->flag > 0) {
while (changeBP != 0) {
bool update_dims = false;
while (changeBP != 0 && !update_dims) {
--changeBP;
if (nisp.getBit(changeBP) == Bit::ZERO) {
auto dim = changeBP % dims;
auto step = changeBP / dims;
if (cmpResult[dim].saveMax <= step) {
cmpResult[dim].saveMin = step;
cmpResult[dim].flag = 0;
goto update_dims;
update_dims = true;
}
}
}

return std::nullopt;
if (!update_dims) {
return std::nullopt;
}
}

update_dims:
RandomBitManipulator rbm(result);
assert(rbm.getBit(changeBP) == Bit::ZERO);
rbm.setBit(changeBP, Bit::ONE);
assert(rbm.getBit(changeBP) == Bit::ONE);

auto min_trans = transpose(min, dims);
auto next_v = transpose(result, dims);
{
RandomBitManipulator rbm(result);
TRI_ASSERT(rbm.getBit(changeBP) == Bit::ZERO);
rbm.setBit(changeBP, Bit::ONE);
TRI_ASSERT(rbm.getBit(changeBP) == Bit::ONE);
}
auto resultManipulator = RandomBitManipulator{result};
auto minReader = RandomBitReader{min};

// Calculate the next bit position in dimension `dim` (regarding dims)
// after `bitPos`
auto const nextGreaterBitInDim = [dims](std::size_t const bitPos, std::size_t const dim) {
auto const posRem = bitPos % dims;
auto const posFloor = bitPos - posRem;
auto const result = dim > posRem ? (posFloor + dim) : posFloor + dims + dim;
// result must be a bit of dimension `dim`
TRI_ASSERT(result % dims == dim);
// result must be strictly greater than bitPos
TRI_ASSERT(bitPos < result);
// and the lowest bit with the above properties
TRI_ASSERT(result <= bitPos + dims);
return result;
};

for (unsigned dim = 0; dim < dims; dim++) {
auto& cmpRes = cmpResult[dim];
if (cmpRes.flag >= 0) {
auto bp = dims * cmpRes.saveMin + dim;
if (changeBP >= bp) {
// “set all bits of dim with bit positions > changeBP to 0”
BitReader br(next_v[dim]);
BitWriter bw;
size_t i = 0;
while (auto bit = br.next()) {
if (i * dims + dim > changeBP) {
break;
}
bw.append(bit.value());
i++;
for (std::size_t i = nextGreaterBitInDim(changeBP, dim); i < resultManipulator.bits(); i += dims) {
resultManipulator.setBit(i, Bit::ZERO);
}
next_v[dim] = std::move(bw).str();
} else {
// “set all bits of dim with bit positions > changeBP to the minimum of the query box in this dim”
BitReader br(next_v[dim]);
BitWriter bw;
size_t i = 0;
while (auto bit = br.next()) {
if (i * dims + dim > changeBP) {
break;
}
bw.append(bit.value());
i++;
for (std::size_t i = nextGreaterBitInDim(changeBP, dim); i < resultManipulator.bits(); i += dims) {
resultManipulator.setBit(i, minReader.getBit(i));
}
BitReader br_min(min_trans[dim]);
for (size_t j = 0; j < i; ++j) {
br_min.next();
}
for (; auto bit = br_min.next(); ++i) {
bw.append(bit.value());
}
next_v[dim] = std::move(bw).str();
}
} else {
// load the minimum for that dimension
next_v[dim] = min_trans[dim];
for (std::size_t i = dim; i < resultManipulator.bits(); i += dims) {
resultManipulator.setBit(i, minReader.getBit(i));
}
}
}

return interleave(next_v);
return result;
}

template<typename T>
Expand Down
31 changes: 10 additions & 21 deletions arangod/Zkd/ZkdHelper.h
B938
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,7 @@ namespace arangodb::zkd {
inline static std::byte operator"" _b(unsigned long long b) {
return std::byte{(unsigned char)b};
}
/*
struct byte_string : public std::basic_string<std::byte> {
using std::basic_string<std::byte>::basic_string;
using std::basic_string<std::byte>::operator=;

//byte_string(std::basic_string<std::byte> str) : std::basic_string<std::byte>(std::move(str)) {}

template<typename T>
auto operator+(T&& other) -> byte_string {
return byte_string(static_cast<std::basic_string<std::byte>&>(*this) + std::forward<T>(other));
}

auto as_string_view() const -> std::string_view {
return std::string_view(reinterpret_cast<const char *>(data()), size());
}
};*/

using byte_string = std::basic_string<std::byte>;
using byte_string_view = std::basic_string_view<std::byte>;

Expand All @@ -38,7 +23,7 @@ byte_string operator"" _bss(const char* str, std::size_t len);
auto interleave(std::vector<byte_string> const& vec) -> byte_string;
auto transpose(byte_string_view bs, std::size_t dimensions) -> std::vector<byte_string>;

struct CompareResult {
struct alignas(32) CompareResult {
static constexpr auto max = std::numeric_limits<std::size_t>::max();

signed flag = 0;
Expand Down Expand Up @@ -120,21 +105,25 @@ class BitWriter {
struct RandomBitReader {
explicit RandomBitReader(byte_string_view ref);

auto getBit(std::size_t index) -> Bit;
[[nodiscard]] auto getBit(std::size_t index) const -> Bit;

[[nodiscard]] auto bits() const -> std::size_t;

private:
byte_string_view ref;
byte_string_view _ref;
};

struct RandomBitManipulator {
explicit RandomBitManipulator(byte_string& ref);

auto getBit(std::size_t index) -> Bit;
[[nodiscard]] auto getBit(std::size_t index) const -> Bit;

auto setBit(std::size_t index, Bit value) -> void;

[[nodiscard]] auto bits() const -> std::size_t;

private:
byte_string& ref;
byte_string& _ref;
};

template <typename T>
Expand Down
5 changes: 3 additions & 2 deletions tests/js/server/aql/aql-optimizer-zkdindex-multi.js
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,10 @@ function optimizerRuleZkd2dIndexTestSuite() {
}
const executeRes = AQL_EXECUTE(query);
const res = executeRes.json;
res.sort();
const expected = productSet(x, y, z, w);
assertEqual(res, expected.sort());
res.sort();
expected.sort();
assertEqual(expected, res, JSON.stringify({query}));
};
}
}
Expand Down
0