Feature/internal issue #672 by Dronplane · Pull Request #11370 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Feature/internal issue #672 #11370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
d41bf1b
Tests now passes
Dronplane Mar 25, 2020
6ab67ce
More tests
Dronplane Mar 25, 2020
cd77866
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Mar 25, 2020
c0ee917
Added compression settings
Dronplane Mar 25, 2020
d988302
Fixed storage compression settings
Dronplane Mar 25, 2020
9c7e874
reworked compression setting
Dronplane Mar 26, 2020
e431072
added mock compressor
Dronplane Mar 26, 2020
2cb1112
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Mar 26, 2020
b4daac1
fixed linking
Dronplane Mar 27, 2020
63f0fac
added primarySortCompression
Dronplane Mar 29, 2020
61975a8
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Mar 29, 2020
a92ce3b
Added tests
Dronplane Mar 30, 2020
5757c47
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Mar 30, 2020
ffa1248
fix tests for mac
Dronplane Mar 30, 2020
79561da
added primarySortCompression test
Dronplane Mar 30, 2020
e0b220f
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Mar 31, 2020
62ea27b
added primarySortCompression and storedValues compression to js tests.
Dronplane Apr 1, 2020
e6a390b
more tests
Dronplane Apr 1, 2020
3b60ac0
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Apr 1, 2020
80583ba
jslint fixes
Dronplane Apr 1, 2020
8a36791
code cleanup. Jaccard function fix for empty arrays
Dronplane Apr 1, 2020
b27ab6e
Update CHANGELOG
Dronplane Apr 1, 2020
e3f7746
Code cleanup. More tests
Dronplane Apr 1, 2020
8e12b0d
test fixes
Dronplane Apr 1, 2020
8c473c0
fixed bug
Dronplane Apr 1, 2020
373a884
test
Dronplane Apr 1, 2020
91d2f42
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Apr 1, 2020
93dd5f8
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Apr 2, 2020
5c9c0f5
adressed review comments
Dronplane Apr 2, 2020
f14afc2
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Apr 2, 2020
6429477
Fix after merge
Dronplane Apr 2, 2020
8edfb98
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Apr 2, 2020
975d709
Merge branch 'devel' into feature/internal-issue-#672
Dronplane Apr 2, 2020
693cd26
fix build
Dronplane Apr 2, 2020
a8a3dd5
cleanup
Dronplane Apr 2, 2020
523072b
cleanup
Dronplane Apr 2, 2020
da4c6f8
fixed backslash
Dronplane Apr 3, 2020
eafd1cf
fix
Dronplane Apr 3, 2020
022ee7a
fix typo
Dronplane Apr 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added mock compressor
  • Loading branch information
Dronplane committed Mar 26, 2020
commit e431072c53e084cc321d3d384f9d617b6cd98a7c
80 changes: 45 additions & 35 deletions arangod/IResearch/IResearchLink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@
#include "Transaction/Helpers.h"
#include "Transaction/Methods.h"
#include "VocBase/LogicalCollection.h"

#ifdef ARANGODB_USE_GOOGLE_TESTS
#include "../tests/IResearch/IResearchTestCompressor.h"
#endif
using namespace std::literals;

namespace {
Expand Down Expand Up @@ -1142,69 +1144,77 @@ Result IResearchLink::initDataStore(
options.lock_repository = false; // do not lock index, ArangoDB has its own lock
options.comparator = sorted ? &_comparer : nullptr; // set comparator if requested

bool differentCompressions = false; // storedValues uses different compression method
IResearchViewStoredValues::ColumnCompression soleCompressionType{
IResearchViewStoredValues::ColumnCompression::LZ4 };
bool nonDefaultCompressions = false; // storedValues uses no default compression method
std::map<std::string, // we must store string as storedColumns could be temporary
const irs::compression::type_id&> compressionMap;
if (!storedColumns.empty()) {
soleCompressionType = storedColumns.front().compression;
for (auto c : storedColumns) {
if (soleCompressionType != c.compression) {
differentCompressions = true;
if (IResearchViewStoredValues::ColumnCompression::LZ4 != c.compression) {
nonDefaultCompressions = true;
break;
}
}
}
const irs::compression::type_id* soleCompression{ &irs::compression::lz4::type() };
if (differentCompressions) {
if (nonDefaultCompressions) {
// we will need compression map to handle compressions
// on insert
for (auto c : storedColumns) {
compressionMap.emplace(c.name,
(c.compression == IResearchViewStoredValues::ColumnCompression::LZ4) ?
irs::compression::lz4::type() : irs::compression::raw::type());
}
} else {
switch (soleCompressionType) {
case IResearchViewStoredValues::ColumnCompression::NONE:
soleCompression = irs::compression::raw::type();
break;
irs::compression::type_id const* compression{ nullptr };
switch (c.compression) {
case IResearchViewStoredValues::ColumnCompression::LZ4:
compression = irs::compression::lz4::type();
break;
case IResearchViewStoredValues::ColumnCompression::NONE:
compression = irs::compression::raw::type();
break;
#ifdef ARANGODB_USE_GOOGLE_TESTS
//case IResearchViewStoredValues::ColumnCompression::TEST:
// soleCompression = &arangodb::Tests::TestCompressor::type();
// break;
case IResearchViewStoredValues::ColumnCompression::TEST:
compression = irs::compression::mock::test_compressor::type();
break;
#endif
default:
TRI_ASSERT(false);
default:
TRI_ASSERT(false);
// fallback to default on runtime
compression = irs::compression::lz4::type();
break;
}
compressionMap.emplace(c.name, *compression);
}
}
// setup columnstore compression/encryption if requested by storage engine
auto const encrypt = (nullptr != irs::get_encryption(_dataStore._directory->attributes()));
if (encrypt) {
if (differentCompressions) {
if (nonDefaultCompressions) {
options.column_info = [compressionMap](const irs::string_ref& name) -> irs::column_info {
auto compress = compressionMap.find(name);
TRI_ASSERT(compress != compressionMap.end());
// do not waste resources to encrypt primary key column
return { compress->second, {}, DocumentPrimaryKey::PK() != name };
auto compress = compressionMap.find(name);
if (compress != compressionMap.end()) {
// do not waste resources to encrypt primary key column
return { compress->second, {}, DocumentPrimaryKey::PK() != name };
} else {
return { irs::compression::lz4::type(), {}, DocumentPrimaryKey::PK() != name };
}
};
} else {
options.column_info = [soleCompression](const irs::string_ref& name) -> irs::column_info {
options.column_info = [](const irs::string_ref& name) -> irs::column_info {
// do not waste resources to encrypt primary key column
return { *soleCompression, {}, DocumentPrimaryKey::PK() != name };
return { irs::compression::lz4::type(), {}, DocumentPrimaryKey::PK() != name };
};
}
} else {
if (differentCompressions) {
if (nonDefaultCompressions) {
options.column_info = [compressionMap](const irs::string_ref& name) -> irs::column_info {
auto compress = compressionMap.find(name);
TRI_ASSERT(compress != compressionMap.end());
return { compress->second, {}, false };
if (compress != compressionMap.end()) {
// do not waste resources to encrypt primary key column
return { compress->second, {}, false };
}
else {
return { irs::compression::lz4::type(), {}, false };
}
};
} else {
options.column_info = [soleCompression](const irs::string_ref& /*name*/) -> irs::column_info {
return { *soleCompression, {}, false };
options.column_info = [](const irs::string_ref&) -> irs::column_info {
return { irs::compression::lz4::type(), {}, false };
};
}
}
Expand Down
194 changes: 194 additions & 0 deletions tests/IResearch/IResearchLink-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,19 @@
#include "VocBase/KeyGenerator.h"
#include "VocBase/LogicalCollection.h"
#include "VocBase/LogicalView.h"
#include "IResearchTestCompressor.h"

#if USE_ENTERPRISE
#include "Enterprise/Ldap/LdapFeature.h"
#endif

// Mock compressor plumbing for the IResearchLink tests in this file.
// NOTE(review): DEFINE_COMPRESSION_TYPE presumably provides the definition
// behind test_compressor::type() - confirm against the macro.
DEFINE_COMPRESSION_TYPE(iresearch::compression::mock::test_compressor);
// Definitions of the compress/decompress hooks. Both are default-constructed,
// i.e. empty std::function objects, until a test assigns a callable to them.
// They are shared by every test in this translation unit, so a test that
// installs a hook capturing local state should clear it before that state dies.
std::function<irs::bytes_ref(irs::byte_type* src, size_t size, irs::bstring& out)> iresearch::compression::mock::test_compressor::compress_mock;
std::function<irs::bytes_ref(irs::byte_type* src, size_t src_size, irs::byte_type* dst, size_t dst_size)> iresearch::compression::mock::test_compressor::decompress_mock;
// Register the mock compressor together with its compressor/decompressor
// entry points.
// NOTE(review): presumably this makes the type resolvable by the iresearch
// compression registry - confirm against the macro.
REGISTER_COMPRESSION(iresearch::compression::mock::test_compressor,
&iresearch::compression::mock::test_compressor::compressor,
&iresearch::compression::mock::test_compressor::decompressor);

static const VPackBuilder systemDatabaseBuilder = dbArgsBuilder();
static const VPackSlice systemDatabaseArgs = systemDatabaseBuilder.slice();
// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -852,3 +860,189 @@ TEST_F(IResearchLinkTest, test_write) {
logicalCollection->dropIndex(link->id());
EXPECT_ANY_THROW((reader.reopen()));
}

// Writes two documents through a link whose storedValues columns ("abc" and
// "abc2") are both configured with the mock "test" compression, and checks that
// exactly the raw velocypack bytes of those two attributes of doc0 were handed
// to the mock compress hook, in that order. doc1 only carries "ghi", which is
// not a stored column here, so it contributes nothing to the expectation.
TEST_F(IResearchLinkTest, test_write_with_custom_compression_nondefault_sole) {
  static std::vector<std::string> const EMPTY;
  auto doc0 = arangodb::velocypack::Parser::fromJson("{ \"abc\": \"def\", \"abc2\":\"aaa\" }");
  auto doc1 = arangodb::velocypack::Parser::fromJson("{ \"ghi\": \"jkl\" }");

  // Sink for every buffer passed to the mock compressor. It is declared before
  // 'vocbase' (and everything created from it) so that it outlives all objects
  // that could still invoke the hook while they are being torn down.
  std::string compressed_values;
  // The hook below is a global that captures 'compressed_values' by reference.
  // Clear it on every exit path (including early ASSERT_* returns) so that no
  // dangling reference is left behind for subsequent tests. Being declared
  // after the string, this guard runs before the string is destroyed.
  struct CompressMockReset {
    ~CompressMockReset() {
      irs::compression::mock::test_compressor::compress_mock = nullptr;
    }
  } compressMockReset;
  // Identity "compression": copy the input to the output and record it.
  irs::compression::mock::test_compressor::compress_mock =
      [&compressed_values](irs::byte_type* src, size_t size, irs::bstring& out) -> irs::bytes_ref {
        out.append(src, size);
        compressed_values.append(reinterpret_cast<const char*>(src), size);
        return irs::bytes_ref(reinterpret_cast<const irs::byte_type*>(out.data()), size);
      };

  TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, testDBInfo(server.server()));
  auto linkJson = arangodb::velocypack::Parser::fromJson(
      "{ \"id\": 42, \"type\": \"arangosearch\", \"view\": \"42\", "
      "\"includeAllFields\": true,"
      "\"storedValues\":[{\"field\":\"abc\", \"compression\":\"test\"}, {\"field\":\"abc2\", \"compression\":\"test\"}]"
      "}");
  auto collectionJson = arangodb::velocypack::Parser::fromJson(
      "{ \"name\": \"testCollection\" }");
  auto viewJson = arangodb::velocypack::Parser::fromJson(
      "{ "
      "\"id\": 42, "
      "\"name\": \"testView\", "
      "\"type\": \"arangosearch\", "
      "\"storedValues\":[{\"field\":\"abc\", \"compression\":\"test\"}, {\"field\":\"abc2\", \"compression\":\"test\"}]"
      "}");

  auto logicalCollection = vocbase.createCollection(collectionJson->slice());
  ASSERT_TRUE((nullptr != logicalCollection));
  auto view = std::dynamic_pointer_cast<arangodb::iresearch::IResearchView>(
      vocbase.createView(viewJson->slice()));
  ASSERT_TRUE((false == !view));
  view->open();
  ASSERT_TRUE(server.server().hasFeature<arangodb::FlushFeature>());

  // Directory opened by the reader below; the name embeds the collection id
  // and the link id (42).
  std::string const dataPath =
      ((((irs::utf8_path() /= testFilesystemPath) /=
         std::string("databases")) /=
        (std::string("database-") + std::to_string(vocbase.id()))) /=
       (std::string("arangosearch-") + std::to_string(logicalCollection->id()) + "_42"))
          .utf8();
  irs::fs_directory directory(dataPath);
  bool created = false;
  auto link = logicalCollection->createIndex(linkJson->slice(), created);
  ASSERT_TRUE((false == !link && created));
  auto reader = irs::directory_reader::open(directory);
  EXPECT_EQ(0, reader.reopen().live_docs_count());

  // First document: a link commit before the transaction commit exposes no
  // live documents yet (asserted below).
  {
    arangodb::transaction::Methods trx(arangodb::transaction::StandaloneContext::Create(vocbase),
                                       EMPTY, EMPTY, EMPTY,
                                       arangodb::transaction::Options());
    EXPECT_TRUE((trx.begin().ok()));
    auto* l = dynamic_cast<arangodb::iresearch::IResearchLink*>(link.get());
    ASSERT_TRUE(l != nullptr);
    EXPECT_TRUE((l->insert(trx, arangodb::LocalDocumentId(1), doc0->slice(),
                           arangodb::Index::OperationMode::normal)
                     .ok()));
    EXPECT_TRUE((l->commit().ok()));
    EXPECT_EQ(0, reader.reopen().live_docs_count());

    EXPECT_TRUE((trx.commit().ok()));
    EXPECT_TRUE((l->commit().ok()));
  }

  EXPECT_EQ(1, reader.reopen().live_docs_count());

  // Second document.
  {
    arangodb::transaction::Methods trx(arangodb::transaction::StandaloneContext::Create(vocbase),
                                       EMPTY, EMPTY, EMPTY,
                                       arangodb::transaction::Options());
    EXPECT_TRUE((trx.begin().ok()));
    auto* l = dynamic_cast<arangodb::iresearch::IResearchLink*>(link.get());
    ASSERT_TRUE(l != nullptr);
    EXPECT_TRUE((l->insert(trx, arangodb::LocalDocumentId(2), doc1->slice(),
                           arangodb::Index::OperationMode::normal)
                     .ok()));
    EXPECT_TRUE((trx.commit().ok()));
    EXPECT_TRUE((l->commit().ok()));
  }

  EXPECT_EQ(2, reader.reopen().live_docs_count());

  // The mock must have seen the velocypack bytes of doc0's "abc" followed by
  // those of doc0's "abc2" and nothing else.
  std::string expected;
  auto abcSlice = doc0->slice().get("abc");
  expected.append(reinterpret_cast<const char*>(abcSlice.start()), abcSlice.byteSize());
  auto abc2Slice = doc0->slice().get("abc2");
  expected.append(reinterpret_cast<const char*>(abc2Slice.start()), abc2Slice.byteSize());
  EXPECT_EQ(expected, compressed_values);
}

// Writes two documents through a link whose storedValues columns mix
// compressions: "abc" and "ghi" use the mock "test" compression while "abc2"
// uses "lz4". Checks that only the raw velocypack bytes of doc0's "abc" and
// doc1's "ghi" were handed to the mock compress hook, in that order, i.e. the
// lz4 column never reached the mock.
TEST_F(IResearchLinkTest, test_write_with_custom_compression_nondefault_mixed) {
  static std::vector<std::string> const EMPTY;
  auto doc0 = arangodb::velocypack::Parser::fromJson("{ \"abc\": \"def\", \"abc2\":\"aaa\" }");
  auto doc1 = arangodb::velocypack::Parser::fromJson("{ \"ghi\": \"jkl\" }");

  // Sink for every buffer passed to the mock compressor. It is declared before
  // 'vocbase' (and everything created from it) so that it outlives all objects
  // that could still invoke the hook while they are being torn down.
  std::string compressed_values;
  // The hook below is a global that captures 'compressed_values' by reference.
  // Clear it on every exit path (including early ASSERT_* returns) so that no
  // dangling reference is left behind for subsequent tests. Being declared
  // after the string, this guard runs before the string is destroyed.
  struct CompressMockReset {
    ~CompressMockReset() {
      irs::compression::mock::test_compressor::compress_mock = nullptr;
    }
  } compressMockReset;
  // Identity "compression": copy the input to the output and record it.
  irs::compression::mock::test_compressor::compress_mock =
      [&compressed_values](irs::byte_type* src, size_t size, irs::bstring& out) -> irs::bytes_ref {
        out.append(src, size);
        compressed_values.append(reinterpret_cast<const char*>(src), size);
        return irs::bytes_ref(reinterpret_cast<const irs::byte_type*>(out.data()), size);
      };

  TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, testDBInfo(server.server()));
  auto linkJson = arangodb::velocypack::Parser::fromJson(
      "{ \"id\": 42, \"type\": \"arangosearch\", \"view\": \"42\", "
      "\"includeAllFields\": true,"
      "\"storedValues\":[{\"field\":\"abc\", \"compression\":\"test\"},"
      "{\"field\":\"abc2\", \"compression\":\"lz4\"},"
      "{\"field\":\"ghi\", \"compression\":\"test\"}]"
      "}");
  auto collectionJson = arangodb::velocypack::Parser::fromJson(
      "{ \"name\": \"testCollection\" }");
  auto viewJson = arangodb::velocypack::Parser::fromJson(
      "{ "
      "\"id\": 42, "
      "\"name\": \"testView\", "
      "\"type\": \"arangosearch\", "
      "\"storedValues\":[{\"field\":\"abc\", \"compression\":\"test\"},"
      "{\"field\":\"abc2\", \"compression\":\"lz4\"},"
      "{\"field\":\"ghi\", \"compression\":\"test\"}]"
      "}");

  auto logicalCollection = vocbase.createCollection(collectionJson->slice());
  ASSERT_TRUE((nullptr != logicalCollection));
  auto view = std::dynamic_pointer_cast<arangodb::iresearch::IResearchView>(
      vocbase.createView(viewJson->slice()));
  ASSERT_TRUE((false == !view));
  view->open();
  ASSERT_TRUE(server.server().hasFeature<arangodb::FlushFeature>());

  // Directory opened by the reader below; the name embeds the collection id
  // and the link id (42).
  std::string const dataPath =
      ((((irs::utf8_path() /= testFilesystemPath) /=
         std::string("databases")) /=
        (std::string("database-") + std::to_string(vocbase.id()))) /=
       (std::string("arangosearch-") + std::to_string(logicalCollection->id()) + "_42"))
          .utf8();
  irs::fs_directory directory(dataPath);
  bool created = false;
  auto link = logicalCollection->createIndex(linkJson->slice(), created);
  ASSERT_TRUE((false == !link && created));
  auto reader = irs::directory_reader::open(directory);
  EXPECT_EQ(0, reader.reopen().live_docs_count());

  // First document: a link commit before the transaction commit exposes no
  // live documents yet (asserted below).
  {
    arangodb::transaction::Methods trx(arangodb::transaction::StandaloneContext::Create(vocbase),
                                       EMPTY, EMPTY, EMPTY,
                                       arangodb::transaction::Options());
    EXPECT_TRUE((trx.begin().ok()));
    auto* l = dynamic_cast<arangodb::iresearch::IResearchLink*>(link.get());
    ASSERT_TRUE(l != nullptr);
    EXPECT_TRUE((l->insert(trx, arangodb::LocalDocumentId(1), doc0->slice(),
                           arangodb::Index::OperationMode::normal)
                     .ok()));
    EXPECT_TRUE((l->commit().ok()));
    EXPECT_EQ(0, reader.reopen().live_docs_count());

    EXPECT_TRUE((trx.commit().ok()));
    EXPECT_TRUE((l->commit().ok()));
  }

  EXPECT_EQ(1, reader.reopen().live_docs_count());

  // Second document: carries "ghi", the other column using the mock compressor.
  {
    arangodb::transaction::Methods trx(arangodb::transaction::StandaloneContext::Create(vocbase),
                                       EMPTY, EMPTY, EMPTY,
                                       arangodb::transaction::Options());
    EXPECT_TRUE((trx.begin().ok()));
    auto* l = dynamic_cast<arangodb::iresearch::IResearchLink*>(link.get());
    ASSERT_TRUE(l != nullptr);
    EXPECT_TRUE((l->insert(trx, arangodb::LocalDocumentId(2), doc1->slice(),
                           arangodb::Index::OperationMode::normal)
                     .ok()));
    EXPECT_TRUE((trx.commit().ok()));
    EXPECT_TRUE((l->commit().ok()));
  }

  EXPECT_EQ(2, reader.reopen().live_docs_count());

  // The mock must have seen doc0's "abc" followed by doc1's "ghi"; doc0's
  // "abc2" is stored with lz4 and must not show up here.
  std::string expected;
  auto abcSlice = doc0->slice().get("abc");
  expected.append(reinterpret_cast<const char*>(abcSlice.start()), abcSlice.byteSize());
  auto ghiSlice = doc1->slice().get("ghi");
  expected.append(reinterpret_cast<const char*>(ghiSlice.start()), ghiSlice.byteSize());
  EXPECT_EQ(expected, compressed_values);
}
Loading
0