Feature/internal issue #672 by Dronplane · Pull Request #11370 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
d41bf1b
Tests now passes
Mar 25, 2020
6ab67ce
More tests
Mar 25, 2020
cd77866
Merge branch 'devel' into feature/internal-issue-#672
Mar 25, 2020
c0ee917
Added compression settings
Mar 25, 2020
d988302
Fixed storage compression settings
Mar 25, 2020
9c7e874
reworked compression setting
Mar 26, 2020
e431072
added mock compressor
Mar 26, 2020
2cb1112
Merge branch 'devel' into feature/internal-issue-#672
Mar 26, 2020
b4daac1
fixed linking
Mar 27, 2020
63f0fac
added primarySortCompression
Mar 29, 2020
61975a8
Merge branch 'devel' into feature/internal-issue-#672
Mar 29, 2020
a92ce3b
Added tests
Mar 30, 2020
5757c47
Merge branch 'devel' into feature/internal-issue-#672
Mar 30, 2020
ffa1248
fix tests for mac
Mar 30, 2020
79561da
added primarySortCompression test
Mar 30, 2020
e0b220f
Merge branch 'devel' into feature/internal-issue-#672
Mar 31, 2020
62ea27b
added primarySortCompression and storedValues compression to js tests.
Apr 1, 2020
e6a390b
more tests
Apr 1, 2020
3b60ac0
Merge branch 'devel' into feature/internal-issue-#672
Apr 1, 2020
80583ba
jslint fixes
Apr 1, 2020
8a36791
code cleanup. Jaccard function fix for empty arrays
Apr 1, 2020
b27ab6e
Update CHANGELOG
Apr 1, 2020
e3f7746
Code cleanup. More tests
Apr 1, 2020
8e12b0d
test fixes
Apr 1, 2020
8c473c0
fixed bug
Apr 1, 2020
373a884
test
Apr 1, 2020
91d2f42
Merge branch 'devel' into feature/internal-issue-#672
Apr 1, 2020
93dd5f8
Merge branch 'devel' into feature/internal-issue-#672
Apr 2, 2020
5c9c0f5
adressed review comments
Apr 2, 2020
f14afc2
Merge branch 'devel' into feature/internal-issue-#672
Apr 2, 2020
6429477
Fix after merge
Apr 2, 2020
8edfb98
Merge branch 'devel' into feature/internal-issue-#672
Apr 2, 2020
975d709
Merge branch 'devel' into feature/internal-issue-#672
Apr 2, 2020
693cd26
fix build
Apr 2, 2020
a8a3dd5
cleanup
Apr 2, 2020
523072b
cleanup
Apr 2, 2020
da4c6f8
fixed backslash
Apr 3, 2020
eafd1cf
fix
Apr 3, 2020
022ee7a
fix typo
Apr 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 3rdParty/iresearch/core/index/index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ const size_t NON_UPDATE_RECORD = irs::integer_traits<size_t>::const_max; // non-

const irs::column_info_provider_t DEFAULT_COLUMN_INFO = [](const irs::string_ref&) {
// no compression, no encryption
return irs::column_info{ irs::compression::raw::type(), {}, false };
return irs::column_info{ irs::compression::none::type(), {}, false };
};

struct flush_segment_context {
Expand Down
10 changes: 5 additions & 5 deletions 3rdParty/iresearch/core/utils/compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ void init() {
#ifndef IRESEARCH_DLL
lz4::init();
delta::init();
raw::init();
none::init();
#endif
}

Expand All @@ -192,16 +192,16 @@ bool visit(const std::function<bool(const string_ref&)>& visitor) {
// --SECTION-- none implementation
// -----------------------------------------------------------------------------

/*static*/ void raw::init() {
/*static*/ void none::init() {
#ifndef IRESEARCH_DLL
// match registration below
REGISTER_COMPRESSION(raw, &raw::compressor, &raw::decompressor);
REGISTER_COMPRESSION(none, &none::compressor, &none::decompressor);
#endif
}

DEFINE_COMPRESSION_TYPE(iresearch::compression::raw);
DEFINE_COMPRESSION_TYPE(iresearch::compression::none);

REGISTER_COMPRESSION(raw, &raw::compressor, &raw::decompressor);
REGISTER_COMPRESSION(none, &none::compressor, &none::decompressor);

NS_END // compression
NS_END
2 changes: 1 addition & 1 deletion 3rdParty/iresearch/core/utils/compression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ IRESEARCH_API bool visit(const std::function<bool(const string_ref&)>& visitor);
/// @class none
/// @brief no compression
////////////////////////////////////////////////////////////////////////////////
struct IRESEARCH_API raw {
struct IRESEARCH_API none {
DECLARE_COMPRESSION_TYPE();

static void init();
Expand Down
4 changes: 2 additions & 2 deletions 3rdParty/iresearch/tests/index/index_column_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1969,7 +1969,7 @@ TEST_P(index_column_test_case, read_write_doc_attributes_sparse_column_dense_var
// sparse_column<dense_block>
irs::index_writer::init_options options;
options.column_info = [](const irs::string_ref&) {
return irs::column_info{ irs::compression::raw::type(), irs::compression::options{}, true };
return irs::column_info{ irs::compression::none::type(), irs::compression::options{}, true };
};

static const irs::doc_id_t BLOCK_SIZE = 1024;
Expand Down Expand Up @@ -3118,7 +3118,7 @@ TEST_P(index_column_test_case, read_write_doc_attributes_sparse_column_dense_fix

irs::index_writer::init_options options;
options.column_info = [](const irs::string_ref&) {
return irs::column_info{ irs::compression::raw::type(), irs::compression::options{}, false };
return irs::column_info{ irs::compression::none::type(), irs::compression::options{}, false };
};

// border case for sparse fixed offset columns, e.g.
Expand Down
2 changes: 1 addition & 1 deletion 3rdParty/iresearch/tests/index/sorted_column_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ TEST(sorted_column_test, insert_duplicates) {

writer->prepare(dir, segment);

irs::sorted_column col({ irs::compression::raw::type(), {}, true });
irs::sorted_column col({ irs::compression::none::type(), {}, true });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
devel
-----
* Added JACCARD AQL function

* storedValues property is removed from ArangoSearch link properties output.

* Added primarySortCompression property to ArangoSearch Views

* Added compression property to ArangoSearch View storedValues

* Removed deprecated MMFiles storage engine and also the `arango-dfdb`
(datafile debugger) executable that could be used to validate MMFiles
Expand Down
2 changes: 1 addition & 1 deletion arangod/Aql/Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5065,7 +5065,7 @@ AqlValue Functions::Jaccard(ExpressionContext* ctx, transaction::Methods* trx,
count = 0;
}

auto const jaccard = values.empty() ? 0.0 : double_t(cardinality) / values.size();
auto const jaccard = values.empty() ? 1.0 : double_t(cardinality) / values.size();

return AqlValue{AqlValueHintDouble{jaccard}};
}
Expand Down
1 change: 1 addition & 0 deletions arangod/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ add_library(arango_iresearch
IResearch/Containers.cpp IResearch/Containers.h
IResearch/IResearchAnalyzerFeature.cpp IResearch/IResearchAnalyzerFeature.h
IResearch/IResearchCommon.cpp IResearch/IResearchCommon.h
IResearch/IResearchCompression.cpp IResearch/IResearchCompression.h
IResearch/IResearchKludge.cpp IResearch/IResearchKludge.h
IResearch/IResearchLink.cpp IResearch/IResearchLink.h
IResearch/IResearchLinkCoordinator.cpp IResearch/IResearchLinkCoordinator.h
Expand Down
69 changes: 69 additions & 0 deletions arangod/IResearch/IResearchCompression.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2020 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrei Lobov
////////////////////////////////////////////////////////////////////////////////

#include "IResearchCompression.h"
#include "Basics/debugging.h"
#include <utils/lz4compression.hpp>
#ifdef ARANGODB_USE_GOOGLE_TESTS
#include "../tests/IResearch/IResearchTestCompressor.h"
#endif

namespace arangodb {
namespace iresearch {

/// @brief returns the short (unqualified) name of a compression type
/// @param type compression type to describe, must not be nullptr
/// @return the part of the type name that follows the last ':' (the whole
///         name if it contains no ':'); an empty string_ref if 'type' is
///         nullptr or its name is empty
irs::string_ref columnCompressionToString(irs::compression::type_id const* type) {
  if (ADB_UNLIKELY(type == nullptr)) {
    TRI_ASSERT(false);
    return irs::string_ref::EMPTY;
  }
  auto const& mangled_name = type->name();
  if (ADB_UNLIKELY(mangled_name.empty())) {
    // an assertion alone is not enough here: when assertions are disabled,
    // 'end() - 1' below would point before the start of the buffer
    TRI_ASSERT(false);
    return irs::string_ref::EMPTY;
  }
  // walk backwards from the last character until the character preceding
  // the cursor is the namespace separator or the start of the name is reached
  auto demangled_start = mangled_name.end() - 1;
  while (demangled_start != mangled_name.begin() && *(demangled_start - 1) != ':') {
    --demangled_start;
  }
  return irs::string_ref(demangled_start, std::distance(demangled_start, mangled_name.end()));
}

/// @brief resolves a compression name to the matching compression type
/// @param c compression name, must not be a null string_ref
/// @return pointer to the type registered for "lz4" or "none" (and for
///         "test" when built with ARANGODB_USE_GOOGLE_TESTS),
///         nullptr for any other name
irs::compression::type_id const* columnCompressionFromString(irs::string_ref const& c) {
  namespace compression = irs::compression;

  TRI_ASSERT(!c.null());

  if (c == "none") {
    return &compression::none::type();
  }
  if (c == "lz4") {
    return &compression::lz4::type();
  }
#ifdef ARANGODB_USE_GOOGLE_TESTS
  // mock compressor is only available in test builds
  if (c == "test") {
    return &compression::mock::test_compressor::type();
  }
#endif
  // unknown compression name
  return nullptr;
}

/// @brief returns the default column compression type (lz4)
irs::compression::type_id const& getDefaultCompression() {
  auto const& defaultType = irs::compression::lz4::type();
  return defaultType;
}

} // namespace iresearch
} // namespace arangodb

37 changes: 37 additions & 0 deletions arangod/IResearch/IResearchCompression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2020 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrei Lobov
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_IRESEARCH__IRESEARCH_COMPRESSION_H
#define ARANGOD_IRESEARCH__IRESEARCH_COMPRESSION_H 1

#include "utils/string.hpp"
#include <utils/compression.hpp>

namespace arangodb {
namespace iresearch {

/// @brief returns the short name of the given compression type, i.e. the part
///        of the type name following the last ':'; returns an empty
///        string_ref if 'type' is nullptr
irs::string_ref columnCompressionToString(irs::compression::type_id const* type);
/// @brief resolves a compression name ("lz4" or "none"; additionally "test"
///        when built with ARANGODB_USE_GOOGLE_TESTS) to its compression type,
///        returns nullptr if the name is not recognized
irs::compression::type_id const* columnCompressionFromString(irs::string_ref const& c);
/// @brief returns the default compression type (currently lz4)
irs::compression::type_id const& getDefaultCompression();
} // iresearch
} // arangodb

#endif // ARANGOD_IRESEARCH__IRESEARCH_COMPRESSION_H
2 changes: 1 addition & 1 deletion arangod/IResearch/IResearchFeature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ bool upgradeSingleServerArangoSearchView0_1(

void registerFilters(arangodb::aql::AqlFunctionFeature& functions) {
using arangodb::iresearch::addFunction;

auto flags =
arangodb::aql::Function::makeFlags(arangodb::aql::Function::Flags::Deterministic,
arangodb::aql::Function::Flags::Cacheable,
Expand Down
2 changes: 1 addition & 1 deletion arangod/IResearch/IResearchFilterFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2742,7 +2742,7 @@ arangodb::Result fromFuncNgramMatch(
kludge::mangleStringField(name, analyzerPool);

auto& ngramFilter = filter->add<irs::by_ngram_similarity>();
ngramFilter.field(std::move(name)).threshold(threshold).boost(filterCtx.boost);;
ngramFilter.field(std::move(name)).threshold((float_t)threshold).boost(filterCtx.boost);;

analyzer->reset(matchValue);
irs::term_attribute const& token = *analyzer->attributes().get<irs::term_attribute>();
Expand Down
63 changes: 40 additions & 23 deletions arangod/IResearch/IResearchLink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <store/mmap_directory.hpp>
#include <store/store_utils.hpp>
#include <utils/encryption.hpp>
#include <utils/lz4compression.hpp>
#include <utils/singleton.hpp>

#include "IResearchLink.h"
Expand All @@ -37,6 +36,7 @@
#include "Cluster/ClusterFeature.h"
#include "Cluster/ClusterInfo.h"
#include "IResearch/IResearchCommon.h"
#include "IResearch/IResearchCompression.h"
#include "IResearch/IResearchFeature.h"
#include "IResearch/IResearchLinkHelper.h"
#include "IResearch/IResearchPrimaryKeyFilter.h"
Expand Down Expand Up @@ -247,8 +247,7 @@ bool readTick(irs::bytes_ref const& payload, TRI_voc_tick_t& tick) noexcept {
static_assert(
// cppcheck-suppress duplicateExpression
sizeof(uint64_t) == sizeof(TRI_voc_tick_t),
"sizeof(uint64_t) != sizeof(TRI_voc_tick_t)"
);
"sizeof(uint64_t) != sizeof(TRI_voc_tick_t)");

if (payload.size() != sizeof(uint64_t)) {
return false;
Expand All @@ -259,7 +258,6 @@ bool readTick(irs::bytes_ref const& payload, TRI_voc_tick_t& tick) noexcept {

return true;
}

} // namespace

namespace arangodb {
Expand Down Expand Up @@ -305,12 +303,12 @@ IResearchLink::IResearchLink(arangodb::IndexId iid, LogicalCollection& collectio
_trxCallback = [key](transaction::Methods& trx, transaction::Status status)->void {
auto* state = trx.state();
TRI_ASSERT(state != nullptr);

// check state of the top-most transaction only
if (!state || !state->isTopLevelTransaction()) {
return; // NOOP
}

auto prev = state->cookie(key, nullptr); // get existing cookie

if (prev) {
Expand Down Expand Up @@ -819,7 +817,9 @@ Result IResearchLink::init(
auto viewId = definition.get(StaticStrings::ViewIdField).copyString();
auto& vocbase = _collection.vocbase();
bool const sorted = !meta._sort.empty();

auto const& storedValuesColumns = meta._storedValues.columns();
TRI_ASSERT(meta._sortCompression);
auto const& primarySortCompression = meta._sortCompression? *meta._sortCompression : getDefaultCompression();
if (ServerState::instance()->isCoordinator()) { // coordinator link
if (!vocbase.server().hasFeature<arangodb::ClusterFeature>()) {
return {
Expand Down Expand Up @@ -874,7 +874,7 @@ Result IResearchLink::init(
if (!clusterWideLink) {
// prepare data-store which can then update options
// via the IResearchView::link(...) call
auto const res = initDataStore(initCallback, sorted);
auto const res = initDataStore(initCallback, sorted, storedValuesColumns, primarySortCompression);

if (!res.ok()) {
return res;
Expand Down Expand Up @@ -911,7 +911,7 @@ Result IResearchLink::init(
// missing links will be populated when they are created in the
// per-shard collection
if (shardIds) {
for (auto& entry: *shardIds) {
for (auto& entry : *shardIds) {
auto collection = vocbase.lookupCollection(entry.first); // per-shard collections are always in 'vocbase'

if (!collection) {
Expand Down Expand Up @@ -942,7 +942,7 @@ Result IResearchLink::init(
} else if (ServerState::instance()->isSingleServer()) { // single-server link
// prepare data-store which can then update options
// via the IResearchView::link(...) call
auto const res = initDataStore(initCallback, sorted);
auto const res = initDataStore(initCallback, sorted, storedValuesColumns, primarySortCompression);

if (!res.ok()) {
return res;
Expand Down Expand Up @@ -989,14 +989,17 @@ Result IResearchLink::init(
return {};
}

Result IResearchLink::initDataStore(InitCallback const& initCallback, bool sorted) {
Result IResearchLink::initDataStore(
InitCallback const& initCallback, bool sorted,
std::vector<IResearchViewStoredValues::StoredColumn> const& storedColumns,
irs::compression::type_id const& primarySortCompression) {
_asyncTerminate.store(true); // mark long-running async jobs for terminatation

if (_asyncFeature) {
_asyncFeature->asyncNotify(); // trigger reload of settings for async jobs
}

_flushSubscription.reset() ; // reset together with '_asyncSelf'
_flushSubscription.reset(); // reset together with '_asyncSelf'
_asyncSelf->reset(); // the data-store is being deallocated, link use is no longer valid (wait for all the view users to finish)

auto& server = _collection.vocbase().server();
Expand Down Expand Up @@ -1094,7 +1097,6 @@ Result IResearchLink::initDataStore(InitCallback const& initCallback, bool sorte
<< "link '" << id() << "', docs count '" << _dataStore._reader->docs_count()
<< "', live docs count '" << _dataStore._reader->live_docs_count()
<< "', recovery tick '" << _dataStore._recoveryTick << "'";

} catch (irs::index_not_found const&) {
// NOOP
}
Expand All @@ -1108,18 +1110,33 @@ Result IResearchLink::initDataStore(InitCallback const& initCallback, bool sorte
options.lock_repository = false; // do not lock index, ArangoDB has its own lock
options.comparator = sorted ? &_comparer : nullptr; // set comparator if requested

// as meta is still not filled at this moment
// we need to store all compression mapping there
// as values provided may be temporary
std::map<std::string, irs::compression::type_id const&> compressionMap;
for (auto c : storedColumns) {
if (ADB_LIKELY(c.compression != nullptr)) {
compressionMap.emplace(c.name, *c.compression);
} else {
TRI_ASSERT(false);
compressionMap.emplace(c.name, getDefaultCompression());
}
}
// setup columnstore compression/encryption if requested by storage engine
auto const encrypt = (nullptr != irs::get_encryption(_dataStore._directory->attributes()));
if (encrypt) {
options.column_info = [](const irs::string_ref& name) -> irs::column_info {
// do not waste resources to encrypt primary key column
return { irs::compression::lz4::type(), {}, DocumentPrimaryKey::PK() != name };
};
} else {
options.column_info = [](const irs::string_ref& /*name*/) -> irs::column_info {
return { irs::compression::lz4::type(), {}, false };
options.column_info =
[encrypt, comprMap = std::move(compressionMap), &primarySortCompression](const irs::string_ref& name) -> irs::column_info {
if (name.null()) {
return { primarySortCompression, {}, encrypt };
}
auto compress = comprMap.find(name);
if (compress != comprMap.end()) {
// do not waste resources to encrypt primary key column
return { compress->second, {}, encrypt && (DocumentPrimaryKey::PK() != name) };
} else {
return { getDefaultCompression(), {}, encrypt && (DocumentPrimaryKey::PK() != name) };
}
};
}

auto openFlags = irs::OM_APPEND;
if (!_dataStore._reader) {
Expand Down Expand Up @@ -1722,7 +1739,7 @@ Result IResearchLink::unload() {
_asyncFeature->asyncNotify(); // trigger reload of settings for async jobs
}

_flushSubscription.reset() ; // reset together with '_asyncSelf'
_flushSubscription.reset(); // reset together with '_asyncSelf'
_asyncSelf->reset(); // the data-store is being deallocated, link use is no longer valid (wait for all the view users to finish)

try {
Expand Down
Loading
0