8000 ZKD Indexes (#13650) · arangodb/arangodb@8c2374f · GitHub
[go: up one dir, main page]

Skip to content

Commit 8c2374f

Browse files
author
Lars Maier
authored
ZKD Indexes (#13650)
1 parent 0849e98 commit 8c2374f

35 files changed

+3074
-22
lines changed

CHANGELOG

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
11
devel
22
-----
33

4+
* Added multidimensional indexes which can be used to efficiently intersect
5+
multiple range queries. They are currently limited to IEEE-754 double values.
6+
Given documents of the form {x: 12.9, y: -284.0, z: 0.02} one can define a
7+
multidimensional index using the new type 'zkd' on the fields ["x", "y", "z"].
8+
9+
The AQL optimizer will then consider this index when doing queries on multiple
10+
ranges, for example:
11+
12+
FOR p IN points
13+
FILTER x0 <= p.x && p.x <= x1
14+
FILTER y0 <= p.y && p.y <= y1
15+
FILTER z0 <= p.z && p.z <= z1
16+
RETURN p
17+
18+
The index implements the relations <=, == and >= natively. Strict relations are
19+
emulated using post filtering. Ranges can be unbounded on one or both sides.
20+
421
* No runtime limits for shard move and server cleanout jobs, instead
522
possibility to cancel them.
623

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
2+
@startDocuBlock post_api_index_zkd
3+
@brief creates a multi-dimensional index
4+
5+
@RESTHEADER{POST /_api/index#multi-dim, Create multi-dimensional index, createIndex#multi-dim}
6+
7+
@RESTQUERYPARAMETERS
8+
9+
@RESTQUERYPARAM{collection,string,required}
10+
The collection name.
11+
12+
@RESTBODYPARAM{type,string,required,string}
13+
must be equal to *"zkd"*.
14+
15+
@RESTBODYPARAM{fields,array,required,string}
16+
an array of attribute names used for each dimension. Array expansions are not allowed.
17+
18+
@RESTBODYPARAM{unique,boolean,optional,}
19+
if *true*, then create a unique index.
20+
21+
@RESTBODYPARAM{fieldValueTypes,string,required,string}
22+
must be equal to *"double"*. Currently only doubles are supported as values.
23+
24+
@RESTDESCRIPTION
25+
Creates a multi-dimensional index for the collection *collection-name*, if
26+
it does not already exist. The call expects an object containing the index
27+
details.
28+
29+
@RESTRETURNCODES
30+
31+
@RESTRETURNCODE{200}
32+
If the index already exists, then a *HTTP 200* is
33+
returned.
34+
35+
@RESTRETURNCODE{201}
36+
If the index does not already exist and could be created, then a *HTTP 201*
37+
is returned.
38+
39+
@RESTRETURNCODE{404}
40+
If the *collection-name* is unknown, then a *HTTP 404* is returned.
41+
42+
@RESTRETURNCODE{400}
43+
If the index definition is invalid, then a *HTTP 400* is returned.
44+
45+
@EXAMPLES
46+
47+
Creating a multi-dimensional index
48+
49+
@EXAMPLE_ARANGOSH_RUN{RestIndexCreateNewZkd}
50+
var cn = "intervals";
51+
db._drop(cn);
52+
db._create(cn);
53+
54+
var url = "/_api/index?collection=" + cn;
55+
var body = {
56+
type: "zkd",
57+
fields: [ "from", "to" ],
58+
fieldValueTypes: "double"
59+
};
60+
61+
var response = logCurlRequest('POST', url, body);
62+
63+
assert(response.code === 201);
64+
65+
logJsonResponse(response);
66+
~ db._drop(cn);
67+
@END_EXAMPLE_ARANGOSH_RUN
68+
@endDocuBlock

arangod/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,8 @@ endif()
836836
include(ClusterEngine/CMakeLists.txt)
837837
include(RocksDBEngine/CMakeLists.txt)
838838

839+
add_library(arango_zkd STATIC Zkd/ZkdHelper.cpp)
840+
839841
add_library(arango_restart_action STATIC
840842
RestServer/RestartAction.cpp
841843
)
@@ -919,6 +921,7 @@ add_library(arango_rocksdb STATIC
919921
${ROCKSDB_SOURCES}
920922
${ADDITIONAL_LIB_ARANGO_ROCKSDB_SOURCES}
921923
)
924+
target_link_libraries(arango_rocksdb arango_zkd)
922925

923926
add_dependencies(arango_rocksdb snappy)
924927

arangod/ClusterEngine/ClusterIndex.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "ClusterIndex.h"
2828
#include "Indexes/SimpleAttributeEqualityMatcher.h"
2929
#include "Indexes/SortedIndexAttributeMatcher.h"
30+
#include "RocksDBEngine/RocksDBZkdIndex.h"
3031
#include "StorageEngine/EngineSelectorFeature.h"
3132
#include "VocBase/LogicalCollection.h"
3233
#include "VocBase/ticks.h"
@@ -277,6 +278,9 @@ Index::FilterCosts ClusterIndex::supportsFilterCondition(
277278
return Index::supportsFilterCondition(allIndexes, node, reference, itemsInIndex);
278279
}
279280

281+
case TRI_IDX_TYPE_ZKD_INDEX:
282+
return zkd::supportsFilterCondition(this, allIndexes, node, reference, itemsInIndex);
283+
280284
case TRI_IDX_TYPE_UNKNOWN:
281285
break;
282286
}
@@ -315,6 +319,10 @@ Index::SortCosts ClusterIndex::supportsSortCondition(arangodb::aql::SortConditio
315319
break;
316320
}
317321

322+
case TRI_IDX_TYPE_ZKD_INDEX:
323+
// Sorting not supported
324+
return Index::SortCosts{};
325+
318326
case TRI_IDX_TYPE_UNKNOWN:
319327
break;
320328
}
@@ -359,7 +367,10 @@ aql::AstNode* ClusterIndex::specializeCondition(aql::AstNode* node,
359367
case TRI_IDX_TYPE_PERSISTENT_INDEX: {
360368
return SortedIndexAttributeMatcher::specializeCondition(this, node, reference);
361369
}
362-
370+
371+
case TRI_IDX_TYPE_ZKD_INDEX:
372+
return zkd::specializeCondition(this, node, reference);
373+
363374
case TRI_IDX_TYPE_UNKNOWN:
364375
break;
365376
}

arangod/ClusterEngine/ClusterIndexFactory.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ void ClusterIndexFactory::linkIndexFactories(application_features::ApplicationSe
166166
static const PrimaryIndexFactory primaryIndexFactory(server, "primary");
167167
static const DefaultIndexFactory skiplistIndexFactory(server, "skiplist");
168168
static const DefaultIndexFactory ttlIndexFactory(server, "ttl");
169+
static const DefaultIndexFactory zkdIndexFactory(server, "zkd");
169170

170171
factory.emplace(edgeIndexFactory._type, edgeIndexFactory);
171172
factory.emplace(fulltextIndexFactory._type, fulltextIndexFactory);
@@ -177,6 +178,7 @@ void ClusterIndexFactory::linkIndexFactories(application_features::ApplicationSe
177178
factory.emplace(primaryIndexFactory._type, primaryIndexFactory);
178179
factory.emplace(skiplistIndexFactory._type, skiplistIndexFactory);
179180
factory.emplace(ttlIndexFactory._type, ttlIndexFactory);
181+
factory.emplace(zkdIndexFactory._type, zkdIndexFactory);
180182
}
181183

182184
ClusterIndexFactory::ClusterIndexFactory(application_features::ApplicationServer& server)

arangod/Indexes/Index.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,9 @@ Index::IndexType Index::type(char const* type, size_t len) {
332332
if (::typeMatch(type, len, "geo2")) {
333333
return TRI_IDX_TYPE_GEO2_INDEX;
334334
}
335+
if (::typeMatch(type, len, "zkd")) {
336+
return TRI_IDX_TYPE_ZKD_INDEX;
337+
}
335338
std::string const& tmp = arangodb::iresearch::DATA_SOURCE_TYPE.name();
336339
if (::typeMatch(type, len, tmp.c_str())) {
337340
return TRI_IDX_TYPE_IRESEARCH_LINK;
@@ -374,6 +377,8 @@ char const* Index::oldtypeName(Index::IndexType type) {
374377
return arangodb::iresearch::DATA_SOURCE_TYPE.name().c_str();
375378
case TRI_IDX_TYPE_NO_ACCESS_INDEX:
376379
return "noaccess";
380+
case TRI_IDX_TYPE_ZKD_INDEX:
381+
return "zkd";
377382
case TRI_IDX_TYPE_UNKNOWN: {
378383
}
379384
}

arangod/Indexes/Index.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ class Index {
102102
TRI_IDX_TYPE_TTL_INDEX,
103103
TRI_IDX_TYPE_PERSISTENT_INDEX,
104104
TRI_IDX_TYPE_IRESEARCH_LINK,
105-
TRI_IDX_TYPE_NO_ACCESS_INDEX
105+
TRI_IDX_TYPE_NO_ACCESS_INDEX,
106+
TRI_IDX_TYPE_ZKD_INDEX
106107
};
107108

108109
/// @brief: helper struct returned by index methods that determine the costs

arangod/Indexes/IndexFactory.cpp

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,9 @@ std::shared_ptr<Index> IndexFactory::prepareIndexFromSlice(velocypack::Slice def
301301

302302
/// same for both storage engines
303303
std::vector<std::string> IndexFactory::supportedIndexes() const {
304-
return std::vector<std::string>{"primary", "edge", "hash", "skiplist",
305-
"ttl", "persistent", "geo", "fulltext"};
304+
return std::vector<std::string>{"primary", "edge", "hash",
305+
"skiplist", "ttl", "persistent",
306+
"geo", "fulltext", "zkd"};
306307
}
307308

308309
std::unordered_map<std::string, std::string> IndexFactory::indexAliases() const {
@@ -581,4 +582,34 @@ Result IndexFactory::enhanceJsonIndexFulltext(VPackSlice definition,
581582
return res;
582583
}
583584

585+
/// @brief enhances the json of a zkd index
///
/// Validates the user-supplied index `definition` and writes the normalized
/// attributes into `builder`. Returns TRI_ERROR_BAD_PARAMETER when
/// `fieldValueTypes` is not the string "double" or when the definition sets
/// the sparse property to true; otherwise returns the result of
/// processIndexFields().
586+
Result IndexFactory::enhanceJsonIndexZkd(VPackSlice definition,
587+
VPackBuilder& builder, bool create) {
588+
// `fieldValueTypes` is mandatory and must be exactly the string "double".
if (auto fieldValueTypes = definition.get("fieldValueTypes");
589+
!fieldValueTypes.isString() || !fieldValueTypes.isEqualString("double")) {
590+
return Result(
591+
TRI_ERROR_BAD_PARAMETER,
592+
"zkd index requires `fieldValueTypes` to be set to `double` - future "
593+
"releases might lift this requirement");
594+
}
595+
596+
builder.add("fieldValueTypes", VPackValue("double"));
597+
// NOTE(review): 1 and INT_MAX are presumably the minimum and maximum number
// of fields, and the trailing `false` presumably disallows array expansion
// - confirm against the processIndexFields() declaration.
Result res = processIndexFields(definition, builder, 1, INT_MAX, create, false);
598+
599+
if (res.ok()) {
600+
// Sparse zkd indexes are rejected explicitly rather than silently ignored.
if (auto isSparse = definition.get(StaticStrings::IndexSparse).isTrue(); isSparse) {
601+
return Result(TRI_ERROR_BAD_PARAMETER,
602+
"zkd index does not support sparse property");
603+
}
604+
605+
processIndexUniqueFlag(definition, builder);
606+
607+
// Carry over the `inBackground` flag, defaulting to false when absent.
bool bck = basics::VelocyPackHelper::getBooleanValue(definition, StaticStrings::IndexInBackground,
608+
false);
609+
builder.add(StaticStrings::IndexInBackground, VPackValue(bck));
610+
}
611+
612+
return res;
613+
}
614+
584615
} // namespace arangodb

arangod/Indexes/IndexFactory.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,15 @@ class IndexFactory {
158158
static Result enhanceJsonIndexGeo(velocypack::Slice definition,
159159
velocypack::Builder& builder, bool create,
160160
int minFields, int maxFields);
161-
161+
162162
/// @brief enhances the json of a fulltext index
163163
static Result enhanceJsonIndexFulltext(velocypack::Slice definition,
164164
velocypack::Builder& builder, bool create);
165165

166+
/// @brief enhances the json of a zkd index
167+
static Result enhanceJsonIndexZkd(arangodb::velocypack::Slice definition,
168+
arangodb::velocypack::Builder& builder, bool create);
169+
166170
protected:
167171
/// @brief clear internal factory/normalizer maps
168172
void clear();

arangod/RocksDBEngine/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ set(ROCKSDB_SOURCES
7272
RocksDBEngine/RocksDBLogValue.cpp
7373
RocksDBEngine/RocksDBMetaCollection.cpp
7474
RocksDBEngine/RocksDBMetadata.cpp
75-
RocksDBEngine/RocksDBTransactionMethods.cpp
7675
RocksDBEngine/RocksDBOptimizerRules.cpp
7776
RocksDBEngine/RocksDBOptionFeature.cpp
77+
RocksDBEngine/RocksDBPersistedLog.cpp
7878
RocksDBEngine/RocksDBPrimaryIndex.cpp
7979
RocksDBEngine/RocksDBRecoveryManager.cpp
8080
RocksDBEngine/RocksDBReplicationCommon.cpp
@@ -90,6 +90,7 @@ set(ROCKSDB_SOURCES
9090
RocksDBEngine/RocksDBSettingsManager.cpp
9191
RocksDBEngine/RocksDBSyncThread.cpp
9292
RocksDBEngine/RocksDBTransactionCollection.cpp
93+
RocksDBEngine/RocksDBTransactionMethods.cpp
9394
RocksDBEngine/RocksDBTransactionState.cpp
9495
RocksDBEngine/RocksDBTtlIndex.cpp
9596
RocksDBEngine/RocksDBTypes.cpp
@@ -98,6 +99,6 @@ set(ROCKSDB_SOURCES
9899
RocksDBEngine/RocksDBVPackIndex.cpp
99100
RocksDBEngine/RocksDBValue.cpp
100101
RocksDBEngine/RocksDBWalAccess.cpp
101-
RocksDBEngine/RocksDBPersistedLog.cpp
102+
RocksDBEngine/RocksDBZkdIndex.cpp
102103
)
103104
set(ROCKSDB_SOURCES ${ROCKSDB_SOURCES} PARENT_SCOPE)

arangod/RocksDBEngine/RocksDBColumnFamilyManager.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,19 @@
3030

3131
namespace arangodb {
3232

33-
std::array<char const*, arangodb::RocksDBColumnFamilyManager::numberOfColumnFamilies>
34-
RocksDBColumnFamilyManager::_internalNames = {"default", "Documents",
35-
"PrimaryIndex", "EdgeIndex",
36-
"VPackIndex", "GeoIndex",
37-
"FulltextIndex", "ReplicatedLogs"};
33+
std::array<char const*, arangodb::RocksDBColumnFamilyManager::numberOfColumnFamilies> RocksDBColumnFamilyManager::_internalNames =
34+
{"default", "Documents", "PrimaryIndex",
35+
"EdgeIndex", "VPackIndex", "GeoIndex",
36+
"FulltextIndex", "ReplicatedLogs", "ZkdIndex"};
37+
3838
std::array<char const*, arangodb::RocksDBColumnFamilyManager::numberOfColumnFamilies> RocksDBColumnFamilyManager::_externalNames =
39-
{"definitions", "documents", "primary", "edge", "vpack", "geo", "fulltext", "replicated-logs"};
39+
{"definitions", "documents", "primary", "edge", "vpack",
40+
"geo", "fulltext", "replicated-logs", "zkd"};
4041

4142
std::array<rocksdb::ColumnFamilyHandle*, RocksDBColumnFamilyManager::numberOfColumnFamilies>
42-
RocksDBColumnFamilyManager::_handles = {nullptr, nullptr, nullptr, nullptr,
43-
nullptr, nullptr, nullptr, nullptr};
43+
RocksDBColumnFamilyManager::_handles = {nullptr, nullptr, nullptr,
44+
nullptr, nullptr, nullptr,
45+
nullptr, nullptr, nullptr};
4446

4547
rocksdb::ColumnFamilyHandle* RocksDBColumnFamilyManager::_defaultHandle = nullptr;
4648

arangod/RocksDBEngine/RocksDBColumnFamilyManager.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct RocksDBColumnFamilyManager {
4646
GeoIndex = 5,
4747
FulltextIndex = 6,
4848
ReplicatedLogs = 7,
49+
ZkdIndex = 8,
4950

5051
Invalid = 1024 // special placeholder
5152
};
@@ -56,7 +57,7 @@ struct RocksDBColumnFamilyManager {
5657
};
5758

5859
static constexpr size_t minNumberOfColumnFamilies = 7;
59-
static constexpr size_t numberOfColumnFamilies = 8;
60+
static constexpr size_t numberOfColumnFamilies = 9;
6061

6162
static void initialize();
6263

arangod/RocksDBEngine/RocksDBEngine.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ void RocksDBEngine::start() {
707707
addFamily(RocksDBColumnFamilyManager::Family::GeoIndex);
708708
addFamily(RocksDBColumnFamilyManager::Family::FulltextIndex);
709709
addFamily(RocksDBColumnFamilyManager::Family::ReplicatedLogs);
710-
710+
addFamily(RocksDBColumnFamilyManager::Family::ZkdIndex);
711711

712712
size_t const minNumberOfColumnFamilies = RocksDBColumnFamilyManager::minNumberOfColumnFamilies;
713713
bool dbExisted = false;
@@ -746,15 +746,17 @@ void RocksDBEngine::start() {
746746
<< "found existing column families: " << names;
747747
auto const replicatedLogsName = RocksDBColumnFamilyManager::name(
748748
RocksDBColumnFamilyManager::Family::ReplicatedLogs);
749+
// Name of the ZkdIndex column family. It is compared against missing column
// families below, so it must be looked up from Family::ZkdIndex (not
// Family::ReplicatedLogs); otherwise the comparison only repeats the
// replicated-logs check and a missing zkd family is never matched.
auto const zkdIndexName = RocksDBColumnFamilyManager::name(
750+
RocksDBColumnFamilyManager::Family::ZkdIndex);
749751

750752
for (auto const& it : cfFamilies) {
751753
auto it2 = std::find(existingColumnFamilies.begin(),
752754
existingColumnFamilies.end(), it.name);
753755
if (it2 == existingColumnFamilies.end()) {
754756

755-
if (it.name == replicatedLogsName) {
757+
if (it.name == replicatedLogsName || it.name == zkdIndexName) {
756758
LOG_TOPIC("293c3", INFO, Logger::STARTUP)
757-
<< "column family " << replicatedLogsName
759+
<< "column family " << it.name
758760
<< " is missing and will be created.";
759761
continue;
760762
}
@@ -836,6 +838,8 @@ void RocksDBEngine::start() {
836838
cfHandles[6]);
837839
RocksDBColumnFamilyManager::set(RocksDBColumnFamilyManager::Family::ReplicatedLogs,
838840
cfHandles[7]);
841+
RocksDBColumnFamilyManager::set(RocksDBColumnFamilyManager::Family::ZkdIndex,
842+
cfHandles[8]);
839843
TRI_ASSERT(RocksDBColumnFamilyManager::get(RocksDBColumnFamilyManager::Family::Definitions)
840844
->GetID() == 0);
841845

arangod/RocksDBEngine/RocksDBIndex.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ RocksDBKeyBounds RocksDBIndex::getBounds(Index::IndexType type, uint64_t objectI
378378
return RocksDBKeyBounds::GeoIndex(objectId);
379379
case RocksDBIndex::TRI_IDX_TYPE_IRESEARCH_LINK:
380380
return RocksDBKeyBounds::DatabaseViews(objectId);
381+
case RocksDBIndex::TRI_IDX_TYPE_ZKD_INDEX:
382+
return RocksDBKeyBounds::ZkdIndex(objectId);
381383
case RocksDBIndex::TRI_IDX_TYPE_UNKNOWN:
382384
default:
383385
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED);

0 commit comments

Comments
 (0)
0