allow in-place analyzer creation via link definition (#10466) (#10481) · arangodb/arangodb@8741d33 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8741d33

Browse files
gnusiKVS85
authored and committed
allow in-place analyzer creation via link definition (#10466) (#10481)
* allow in-place analyzer creation via link definition (#10466) * allow in-place analyzer creation via link definition * add special handling for _analyzers collection * modify initial syncer * address review comments * fix accidentally broken test * address compilation errors
1 parent 8128781 commit 8741d33

13 files changed

+761
-153
lines changed

arangod/IResearch/IResearchCommon.cpp

Lines changed: 2 additions & 0 deletions
53
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@ arangodb::LogTopic& logTopic() {
5252
/*static*/ std::string const StaticStrings::LinksField("links");
53
/*static*/ std::string const StaticStrings::VersionField("version");
5454
/*static*/ std::string const StaticStrings::ViewIdField("view");
55+
/*static*/ std::string const StaticStrings::AnalyzerDefinitionsField("analyzerDefinitions");
5556
/*static*/ std::string const StaticStrings::AnalyzerFeaturesField("features");
5657
/*static*/ std::string const StaticStrings::AnalyzerNameField("name");
5758
/*static*/ std::string const StaticStrings::AnalyzerPropertiesField("properties");
5859
/*static*/ std::string const StaticStrings::AnalyzerTypeField("type");
60+
/*static*/ std::string const StaticStrings::PrimarySortField("primarySort");
5961

6062
} // namespace iresearch
6163
} // namespace arangodb

arangod/IResearch/IResearchCommon.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ struct StaticStrings {
6161
////////////////////////////////////////////////////////////////////////////////
6262
static std::string const ViewIdField;
6363

64+
////////////////////////////////////////////////////////////////////////////////
65+
/// @brief the name of the field in the IResearch Link definition denoting the
66+
/// referenced analyzer definitions
67+
////////////////////////////////////////////////////////////////////////////////
68+
static std::string const AnalyzerDefinitionsField;
69+
6470
////////////////////////////////////////////////////////////////////////////////
6571
/// @brief the name of the field in the analyzer definition denoting the
6672
/// corresponding analyzer name
@@ -84,6 +90,12 @@ struct StaticStrings {
8490
/// corresponding analyzer features
8591
////////////////////////////////////////////////////////////////////////////////
8692
static std::string const AnalyzerFeaturesField;
93+
94+
////////////////////////////////////////////////////////////////////////////////
95+
/// @brief the name of the field in the IResearch Link definition denoting the
96+
/// primary sort
97+
////////////////////////////////////////////////////////////////////////////////
98+
static std::string const PrimarySortField;
8799
};
88100

89101
} // namespace iresearch

arangod/IResearch/IResearchLinkHelper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,7 @@ namespace iresearch {
783783
std::string errorField;
784784

785785
if (!linkDefinition.isNull()) { // have link definition
786-
if (!meta.init(linkDefinition, false, errorField, &vocbase)) { // for db-server analyzer validation should have already applied on coordinator
786+
if (!meta.init(linkDefinition, true, errorField, &vocbase)) { // for db-server analyzer validation should have already applied on coordinator
787787
return arangodb::Result( // result
788788
TRI_ERROR_BAD_PARAMETER, // code
789789
errorField.empty()

arangod/IResearch/IResearchView.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -363,12 +363,11 @@ arangodb::Result IResearchView::appendVelocyPackImpl( // append JSON
363363
static const std::function<bool(irs::string_ref const& key)> persistenceAcceptor =
364364
[](irs::string_ref const&) -> bool { return true; };
365365

366-
auto& acceptor =
367-
(context == Serialization::Persistence || context == Serialization::PersistenceWithInProgress || context == Serialization::Inventory)
368-
? persistenceAcceptor
369-
: propertiesAcceptor;
366+
auto* acceptor = &propertiesAcceptor;
367+
368+
if (context == Serialization::Persistence || context == Serialization::PersistenceWithInProgress) {
369+
acceptor = &persistenceAcceptor;
370370

371-
if (context == Serialization::Persistence || context == Serialization::PersistenceWithInProgress) {
372371
if (arangodb::ServerState::instance()->isSingleServer()) {
373372
auto res = arangodb::LogicalViewHelperStorageEngine::properties(builder, *this);
374373

@@ -392,19 +391,14 @@ arangodb::Result IResearchView::appendVelocyPackImpl( // append JSON
392391
sanitizedBuilder.openObject();
393392

394393
if (!_meta.json(sanitizedBuilder) ||
395-
!mergeSliceSkipKeys(builder, sanitizedBuilder.close().slice(), acceptor)) {
394+
!mergeSliceSkipKeys(builder, sanitizedBuilder.close().slice(), *acceptor)) {
396395
return arangodb::Result(
397396
TRI_ERROR_INTERNAL,
398397
std::string("failure to generate definition while generating "
399398
"properties jSON for arangosearch View in database '") +
400399
vocbase().name() + "'");
401400
}
402401

403-
if (context == Serialization::Inventory) {
404-
// nothing more to output
405-
return {};
406-
}
407-
408402
if (context == Serialization::Persistence || context == Serialization::PersistenceWithInProgress) {
409403
IResearchViewMetaState metaState;
410404

@@ -461,7 +455,7 @@ arangodb::Result IResearchView::appendVelocyPackImpl( // append JSON
461455
);
462456
}
463457

464-
auto visitor = [this, &linksBuilder, &res]( // visit collections
458+
auto visitor = [this, &linksBuilder, &res, context]( // visit collections
465459
arangodb::TransactionCollection& trxCollection // transaction collection
466460
)->bool {
467461
auto collection = trxCollection.collection();
@@ -480,7 +474,7 @@ arangodb::Result IResearchView::appendVelocyPackImpl( // append JSON
480474

481475
linkBuilder.openObject();
482476

483-
if (!link->properties(linkBuilder, false).ok()) { // link definitions are not output if forPersistence
477+
if (!link->properties(linkBuilder, Serialization::Inventory == context).ok()) { // link definitions are not output if forPersistence
484478
LOG_TOPIC("713ad", WARN, arangodb::iresearch::TOPIC)
485479
<< "failed to generate json for arangosearch link '" << link->id() << "' while generating json for arangosearch view '" << name() << "'";
486480

arangod/IResearch/IResearchViewCoordinator.cpp

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -181,28 +181,34 @@ arangodb::Result IResearchViewCoordinator::appendVelocyPackImpl(
181181
return {};
182182
}
183183

184-
static const std::function<bool(irs::string_ref const& key)> propertiesAcceptor =
184+
static const std::function<bool(irs::string_ref const&)> propertiesAcceptor =
185185
[](irs::string_ref const& key) -> bool {
186186
return key != StaticStrings::VersionField; // ignored fields
187187
};
188-
static const std::function<bool(irs::string_ref const& key)> persistenceAcceptor =
188+
static const std::function<bool(irs::string_ref const&)> persistenceAcceptor =
189189
[](irs::string_ref const&) -> bool { return true; };
190190

191+
static const std::function<bool(irs::string_ref const&)> linkPropertiesAcceptor =
192+
[](irs::string_ref const& key) -> bool {
193+
return key != iresearch::StaticStrings::AnalyzerDefinitionsField
194+
&& key != iresearch::StaticStrings::PrimarySortField;
195+
};
196+
191197
auto* acceptor = &propertiesAcceptor;
192198

193199
if (context == Serialization::Persistence ||
194-
context == Serialization::PersistenceWithInProgress ||
195-
context == Serialization::Inventory) {
200+
context == Serialization::PersistenceWithInProgress) {
196201
auto res = arangodb::LogicalViewHelperClusterInfo::properties(builder, *this);
197202

198203
if (!res.ok()) {
199204
return res;
200205
}
201206

202207
acceptor = &persistenceAcceptor;
203-
// links are not persisted, their definitions are part of the corresponding
204-
// collections
205-
} else if (context == Serialization::Properties) {
208+
}
209+
210+
if (context == Serialization::Properties ||
211+
context == Serialization::Inventory) {
206212
// verify that the current user has access on all linked collections
207213
auto* exec = ExecContext::CURRENT;
208214
if (exec) {
@@ -213,18 +219,31 @@ arangodb::Result IResearchViewCoordinator::appendVelocyPackImpl(
213219
}
214220
}
215221

222+
VPackBuilder tmp;
223+
216224
ReadMutex mutex(_mutex);
217225
SCOPED_LOCK(mutex); // '_collections' can be asynchronously modified
218226

219-
VPackBuilder links;
220-
links.openObject();
221-
227+
builder.add(StaticStrings::LinksField, VPackValue(VPackValueType::Object));
222228
for (auto& entry : _collections) {
223-
links.add(entry.second.first, entry.second.second.slice());
224-
}
229+
auto linkSlice = entry.second.second.slice();
230+
231+
if (context == Serialization::Properties) {
232+
tmp.clear();
233+
tmp.openObject();
234+
if (!mergeSliceSkipKeys(tmp, linkSlice, linkPropertiesAcceptor)) {
235+
return {
236+
TRI_ERROR_INTERNAL,
237+
"failed to generate externally visible link definition for arangosearch View '" + name() + "'"
238+
};
239+
}
225240

226-
links.close();
227-
builder.add(StaticStrings::LinksField, links.slice());
241+
linkSlice = tmp.close().slice();
242+
}
243+
244+
builder.add(entry.second.first, linkSlice);
245+
}
246+
builder.close();
228247
}
229248

230249
if (!builder.isOpenObject()) {
@@ -266,7 +285,7 @@ arangodb::Result IResearchViewCoordinator::link(IResearchLink const& link) {
266285

267286
builder.openObject();
268287

269-
auto res = link.properties(builder, false); // generate user-visible definition, agency will not see links
288+
auto res = link.properties(builder, true); // generate user-visible definition, agency will not see links
270289

271290
if (!res.ok()) {
272291
return res;

arangod/Replication/DatabaseInitialSyncer.cpp

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,6 +1359,7 @@ Result DatabaseInitialSyncer::handleCollectionsAndViews(VPackSlice const& collSl
13591359
bool incremental) {
13601360
TRI_ASSERT(collSlices.isArray());
13611361

1362+
std::vector<std::pair<VPackSlice, VPackSlice>> systemCollections;
13621363
std::vector<std::pair<VPackSlice, VPackSlice>> collections;
13631364
for (VPackSlice it : VPackArrayIterator(collSlices)) {
13641365
if (!it.isObject()) {
@@ -1414,10 +1415,46 @@ Result DatabaseInitialSyncer::handleCollectionsAndViews(VPackSlice const& collSl
14141415
}
14151416
}
14161417

1417-
collections.emplace_back(parameters, indexes);
1418+
if (masterName == StaticStrings::AnalyzersCollection) {
1419+
// _analyzers collection has to be restored before view creation
1420+
systemCollections.emplace_back(parameters, indexes);
1421+
} else {
1422+
collections.emplace_back(parameters, indexes);
1423+
}
1424+
}
1425+
1426+
// STEP 1: validate collection declarations from master
1427+
// ----------------------------------------------------------------------------------
1428+
1429+
// STEP 2: drop and re-create collections locally if they are also present on
1430+
// the master
1431+
// ------------------------------------------------------------------------------------
1432+
1433+
// iterate over all collections from the master...
1434+
std::array<SyncPhase, 2> phases{{PHASE_VALIDATE, PHASE_DROP_CREATE}};
1435+
for (auto const& phase : phases) {
1436+
Result r = iterateCollections(systemCollections, incremental, phase);
1437+
1438+
if (r.fail()) {
1439+
return r;
1440+
}
1441+
1442+
r = iterateCollections(collections, incremental, phase);
1443+
1444+
if (r.fail()) {
1445+
return r;
1446+
}
14181447
}
14191448

1420-
// STEP 1: now that the collections exist create the views
1449+
// STEP 3: restore data for system collections
1450+
// ----------------------------------------------------------------------------------
1451+
auto const res = iterateCollections(systemCollections, incremental, PHASE_DUMP);
1452+
1453+
if (res.fail()) {
1454+
return res;
1455+
}
1456+
1457+
// STEP 4: now that the collections exist create the views
14211458
// this should be faster than re-indexing afterwards
14221459
// ----------------------------------------------------------------------------------
14231460

@@ -1435,24 +1472,7 @@ Result DatabaseInitialSyncer::handleCollectionsAndViews(VPackSlice const& collSl
14351472
_config.progress.set("view creation skipped because of configuration");
14361473
}
14371474

1438-
// STEP 2: validate collection declarations from master
1439-
// ----------------------------------------------------------------------------------
1440-
1441-
// STEP 3: drop and re-create collections locally if they are also present on
1442-
// the master
1443-
// ------------------------------------------------------------------------------------
1444-
1445-
// iterate over all collections from the master...
1446-
std::array<SyncPhase, 2> phases{{PHASE_VALIDATE, PHASE_DROP_CREATE}};
1447-
for (auto const& phase : phases) {
1448-
Result r = iterateCollections(collections, incremental, phase);
1449-
1450-
if (r.fail()) {
1451-
return r;
1452-
}
1453-
}
1454-
1455-
// STEP 4: sync collection data from master and create initial indexes
1475+
// STEP 5: sync collection data from master and create initial indexes
14561476
// ----------------------------------------------------------------------------------
14571477

14581478
// now load the data into the collections

arangod/VocBase/LogicalCollection.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -614,9 +614,6 @@ void LogicalCollection::toVelocyPackForClusterInventory(VPackBuilder& result,
614614
case Index::TRI_IDX_TYPE_PRIMARY_INDEX:
615615
case Index::TRI_IDX_TYPE_EDGE_INDEX:
616616
return false;
617-
case Index::TRI_IDX_TYPE_IRESEARCH_LINK:
618-
flags = Index::makeFlags(Index::Serialize::Internals);
619-
return true;
620617
default:
621618
flags = Index::makeFlags();
622619
return !idx->isHidden() && !idx->inProgress();

arangod/VocBase/vocbase.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -939,9 +939,6 @@ void TRI_vocbase_t::inventory(VPackBuilder& result, TRI_voc_tick_t maxTick,
939939
case Index::TRI_IDX_TYPE_PRIMARY_INDEX:
940940
case Index::TRI_IDX_TYPE_EDGE_INDEX:
941941
return false;
942-
case Index::TRI_IDX_TYPE_IRESEARCH_LINK:
943-
flags = Index::makeFlags(Index::Serialize::Internals);
944-
return true;
945942
default:
946943
flags = Index::makeFlags(Index::Serialize::Basics);
947944
return !idx->isHidden();

arangosh/Restore/RestoreFeature.cpp

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -903,27 +903,10 @@ arangodb::Result processInputDirectory(
903903
std::sort(collections.begin(), collections.end(), ::sortCollectionsForCreation);
904904

905905
std::unique_ptr<arangodb::RestoreFeature::JobData> usersData;
906+
std::unique_ptr<arangodb::RestoreFeature::JobData> analyzersData;
906907
std::vector<std::unique_ptr<arangodb::RestoreFeature::JobData>> jobs;
907908
jobs.reserve(collections.size());
908909

909-
// Step 2: create views
910-
// @note: done after collection population since views might depend on data
911-
// in restored collections
912-
if (options.importStructure && !views.empty()) {
913-
LOG_TOPIC("f723c", INFO, Logger::RESTORE) << "# Creating views...";
914-
915-
for (auto const& viewDefinition : views) {
916-
LOG_TOPIC("c608d", DEBUG, Logger::RESTORE)
917-
<< "# Creating view: " << viewDefinition.toJson();
918-
919-
auto res = ::restoreView(httpClient, options, viewDefinition.slice());
920-
921-
if (!res.ok()) {
922-
return res;
923-
}
924-
}
925-
}
926-
927910
bool didModifyFoxxCollection = false;
928911
// Step 3: create collections
929912
for (VPackBuilder const& b : collections) {
@@ -961,13 +944,43 @@ arangodb::Result processInputDirectory(
961944
// reason is that loading into the users collection may change the
962945
// credentials for the current arangorestore connection!
963946
usersData = std::move(jobData);
947+
} else if (name.isString() && name.stringRef() == StaticStrings::AnalyzersCollection) {
948+
// special treatment for _analyzers collection - this must be the very first
949+
stats.totalCollections++;
950+
analyzersData = std::move(jobData);
964951
} else {
965952
stats.totalCollections++;
966953
jobs.push_back(std::move(jobData));
967954
}
968955
}
969-
970-
// Step 4: fire up data transfer
956+
957+
// Step 4: restore data from _analyzers collection
958+
if (analyzersData) {
959+
// restore analyzers
960+
if (!jobQueue.queueJob(std::move(analyzersData))) {
961+
return Result(TRI_ERROR_OUT_OF_MEMORY, "unable to queue restore job");
962+
}
963+
964+
jobQueue.waitForIdle();
965+
}
966+
967+
// Step 5: create arangosearch views
968+
if (options.importStructure && !views.empty()) {
969+
LOG_TOPIC("f723c", INFO, Logger::RESTORE) << "# Creating views...";
970+
971+
for (auto const& viewDefinition : views) {
972+
LOG_TOPIC("c608d", DEBUG, Logger::RESTORE)
973+
<< "# Creating view: " << viewDefinition.toJson();
974+
975+
auto res = ::restoreView(httpClient, options, viewDefinition.slice());
976+
977+
if (!res.ok()) {
978+
return res;
979+
}
980+
}
981+
}
982+
983+
// Step 6: fire up data transfer
971984
for (auto& job : jobs) {
972985
if (!jobQueue.queueJob(std::move(job))) {
973986
return Result(TRI_ERROR_OUT_OF_MEMORY, "unable to queue restore job");
@@ -1045,6 +1058,7 @@ arangodb::Result processInputDirectory(
10451058
return firstError;
10461059
}
10471060
}
1061+
10481062
} catch (std::exception const& ex) {
10491063
return {TRI_ERROR_INTERNAL,
10501064
std::string(

0 commit comments

Comments
 (0)
0