improve parallelism capabilities of arangorestore (#14010) · arangodb/arangodb@d396a1f · GitHub
[go: up one dir, main page]

Skip to content

Commit d396a1f

Browse files
jsteemann and mpoeter
authored
improve parallelism capabilities of arangorestore (#14010)
Co-authored-by: Manuel Pöter <manuel.poeter@leanfive.com>
1 parent fd9fbec commit d396a1f

File tree

14 files changed

+920
-523
lines changed

14 files changed

+920
-523
lines changed

CHANGELOG

Lines changed: 23 additions & 0 deletions
8000
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,29 @@
11
devel
22
-----
33

4+
* Changed default value of arangodump's `--envelope` option from `true` to
5+
`false`. This allows using higher parallelism in arangorestore when
6+
restoring large collection dumps. As a side-effect, this will also decrease
7+
the size of dumps taken with arangodump, and should slightly improve dump
8+
speed.
9+
10+
* Improve parallelism capabilities of arangorestore.
11+
12+
arangorestore can now dispatch restoring data chunks of a collection to idle
13+
background threads, so that multiple restore requests can be in flight for
14+
the same collection concurrently.
15+
16+
This can improve restore speed in situations when there are idle threads
17+
left (number of threads can be configured via arangorestore's `--threads`
18+
option) and the dump file for the collection is large.
19+
20+
The improved parallelism is only used when restoring dumps that are in the
21+
non-enveloped format. This format was introduced in ArangoDB 3.8.
22+
The reason is that dumps in the non-enveloped format only contain the raw
23+
documents, which can be restored independently of each other, i.e. in any
24+
order. However, the enveloped format may contain documents and remove
25+
operations, which need to be restored in the original order.
26+
427
* Fix BTS-374: thread race between ArangoSearch link unloading and storage
528
engine WAL flushing.
629

arangod/RestHandler/RestReplicationHandler.cpp

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,8 +1448,11 @@ Result RestReplicationHandler::parseBatch(transaction::Methods& trx,
14481448
VPackStringRef bodyStr = _request->rawPayload();
14491449
char const* ptr = bodyStr.data();
14501450
char const* end = ptr + bodyStr.size();
1451-
1452-
VPackBuilder builder(&basics::VelocyPackHelper::strictRequestValidationOptions);
1451+
1452+
VPackOptions builderOptions = basics::VelocyPackHelper::strictRequestValidationOptions;
1453+
builderOptions.paddingBehavior = VPackOptions::PaddingBehavior::UsePadding;
1454+
1455+
VPackBuilder builder(&builderOptions);
14531456

14541457
// First parse and collect all markers, we assemble everything in one
14551458
// large builder holding an array
@@ -1493,12 +1496,15 @@ Result RestReplicationHandler::parseBatch(transaction::Methods& trx,
14931496

14941497
TRI_ASSERT(doc.isObject());
14951498
bool checkKey = true;
1499+
bool checkRev = generateNewRevisionIds;
14961500
for (auto it : VPackObjectIterator(doc, true)) {
14971501
// only check for "_key" attribute here if we still have to.
14981502
// once we have seen it, it will not show up again in the same document
14991503
bool const isKey = checkKey && (arangodb::velocypack::StringRef(it.key) == StaticStrings::KeyString);
15001504

15011505
if (isKey) {
1506+
// _key attribute
1507+
15021508
// prevent checking for _key twice in the same document
15031509
checkKey = false;
15041510

@@ -1515,17 +1521,23 @@ Result RestReplicationHandler::parseBatch(transaction::Methods& trx,
15151521
// with MMFiles dumps from <= 3.6
15161522
documentsToRemove.erase(it.value.copyString());
15171523
}
1518-
}
1524+
1525+
documentsToInsert.add(it.key);
1526+
documentsToInsert.add(it.value);
1527+
} else if (checkRev && arangodb::velocypack::StringRef(it.key) == StaticStrings::RevString) {
1528+
// _rev attribute
15191529

1520-
documentsToInsert.add(it.key);
1530+
// prevent checking for _rev twice in the same document
1531+
checkRev = false;
15211532

1522-
if (generateNewRevisionIds &&
1523-
!isKey &&
1524-
arangodb::velocypack::StringRef(it.key) == StaticStrings::RevString) {
15251533
char ridBuffer[arangodb::basics::maxUInt64StringSize];
15261534
RevisionId newRid = physical->newRevisionId();
1535+
1536+
documentsToInsert.add(it.key);
15271537
documentsToInsert.add(newRid.toValuePair(ridBuffer));
15281538
} else {
1539+
// copy key/value verbatim
1540+
documentsToInsert.add(it.key);
15291541
documentsToInsert.add(it.value);
15301542
}
15311543
}
@@ -1629,15 +1641,18 @@ Result RestReplicationHandler::processRestoreDataBatch(transaction::Methods& trx
16291641
std::string const& collectionName,
16301642
bool generateNewRevisionIds) {
16311643
// we'll build all documents to insert in this builder
1632-
VPackBuilder documentsToInsert;
1644+
VPackOptions vpackOptions;
1645+
vpackOptions.paddingBehavior = VPackOptions::PaddingBehavior::UsePadding;
1646+
1647+
VPackBuilder documentsToInsert(&vpackOptions);
16331648
std::unordered_set<std::string> documentsToRemove;
16341649
Result res = parseBatch(trx, collectionName, documentsToInsert, documentsToRemove, generateNewRevisionIds);
16351650
if (res.fail()) {
16361651
return res;
16371652
}
16381653

16391654
OperationOptions options(_context);
1640-
options.silent = false;
1655+
options.silent = true;
16411656
options.ignoreRevs = true;
16421657
options.isRestore = true;
16431658
options.waitForSync = false;
@@ -1679,6 +1694,42 @@ Result RestReplicationHandler::processRestoreDataBatch(transaction::Methods& trx
16791694
<< " documents for restore: " << opRes.result.errorMessage();
16801695
return opRes.result;
16811696
}
1697+
1698+
if (opRes.countErrorCodes.empty()) {
1699+
// no detailed errors reported. all good
1700+
return Result();
1701+
}
1702+
1703+
// at least one error occurred
1704+
if (opRes.slice().isArray()) {
1705+
// Now go through the individual results and check each errors
1706+
VPackArrayIterator itRequest(requestSlice);
1707+
VPackArrayIterator itResult(opRes.slice());
1708+
1709+
while (itRequest.valid()) {
1710+
VPackSlice result = *itResult;
1711+
VPackSlice error = result.get(StaticStrings::Error);
1712+
if (error.isTrue()) {
1713+
error = result.get(StaticStrings::ErrorNum);
1714+
if (error.isNumber()) {
1715+
auto code = ErrorCode{error.getNumericValue<int>()};
1716+
error = result.get(StaticStrings::ErrorMessage);
1717+
if (error.isString()) {
1718+
return { code, error.copyString() };
1719+
}
1720+
return { code };
1721+
}
1722+
}
1723+
itRequest.next();
1724+
itResult.next();
1725+
}
1726+
}
1727+
1728+
// if we get here, we didn't have a detailed array with results.
1729+
// so we need to stick to the error code map, which is all we got
1730+
TRI_ASSERT(!opRes.countErrorCodes.empty());
1731+
ErrorCode ec = (*opRes.countErrorCodes.begin()).first;
1732+
return {ec};
16821733
} catch (arangodb::basics::Exception const& ex) {
16831734
LOG_TOPIC("8e8e1", WARN, Logger::CLUSTER)
16841735
<< "could not insert documents for restore exception: " << ex.what();
@@ -1692,30 +1743,6 @@ Result RestReplicationHandler::processRestoreDataBatch(transaction::Methods& trx
16921743
<< "could not insert documents for restore exception.";
16931744
return Result(TRI_ERROR_INTERNAL);
16941745
}
1695-
1696-
// Now go through the individual results and check each errors
1697-
VPackArrayIterator itRequest(requestSlice);
1698-
VPackArrayIterator itResult(opRes.slice());
1699-
1700-
while (itRequest.valid()) {
1701-
VPackSlice result = *itResult;
1702-
VPackSlice error = result.get(StaticStrings::Error);
1703-
if (error.isTrue()) {
1704-
error = result.get(StaticStrings::ErrorNum);
1705-
if (error.isNumber()) {
1706-
auto code = ErrorCode{error.getNumericValue<int>()};
1707-
error = result.get(StaticStrings::ErrorMessage);
1708-
if (error.isString()) {
1709-
return { code, error.copyString() };
1710-
}
1711-
return { code };
1712-
}
1713-
}
1714-
itRequest.next();
1715-
itResult.next();
1716-
}
1717-
1718-
return Result();
17191746
}
17201747

17211748
////////////////////////////////////////////////////////////////////////////////

arangosh/Dump/DumpFeature.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class DumpFeature final : public application_features::ApplicationFeature {
8282
bool overwrite{false};
8383
bool progress{true};
8484
bool useGzip{true};
85-
bool useEnvelope{true};
85+
bool useEnvelope{false};
8686
};
8787

8888
/// @brief Stores stats about the overall dump progress

0 commit comments

Comments
 (0)
0