@@ -86,6 +86,10 @@ bool ChecksumHelper::isBlobFile(std::string_view fileName) noexcept {
86
86
return fileName.ends_with (" .blob" );
87
87
}
88
88
89
+ bool ChecksumHelper::isHashFile (std::string_view fileName) noexcept {
90
+ return fileName.ends_with (" .hash" );
91
+ }
92
+
89
93
bool ChecksumHelper::writeShaFile (std::string const & fileName,
90
94
std::string const & checksum) {
91
95
TRI_ASSERT (isSstFile (fileName) || isBlobFile (fileName));
@@ -154,26 +158,25 @@ void ChecksumHelper::checkMissingShaFiles() {
154
158
155
159
// check file extension
156
160
auto isInteresting = [](std::string_view name) noexcept -> bool {
157
- return name.ends_with (" .sst" ) || name.ends_with (" .blob" ) ||
158
- name.ends_with (" .hash" );
161
+ return isSstFile (name) || isBlobFile (name) || isHashFile (name);
159
162
};
160
163
161
164
if (!isInteresting (lhs) || !isInteresting (rhs)) {
162
165
// we are dealing with a non-interesting file type
163
166
return lhs < rhs;
164
167
}
165
168
166
- if (lhs. ends_with ( " .hash " )) {
169
+ if (isHashFile (lhs )) {
167
170
// cannot have 2 hash files for the same prefix
168
- TRI_ASSERT (!rhs. ends_with ( " .hash " ));
171
+ TRI_ASSERT (!isHashFile (rhs ));
169
172
170
173
// prefixes of lhs and rhs are identical - .hash files should be
171
174
// sorted first (before .sst or .blob files)
172
175
return true ;
173
176
}
174
- if (rhs. ends_with ( " .hash " )) {
177
+ if (isHashFile (rhs )) {
175
178
// cannot have 2 hash files for the same prefix
176
- TRI_ASSERT (!lhs. ends_with ( " .hash " ));
179
+ TRI_ASSERT (!isHashFile (lhs ));
177
180
178
181
// prefixes of lhs and rhs are identical - .hash files should be
179
182
// sorted first (before .sst or .blob files)
@@ -185,6 +188,9 @@ void ChecksumHelper::checkMissingShaFiles() {
185
188
return lhs < rhs;
186
189
});
187
190
191
+ // input files for which we need to produce hash files
192
+ std::vector<std::string> toProduce;
193
+
188
194
for (auto it = fileList.begin (); it != fileList.end (); ++it) {
189
195
if (it->size () < 5 ) {
190
196
// filename is too short and does not matter
@@ -229,7 +235,16 @@ void ChecksumHelper::checkMissingShaFiles() {
229
235
} else if (isSstFile (*it) || isBlobFile (*it)) {
230
236
// we have a .sst or .blob file which was not preceded by a .hash file.
231
237
// this means we need to recalculate the sha hash for it!
232
- std::string tempPath = basics::FileUtils::buildFilename (_rootPath, *it);
238
+ toProduce.emplace_back (basics::FileUtils::buildFilename (_rootPath, *it));
239
+ }
240
+ }
241
+
242
+ if (!toProduce.empty ()) {
243
+ LOG_TOPIC (" ff71d" , INFO, arangodb::Logger::ENGINES)
244
+ << " calculating SHA256 checksums for " << toProduce.size ()
245
+ << " RocksDB .sst file(s)" ;
246
+ size_t produced = 0 ;
247
+ for (auto const & tempPath : toProduce) {
233
248
LOG_TOPIC (" d6c86" , DEBUG, arangodb::Logger::ENGINES)
234
249
<< " checkMissingShaFiles: Computing checksum for " << tempPath;
235
250
auto checksumCalc = ChecksumCalculator ();
@@ -241,6 +256,29 @@ void ChecksumHelper::checkMissingShaFiles() {
241
256
checksumCalc.computeFinalChecksum ();
242
257
writeShaFile (tempPath, checksumCalc.getChecksum ());
243
258
}
259
+
260
+ produced++;
261
+ // progress reporting - we are only interested in very rough progress
262
+ // so that we don't spam the startup log too much. we intentionally
263
+ // report only every 100 .sst files, so in most restart situations
264
+ // there will be no progress reporting. progress reporting will become
265
+ // visible however if there are 100s or 1000s of hashes to compute.
266
+ // this situation should only happen when upgrading from Community
267
+ // Edition to Enterprise Edition or so.
268
+ if (produced != toProduce.size () && (produced % 100 == 0 )) {
269
+ int progress =
270
+ static_cast <int >(static_cast <double >(produced) /
271
+ static_cast <double >(toProduce.size ()) * 100.0 );
272
+ LOG_TOPIC (" cf86b" , INFO, arangodb::Logger::ENGINES)
273
+ << " calculated " << produced << " /" << toProduce.size ()
274
+ << " checksums (" << progress << " % of files)..." ;
275
+ }
276
+ }
277
+
278
+ if (toProduce.size () >= 10 ) {
279
+ // only report end if there was some noteworthy amount of work to do
280
+ LOG_TOPIC (" 96bbd" , INFO, arangodb::Logger::ENGINES)
281
+ << " finished calculating SHA256 checksums for RocksDB .sst files" ;
244
282
}
245
283
}
246
284
}
0 commit comments