Bugfix 3.8/presupp 439 (#15398) · rnshah9/arangodb@10cfbe7 · GitHub

Commit 10cfbe7

Authored by cpjulia, jsteemann, and KVS85

Bugfix 3.8/presupp 439 (arangodb#15398)

* Added backport for PRESUPP-439
* Changed placement of code which was after the return of the method
* Update CHANGELOG

Co-authored-by: Jan <jsteemann@users.noreply.github.com>
Co-authored-by: Vadim <vadim@arangodb.com>
1 parent fc50505 commit 10cfbe7

File tree: 9 files changed, +140 −12 lines changed

CHANGELOG

Lines changed: 5 additions & 0 deletions

@@ -1,6 +1,11 @@
 v3.8.5 (XXXX-XX-XX)
 -------------------
 
+* Fixed PRESUPP-439: In arangoimport, for CSV and TSV files, it could happen
+  that a buffer containing only the header would be sent to the server, and
+  also that batches would contain the documents for the CSV rows but not the
+  header, which should be sent together with the documents.
+
 * Changed various default values for RocksDB to tune operations for different
   typical scenarios like gp2 type volumes and gp3 type volumes and locally
   attached SSDs with RAID0:
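To make the fixed behavior concrete: when the accumulated rows exceed the batch size, arangoimport splits one CSV file across several HTTP uploads, and every upload has to repeat the header line so the server can map each row's values to attribute names. A purely illustrative example, with hypothetical data and a batch size just large enough for two rows:

  upload 1:
    "id","name"
    1,foo
    2,bar

  upload 2:
    "id","name"
    3,baz
    4,qux

Before this fix, the first upload could consist of the header line alone, while later uploads carried rows without any header.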

arangosh/Import/ImportHelper.cpp

Lines changed: 20 additions & 11 deletions

@@ -271,7 +271,6 @@ bool ImportHelper::readHeadersFile(std::string const& headersFile,
 
   constexpr int BUFFER_SIZE = 16384;
   char buffer[BUFFER_SIZE];
-
  while (!_hasError) {
    ssize_t n = fd->read(buffer, sizeof(buffer));
 
@@ -297,6 +296,7 @@ bool ImportHelper::readHeadersFile(std::string const& headersFile,
      _outputBuffer.appendChar('\n');
    }
 
+
    if (_rowsRead > 2) {
      _errorMessages.push_back("headers file '" + headersFile + "' contained more than a single line of headers");
      return false;
@@ -310,6 +310,8 @@ bool ImportHelper::readHeadersFile(std::string const& headersFile,
  _numberLines = 0;
  // restore copy of _rowsToSkip
  _rowsToSkip = rowsToSkip;
+  _outputBuffer.reset();
+
 
  return true;
}
@@ -427,12 +429,13 @@ bool ImportHelper::importDelimited(std::string const& collectionName,
    reportProgress(totalLength, fd->offset(), nextProgress);
 
    TRI_ParseCsvString(&parser, buffer, n);
-  }
 
-  if (_outputBuffer.length() > 0) {
-    sendCsvBuffer();
  }
 
+  // trailing buffer items that can be accumulated because the buffer length is
+  // smaller than the batch size, so we send the data at the end of the parsing
+  handleCsvBuffer(0);
+
  TRI_DestroyCsvParser(&parser);
 
  waitForSenders();
@@ -808,9 +811,11 @@ void ImportHelper::addLastField(char const* field, size_t fieldLength,
 
  _lineBuffer.appendChar(']');
 
-  if (row == _rowsToSkip) {
+  if (row == _rowsToSkip && !_headersSeen) {
    // save the first line
    _firstLine = std::string(_lineBuffer.c_str(), _lineBuffer.length());
+    _lineBuffer.reset();
+    return;
  } else if (row > _rowsToSkip && _firstLine.empty()) {
    // error
    MUTEX_LOCKER(guard, _stats._mutex);
@@ -822,17 +827,21 @@ void ImportHelper::addLastField(char const* field, size_t fieldLength,
  // read a complete line
 
  if (_lineBuffer.length() > 0) {
+    if (!_outputBuffer.length()) {
+      _outputBuffer.appendText(_firstLine);
+      _outputBuffer.appendChar('\n');
+    }
    _outputBuffer.appendText(_lineBuffer);
    _lineBuffer.reset();
  } else {
    MUTEX_LOCKER(guard, _stats._mutex);
    ++_stats._numberErrors;
  }
 
-  if (_outputBuffer.length() > getMaxUploadSize()) {
-    sendCsvBuffer();
-    _outputBuffer.appendText(_firstLine);
-  }
+  // we will send the data if the buffer is already bigger than the batch size;
+  // otherwise it will accumulate and be sent later, once the buffer length
+  // exceeds the batch size
+  handleCsvBuffer(getMaxUploadSize());
}
 
bool ImportHelper::collectionExists() {
@@ -954,8 +963,8 @@ bool ImportHelper::truncateCollection() {
    return false;
  }
 
-void ImportHelper::sendCsvBuffer() {
-  if (_hasError) {
+void ImportHelper::handleCsvBuffer(uint64_t bufferSizeThreshold) {
+  if (_hasError || _outputBuffer.length() <= bufferSizeThreshold) {
    return;
  }
 
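For illustration, here is a minimal, self-contained sketch of the batching pattern these hunks introduce. All names in it (CsvBatcher, addRow, finish, send, kMaxUploadSize) are simplified stand-ins for this example, not the actual ImportHelper API. During parsing, the buffer is flushed only once it grows past the batch size; at the end of parsing, the remainder is flushed with a threshold of 0; and because the buffer is cleared after every send, the header line is re-prepended at the start of each new batch:

#include <cstdint>
#include <iostream>
#include <string>

// Simplified model of the header-aware batching in ImportHelper.
struct CsvBatcher {
  std::string firstLine;     // header row (plays the role of _firstLine)
  std::string outputBuffer;  // accumulated batch (plays the role of _outputBuffer)

  static constexpr uint64_t kMaxUploadSize = 32;  // tiny on purpose, for the demo

  void addRow(std::string const& row) {
    if (outputBuffer.empty()) {
      // every batch starts with the header, so the server can map the columns
      outputBuffer += firstLine;
      outputBuffer += '\n';
    }
    outputBuffer += row;
    outputBuffer += '\n';
    // mid-parse: only flush once the accumulated batch exceeds the batch size
    handleBuffer(kMaxUploadSize);
  }

  void finish() {
    // end of parsing: flush the trailing remainder, however small
    handleBuffer(0);
  }

  void handleBuffer(uint64_t threshold) {
    if (outputBuffer.length() <= threshold) {
      return;  // nothing to send yet; also never sends an empty buffer
    }
    send(outputBuffer);
    outputBuffer.clear();  // the next addRow() re-prepends the header
  }

  static void send(std::string const& batch) {
    std::cout << "--- upload ---\n" << batch;
  }
};

int main() {
  CsvBatcher b;
  b.firstLine = "\"id\",\"a\"";
  for (int i = 1; i <= 5; ++i) {
    b.addRow(std::to_string(i) + ",x");
  }
  b.finish();  // sends whatever is left below the threshold
  return 0;
}

Folding the two former call sites (the unconditional end-of-parse flush and the per-row size check) into one handleCsvBuffer(threshold) keeps the header prepending and the "never send an empty or header-only buffer" rule in a single place.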

arangosh/Import/ImportHelper.h

Lines changed: 1 addition & 1 deletion

@@ -296,7 +296,7 @@ class ImportHelper {
  bool checkCreateCollection();
  bool truncateCollection();
 
-  void sendCsvBuffer();
+  void handleCsvBuffer(uint64_t bufferSizeThreshold);
  void sendJsonBuffer(char const* str, size_t len, bool isObject);
  SenderThread* findIdleSender();
  void waitForSenders();

js/client/modules/@arangodb/testsuites/importing.js

Lines changed: 30 additions & 0 deletions

@@ -250,6 +250,36 @@ const impTodos = [{
  create: 'true',
  database: 'UnitTestImportCreateDatabase',
  createDatabase: 'true'
+}, {
+  id: 'importDataBatchSizeWithoutHeaderFile',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-with-header.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithoutHeaderFile',
+  type: 'csv',
+  create: 'true',
+  batchSize: 10,
+}, {
+  id: 'importDataBatchSizeWithoutHeaderFile2',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-with-header.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithoutHeaderFile2',
+  type: 'csv',
+  create: 'true',
+  batchSize: 1000
+}, {
+  id: 'importDataBatchSizeWithHeaderFile',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-without-headers.csv')),
+  headers: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-headers.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithHeaderFile',
+  type: 'csv',
+  create: 'true',
+  batchSize: 10,
+}, {
+  id: 'importDataBatchSizeWithHeaderFile2',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-without-headers.csv')),
+  headers: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-headers.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithHeaderFile2',
+  type: 'csv',
+  create: 'true',
+  batchSize: 1000
 }];
 
function importing (options) {

js/client/modules/@arangodb/testutils/process-utils.js

Lines changed: 4 additions & 0 deletions

@@ -1092,6 +1092,10 @@ function runArangoImport (options, instanceInfo, what, coreCheck = false) {
    args['remove-attribute'] = what.removeAttribute;
  }
 
+  if (what.batchSize !== undefined) {
+    args['batch-size'] = what.batchSize;
+  }
+
  return executeAndWait(ARANGOIMPORT_BIN, toArgv(args), options, 'arangoimport', instanceInfo.rootDir, coreCheck);
}
 
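With this wiring, a scenario's batchSize value reaches arangoimport as its existing --batch-size option, which is measured in bytes: a tiny value such as 10 forces a flush after almost every row, while 1000 lets the whole eight-line fixture accumulate into a single upload, so the paired scenarios exercise both sides of the new handleCsvBuffer() threshold.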

import-data-with-header.csv

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+"id","a","b","c","d","e"
+1,"1",1,"1.3",null,-5
+2,null,"",3.1,-2.5,"ddd "" ' ffd"
+3,"this","is"
+4,let's,see,what,happens
+5,9999999999999999999999999999999999,test,-99999999,true,-888.4434
+6,10e4,20.5,-42, null ,false
+7,-1.05e2,1.05e-2,true,false,null

tests/js/server/import/import-setup.js

Lines changed: 8 additions & 0 deletions

@@ -51,6 +51,10 @@
  db._drop("UnitTestsImportCsvConvert");
  db._drop("UnitTestsImportCsvNoConvert");
  db._drop("UnitTestsImportCsvNoEol");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile2");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile2");
  db._drop("UnitTestsImportTsv1");
  db._drop("UnitTestsImportTsv1Gz");
  db._drop("UnitTestsImportTsv2");
@@ -76,6 +80,10 @@
  db._create("UnitTestsImportTsv1Gz");
  db._create("UnitTestsImportTsv2");
  db._create("UnitTestsImportVertex");
+  db._create("UnitTestsImportDataBatchSizeWithoutHeaderFile");
+  db._create("UnitTestsImportDataBatchSizeWithoutHeaderFile2");
+  db._create("UnitTestsImportDataBatchSizeWithHeaderFile");
+  db._create("UnitTestsImportDataBatchSizeWithHeaderFile2");
  db._createEdgeCollection("UnitTestsImportEdge");
  db._createEdgeCollection("UnitTestsImportEdgeGz");
  db._create("UnitTestsImportIgnore");

tests/js/server/import/import-teardown.js

Lines changed: 4 additions & 0 deletions

@@ -51,6 +51,10 @@
  db._drop("UnitTestsImportCsvConvert");
  db._drop("UnitTestsImportCsvNoConvert");
  db._drop("UnitTestsImportCsvNoEol");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile2");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile2");
  db._drop("UnitTestsImportTsv1");
  db._drop("UnitTestsImportTsv1Gz");
  db._drop("UnitTestsImportTsv2");

tests/js/server/import/import.js

Lines changed: 60 additions & 0 deletions

@@ -443,6 +443,66 @@ function importTestSuite () {
      assertEqual(JSON.stringify(expected), JSON.stringify(actual));
    },
 
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and header in a single csv file, small batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithoutHeaderFile: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithoutHeaderFile SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and header in a single csv file, large batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithoutHeaderFile2: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithoutHeaderFile2 SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and headers in separate files, small batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithHeaderFile: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithHeaderFile SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and headers in separate files, large batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithHeaderFile2: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithHeaderFile2 SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
////////////////////////////////////////////////////////////////////////////////
/// @brief test csv import without trailing eol
////////////////////////////////////////////////////////////////////////////////
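A note on the expected arrays above: rows 3 and 4 of the fixture carry fewer fields than the six-column header and are absent from every expected result set, i.e. the importer rejects such mismatched lines as errors rather than importing partial documents. The remaining rows also double as type-conversion checks: quoted numbers stay strings, unquoted numbers become numeric, and a null value results in the attribute being omitted.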

0 commit comments