Bugfix 3.8/presupp 439 (#15398) · rnshah9/arangodb@10cfbe7 · GitHub

Commit 10cfbe7

Authored by cpjulia, jsteemann, and KVS85

Bugfix 3.8/presupp 439 (arangodb#15398)

* Added backport for PRESUPP-439
* Changed placement of code which was after the return of the method
* Update CHANGELOG

Co-authored-by: Jan <jsteemann@users.noreply.github.com>
Co-authored-by: Vadim <vadim@arangodb.com>
1 parent fc50505 commit 10cfbe7

File tree: 9 files changed, +140 −12 lines changed

CHANGELOG

Lines changed: 5 additions & 0 deletions

@@ -1,6 +1,11 @@
 v3.8.5 (XXXX-XX-XX)
 -------------------
 
+* Fixed PRESUPP-439: In arangoimport, for CSV and TSV files, it could happen
+  that a buffer containing only the header would be sent to the server, and
+  also that batches would contain the documents for the CSV rows but not the
+  header, which should be sent together with the documents.
+
 * Changed various default values for RocksDB to tune operations for different
   typical scenarios like gp2 type volumes and gp3 type volumes and locally
   attached SSDs with RAID0:
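To make the fixed behavior concrete: when the accumulated rows exceed the batch size, arangoimport splits one CSV file across several HTTP uploads, and every upload has to repeat the header line so the server can map each row's values to attribute names. A purely illustrative example, with hypothetical data and a batch size just large enough for two rows:

  upload 1:
    "id","name"
    1,foo
    2,bar

  upload 2:
    "id","name"
    3,baz
    4,qux

Before this fix, the first upload could consist of the header line alone, while later uploads carried rows without any header.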

arangosh/Import/ImportHelper.cpp

Lines changed: 20 additions & 11 deletions

@@ -271,7 +271,6 @@ bool ImportHelper::readHeadersFile(std::string const& headersFile,
 
   constexpr int BUFFER_SIZE = 16384;
   char buffer[BUFFER_SIZE];
-
  while (!_hasError) {
    ssize_t n = fd->read(buffer, sizeof(buffer));
 
@@ -297,6 +296,7 @@ bool ImportHelper::readHeadersFile(std::string const& headersFile,
      _outputBuffer.appendChar('\n');
    }
 
+
    if (_rowsRead > 2) {
      _errorMessages.push_back("headers file '" + headersFile + "' contained more than a single line of headers");
      return false;
@@ -310,6 +310,8 @@ bool ImportHelper::readHeadersFile(std::string const& headersFile,
  _numberLines = 0;
  // restore copy of _rowsToSkip
  _rowsToSkip = rowsToSkip;
+  _outputBuffer.reset();
+
 
  return true;
}
@@ -427,12 +429,13 @@ bool ImportHelper::importDelimited(std::string const& collectionName,
    reportProgress(totalLength, fd->offset(), nextProgress);
 
    TRI_ParseCsvString(&parser, buffer, n);
-  }
 
-  if (_outputBuffer.length() > 0) {
-    sendCsvBuffer();
  }
 
+  // trailing buffer items that can be accumulated because the buffer length is
+  // smaller than the batch size, so we send the data at the end of the parsing
+  handleCsvBuffer(0);
+
  TRI_DestroyCsvParser(&parser);
 
  waitForSenders();
@@ -808,9 +811,11 @@ void ImportHelper::addLastField(char const* field, size_t fieldLength,
 
  _lineBuffer.appendChar(']');
 
-  if (row == _rowsToSkip) {
+  if (row == _rowsToSkip && !_headersSeen) {
    // save the first line
    _firstLine = std::string(_lineBuffer.c_str(), _lineBuffer.length());
+    _lineBuffer.reset();
+    return;
  } else if (row > _rowsToSkip && _firstLine.empty()) {
    // error
    MUTEX_LOCKER(guard, _stats._mutex);
@@ -822,17 +827,21 @@ void ImportHelper::addLastField(char const* field, size_t fieldLength,
  // read a complete line
 
  if (_lineBuffer.length() > 0) {
+    if (!_outputBuffer.length()) {
+      _outputBuffer.appendText(_firstLine);
+      _outputBuffer.appendChar('\n');
+    }
    _outputBuffer.appendText(_lineBuffer);
    _lineBuffer.reset();
  } else {
    MUTEX_LOCKER(guard, _stats._mutex);
    ++_stats._numberErrors;
  }
 
-  if (_outputBuffer.length() > getMaxUploadSize()) {
-    sendCsvBuffer();
-    _outputBuffer.appendText(_firstLine);
-  }
+  // we will send the data if the buffer is already bigger than the batch size;
+  // otherwise it will accumulate and be sent later, once the buffer length
+  // exceeds the batch size
+  handleCsvBuffer(getMaxUploadSize());
}
 
bool ImportHelper::collectionExists() {
@@ -954,8 +963,8 @@ bool ImportHelper::truncateCollection() {
    return false;
  }
 
-void ImportHelper::sendCsvBuffer() {
-  if (_hasError) {
+void ImportHelper::handleCsvBuffer(uint64_t bufferSizeThreshold) {
+  if (_hasError || _outputBuffer.length() <= bufferSizeThreshold) {
    return;
  }
 
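For illustration, here is a minimal, self-contained sketch of the batching pattern these hunks introduce. All names in it (CsvBatcher, addRow, finish, send, kMaxUploadSize) are simplified stand-ins for this example, not the actual ImportHelper API. During parsing, the buffer is flushed only once it grows past the batch size; at the end of parsing, the remainder is flushed with a threshold of 0; and because the buffer is cleared after every send, the header line is re-prepended at the start of each new batch:

#include <cstdint>
#include <iostream>
#include <string>

// Simplified model of the header-aware batching in ImportHelper.
struct CsvBatcher {
  std::string firstLine;     // header row (plays the role of _firstLine)
  std::string outputBuffer;  // accumulated batch (plays the role of _outputBuffer)

  static constexpr uint64_t kMaxUploadSize = 32;  // tiny on purpose, for the demo

  void addRow(std::string const& row) {
    if (outputBuffer.empty()) {
      // every batch starts with the header, so the server can map the columns
      outputBuffer += firstLine;
      outputBuffer += '\n';
    }
    outputBuffer += row;
    outputBuffer += '\n';
    // mid-parse: only flush once the accumulated batch exceeds the batch size
    handleBuffer(kMaxUploadSize);
  }

  void finish() {
    // end of parsing: flush the trailing remainder, however small
    handleBuffer(0);
  }

  void handleBuffer(uint64_t threshold) {
    if (outputBuffer.length() <= threshold) {
      return;  // nothing to send yet; also never sends an empty buffer
    }
    send(outputBuffer);
    outputBuffer.clear();  // the next addRow() re-prepends the header
  }

  static void send(std::string const& batch) {
    std::cout << "--- upload ---\n" << batch;
  }
};

int main() {
  CsvBatcher b;
  b.firstLine = "\"id\",\"a\"";
  for (int i = 1; i <= 5; ++i) {
    b.addRow(std::to_string(i) + ",x");
  }
  b.finish();  // sends whatever is left below the threshold
  return 0;
}

Folding the two former call sites (the unconditional end-of-parse flush and the per-row size check) into one handleCsvBuffer(threshold) keeps the header prepending and the "never send an empty or header-only buffer" rule in a single place.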

arangosh/Import/ImportHelper.h

Lines changed: 1 addition & 1 deletion

@@ -296,7 +296,7 @@ class ImportHelper {
  bool checkCreateCollection();
  bool truncateCollection();
 
-  void sendCsvBuffer();
+  void handleCsvBuffer(uint64_t bufferSizeThreshold);
  void sendJsonBuffer(char const* str, size_t len, bool isObject);
  SenderThread* findIdleSender();
  void waitForSenders();

js/client/modules/@arangodb/testsuites/importing.js

Lines changed: 30 additions & 0 deletions

@@ -250,6 +250,36 @@ const impTodos = [{
  create: 'true',
  database: 'UnitTestImportCreateDatabase',
  createDatabase: 'true'
+}, {
+  id: 'importDataBatchSizeWithoutHeaderFile',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-with-header.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithoutHeaderFile',
+  type: 'csv',
+  create: 'true',
+  batchSize: 10,
+}, {
+  id: 'importDataBatchSizeWithoutHeaderFile2',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-with-header.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithoutHeaderFile2',
+  type: 'csv',
+  create: 'true',
+  batchSize: 1000
+}, {
+  id: 'importDataBatchSizeWithHeaderFile',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-without-headers.csv')),
+  headers: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-headers.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithHeaderFile',
+  type: 'csv',
+  create: 'true',
+  batchSize: 10,
+}, {
+  id: 'importDataBatchSizeWithHeaderFile2',
+  data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-data-without-headers.csv')),
+  headers: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-headers.csv')),
+  coll: 'UnitTestsImportDataBatchSizeWithHeaderFile2',
+  type: 'csv',
+  create: 'true',
+  batchSize: 1000
 }];
 
function importing (options) {

js/client/modules/@arangodb/testutils/process-utils.js

Lines changed: 4 additions & 0 deletions

@@ -1092,6 +1092,10 @@ function runArangoImport (options, instanceInfo, what, coreCheck = false) {
    args['remove-attribute'] = what.removeAttribute;
  }
 
+  if (what.batchSize !== undefined) {
+    args['batch-size'] = what.batchSize;
+  }
+
  return executeAndWait(ARANGOIMPORT_BIN, toArgv(args), options, 'arangoimport', instanceInfo.rootDir, coreCheck);
}
 
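With this wiring, a scenario's batchSize value reaches arangoimport as its existing --batch-size option, which is measured in bytes: a tiny value such as 10 forces a flush after almost every row, while 1000 lets the whole eight-line fixture accumulate into a single upload, so the paired scenarios exercise both sides of the new handleCsvBuffer() threshold.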

import-data-with-header.csv

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+"id","a","b","c","d","e"
+1,"1",1,"1.3",null,-5
+2,null,"",3.1,-2.5,"ddd "" ' ffd"
+3,"this","is"
+4,let's,see,what,happens
+5,9999999999999999999999999999999999,test,-99999999,true,-888.4434
+6,10e4,20.5,-42, null ,false
+7,-1.05e2,1.05e-2,true,false,null

tests/js/server/import/import-setup.js

Lines changed: 8 additions & 0 deletions

@@ -51,6 +51,10 @@
  db._drop("UnitTestsImportCsvConvert");
  db._drop("UnitTestsImportCsvNoConvert");
  db._drop("UnitTestsImportCsvNoEol");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile2");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile2");
  db._drop("UnitTestsImportTsv1");
  db._drop("UnitTestsImportTsv1Gz");
  db._drop("UnitTestsImportTsv2");
@@ -76,6 +80,10 @@
  db._create("UnitTestsImportTsv1Gz");
  db._create("UnitTestsImportTsv2");
  db._create("UnitTestsImportVertex");
+  db._create("UnitTestsImportDataBatchSizeWithoutHeaderFile");
+  db._create("UnitTestsImportDataBatchSizeWithoutHeaderFile2");
+  db._create("UnitTestsImportDataBatchSizeWithHeaderFile");
+  db._create("UnitTestsImportDataBatchSizeWithHeaderFile2");
  db._createEdgeCollection("UnitTestsImportEdge");
  db._createEdgeCollection("UnitTestsImportEdgeGz");
  db._create("UnitTestsImportIgnore");

tests/js/server/import/import-teardown.js

Lines changed: 4 additions & 0 deletions

@@ -51,6 +51,10 @@
  db._drop("UnitTestsImportCsvConvert");
  db._drop("UnitTestsImportCsvNoConvert");
  db._drop("UnitTestsImportCsvNoEol");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithoutHeaderFile2");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile");
+  db._drop("UnitTestsImportDataBatchSizeWithHeaderFile2");
  db._drop("UnitTestsImportTsv1");
  db._drop("UnitTestsImportTsv1Gz");
  db._drop("UnitTestsImportTsv2");

tests/js/server/import/import.js

Lines changed: 60 additions & 0 deletions

@@ -443,6 +443,66 @@ function importTestSuite () {
      assertEqual(JSON.stringify(expected), JSON.stringify(actual));
    },
 
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and header in a single csv file, small batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithoutHeaderFile: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithoutHeaderFile SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and header in a single csv file, large batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithoutHeaderFile2: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithoutHeaderFile2 SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and headers in separate files, small batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithHeaderFile: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithHeaderFile SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test csv with data and headers in separate files, large batch-size
+////////////////////////////////////////////////////////////////////////////////
+    testImportDataBatchSizeWithHeaderFile2: function () {
+      let expected = [
+        { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 },
+        { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
+        { "a": "9999999999999999999999999999999999", "b": "test", "c": -99999999, "d": true, "e": -888.4434, "id": 5 },
+        { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
+        { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 }
+      ];
+      let actual = getQueryResults("FOR i IN UnitTestsImportDataBatchSizeWithHeaderFile2 SORT i.id RETURN i");
+      assertEqual(expected, actual);
+    },
+
////////////////////////////////////////////////////////////////////////////////
/// @brief test csv import without trailing eol
////////////////////////////////////////////////////////////////////////////////
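A note on the expected arrays above: rows 3 and 4 of the fixture carry fewer fields than the six-column header and are absent from every expected result set, i.e. the importer rejects such mismatched lines as errors rather than importing partial documents. The remaining rows also double as type-conversion checks: quoted numbers stay strings, unquoted numbers become numeric, and a null value results in the attribute being omitted.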

0 commit comments