Description
My Environment
- ArangoDB Version: 3.4.6 and 3.5.0-RC.7
- Storage Engine: RocksDB
- Deployment Mode: Single Server
- Deployment Strategy: ArangoDB Starter in Docker
- Infrastructure: own
- Operating System: macOS 10.13.4
- Total RAM in your machine: 32 GB
- Disks in use: SSD
- Used Package: Docker - official Docker library
While using the `/_api/import` endpoint and writing the same data concurrently to a collection, views linked to that collection eventually lose records.

Reproduced on both 3.4.6 and 3.5.0-RC.7.
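For context, this is roughly what a single one of those import calls looks like (a minimal sketch using the same `request` dependency as the full script further down; the response fields `created`, `errors`, `empty`, `ignored`, and `updated` are the ones logged during the reproduction):

```js
// Minimal sketch of one /_api/import call, as used throughout this report.
const request = require("request");

request({
  method: "POST",
  url: "http://localhost:8529/_db/_system/_api/import?collection=docs&type=list&details=true&onDuplicate=replace",
  body: [{ _key: "example-key" }], // type=list expects a JSON array of documents
  json: true
}, function (err, res, body) {
  if (err) throw err;
  // With details=true the server reports per-request statistics.
  console.log(res.statusCode, body.created, body.errors, body.empty, body.ignored, body.updated);
});
```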
Steps to reproduce
- Spin up ArangoDB:

```
docker run -dt --name arangodb346 -e ARANGO_NO_AUTH=1 -p 8529:8529 arangodb/arangodb:3.4.6
```
- Create a collection named `docs` on the `_system` db
- Create a view named `docs_view` on the `_system` db and link it to the `docs` collection (the link setup I used is below, followed by an optional scripted version of this step)

```json
"links": {
  "docs": {
    "analyzers": [
      "identity"
    ],
    "fields": {},
    "includeAllFields": true,
    "storeValues": "id",
    "trackListPositions": false
  }
}
```
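If you prefer to script this setup instead of using the web UI, something like the sketch below should work (it reuses the `request` dependency and assumes no authentication, as in the docker command above; if your version rejects `links` in the view creation body, apply them afterwards with `PUT /_api/view/docs_view/properties` instead):

```js
// Sketch: create the collection and the linked ArangoSearch view over the HTTP API.
const request = require("request");

const base = "http://localhost:8529/_db/_system";

request.post({ url: base + "/_api/collection", body: { name: "docs" }, json: true }, function () {
  request.post({
    url: base + "/_api/view",
    body: {
      name: "docs_view",
      type: "arangosearch",
      // Same link definition as the JSON above.
      links: {
        docs: {
          analyzers: ["identity"],
          fields: {},
          includeAllFields: true,
          storeValues: "id",
          trackListPositions: false
        }
      }
    },
    json: true
  }, function (err, res, body) {
    console.log("view created:", res && res.statusCode, body);
  });
});
```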
- Wrote a JavaScript script to reproduce the issue (requires Node.js)

Create a `package.json` file:

```json
{
  "dependencies": {
    "async": "^3.1.0",
    "request": "^2.88.0",
    "uuid": "^3.3.2"
  }
}
```

Create a `script.js` file:
```js
const request = require("request");
const uuidv1 = require("uuid/v1");
const async = require("async");

// Build a fixed batch of 100 documents with pre-assigned keys,
// so every request imports the very same documents.
let docs = [];
for (let i = 0; i < 100; i++) {
  let docId = uuidv1();
  docs.push({ _id: "docs/" + docId, _key: docId });
}

// Prepare 1000 import requests, all writing that same batch.
let requests = [];
for (let j = 0; j < 1000; j++) {
  let requestFunc = function(cb) {
    let options = {
      method: "POST",
      // url: "http://localhost:8529/_db/_system/_api/import?collection=docs&type=list&details=true",
      url: "http://localhost:8529/_db/_system/_api/import?collection=docs&type=list&details=true&onDuplicate=replace",
      body: docs,
      json: true
    };
    request(options, function (err, res, body) {
      console.log(j, new Date(), 'response code:', res.statusCode, 'created:', body.created, 'errors:', body.errors, 'empty:', body.empty, 'ignored:', body.ignored, 'updated:', body.updated);
      cb();
    });
  };
  requests.push(requestFunc);
}

// Run the requests either one after another or 10 at a time,
// depending on the command line argument.
const args = process.argv.slice(2);
if (args.length) {
  if ("sequential" == args[0]) {
    async.waterfall(requests, function() {
      console.log("sequential done");
    });
  }
  if ("parallel" == args[0]) {
    async.parallelLimit(requests, 10, function() {
      console.log("parallel done");
    });
  }
}
```
- Perform an `npm install`
- Execute the following command: `node script.js parallel`
- Perform the following query:
```
LET collectionCount = FIRST(FOR u IN docs COLLECT WITH COUNT INTO length RETURN length)
LET viewCount = LENGTH(FOR d IN docs_view RETURN d)
RETURN { collectionCount, viewCount, diff: (collectionCount - viewCount) }
```
Eventually the view gets out of sync with the collection. Repeat the script run and the query if necessary.
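To avoid re-running the query by hand, a small poll loop like the one below can execute the same AQL through the `/_api/cursor` endpoint until the counts diverge (a sketch only; note that the view syncs asynchronously, so the comparison is most meaningful once the import script has finished):

```js
// Sketch: re-run the count comparison via the cursor API until the view falls behind.
const request = require("request");

const query = `
  LET collectionCount = FIRST(FOR u IN docs COLLECT WITH COUNT INTO length RETURN length)
  LET viewCount = LENGTH(FOR d IN docs_view RETURN d)
  RETURN { collectionCount, viewCount, diff: (collectionCount - viewCount) }
`;

function check() {
  request.post({
    url: "http://localhost:8529/_db/_system/_api/cursor",
    body: { query: query },
    json: true
  }, function (err, res, body) {
    if (err) throw err;
    const result = body.result[0];
    console.log(new Date(), result);
    if (result.diff === 0) {
      setTimeout(check, 1000); // still in sync, check again in a second
    } else {
      console.log("view out of sync");
    }
  });
}

check();
```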
Few notes
- Dropping `onDuplicate=replace` from the request makes the issue less frequent, but it still happens (takes longer)
- Using `node script.js sequential` makes the issue less frequent, but it still happens (takes longer)