Bug fix/fix active failover foxx failover (#14754) · arangodb/arangodb@d33f9c8 · GitHub
[go: up one dir, main page]

Skip to content

Commit d33f9c8

Browse files
dothebart and jsteemann authored
Bug fix/fix active failover foxx failover (#14754)
Co-authored-by: Jan <jsteemann@users.noreply.github.com>
1 parent 7721412 commit d33f9c8

File tree

15 files changed

+679
-34
lines changed

15 files changed

+679
-34
lines changed

CHANGELOG

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
devel
22
-----
33

4+
* Fix active failover, so that the new host actually has working
5+
foxx services. (BTS-558)
6+
47
* Fixed issue #14720: Bulk import ignores onDuplicate in 3.8.0.
58
The "onDuplicate" attribute was ignored by the `/_api/import` REST API when
69
not specifying the "type" URL parameter.

arangod/Cluster/HeartbeatThread.cpp

Lines changed: 6 additions & 0 deletions
using namespace arangodb::application_features;
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "Replication/GlobalReplicationApplier.h"
5151
#include "Replication/ReplicationFeature.h"
5252
#include "RestServer/DatabaseFeature.h"
53+
#include "RestServer/SystemDatabaseFeature.h"
5354
#include "RestServer/TtlFeature.h"
5455
#include "Scheduler/Scheduler.h"
5556
#include "Scheduler/SchedulerFeature.h"
@@ -58,6 +59,7 @@
5859
#include "Transaction/ClusterUtils.h"
5960
#include "Utils/Events.h"
6061
#include "VocBase/vocbase.h"
62+
#include "V8Server/V8DealerFeature.h"
6163

6264
using namespace arangodb;
6365
@@ -905,6 +907,10 @@ void HeartbeatThread::runSingleServer() {
905907
ServerState::instance()->setFoxxmaster(_myId);
906908
auto prv = ServerState::setServerMode(ServerState::Mode::DEFAULT);
907909
if (prv == ServerState::Mode::REDIRECT) {
910+
auto& sysDbFeature = server().getFeature<arangodb::SystemDatabaseFeature>();
911+
auto database = sysDbFeature.use();
912+
server().getFeature<V8DealerFeature>().loadJavaScriptFileInAllContexts(
913+
database.get(), "server/leader.js", nullptr);
908914
LOG_TOPIC("98325", INFO, Logger::HEARTBEAT)
909915
<< "Successful leadership takeover: "
910916
<< "All your base are belong to us";

js/server/leader.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
'use strict';
2+
3+
// //////////////////////////////////////////////////////////////////////////////
4+
// / @brief active failover leadership change
5+
// /
6+
// / @file
7+
// /
8+
// / DISCLAIMER
9+
// /
10+
// / Copyright 2014 ArangoDB GmbH, Cologne, Germany
11+
// / Copyright 2011-2014 triAGENS GmbH, Cologne, Germany
12+
// /
13+
// / Licensed under the Apache License, Version 2.0 (the "License")
14+
// / you may not use this file except in compliance with the License.
15+
// / You may obtain a copy of the License at
16+
// /
17+
// / http://www.apache.org/licenses/LICENSE-2.0
18+
// /
19+
// / Unless required by applicable law or agreed to in writing, software
20+
// / distributed under the License is distributed on an "AS IS" BASIS,
21+
// / WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22+
// / See the License for the specific language governing permissions and
23+
// / limitations under the License.
24+
// /
25+
// / Copyright holder is ArangoDB GmbH, Cologne, Germany
26+
// /
27+
// / @author Alan Plum
28+
// / @author Copyright 2021, ArangoDB GmbH, Cologne, Germany
29+
// //////////////////////////////////////////////////////////////////////////////
30+
31+
(function(){
32+
"use strict";
33+
if (require("internal").threadNumber === 0) {
34+
require("@arangodb/foxx/manager").healAll();
35+
}
36+
}());

js/server/modules/@arangodb/foxx/queues/manager.js

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,6 @@ exports.manage = function () {
226226
}
227227

228228
if (global.ArangoServerState.getFoxxmasterQueueupdate()) {
229-
if (!isCluster) {
230-
// On a Foxxmaster change FoxxmasterQueueupdate is set to true
231-
// we use this to signify a Leader change to this server
232-
foxxManager.healAll(true);
233-
}
234229
// do not call again immediately
235230
global.ArangoServerState.setFoxxmasterQueueupdate(false);
236231

tests/js/client/active-failover/basic.js

Lines changed: 197 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,13 @@ const request = require("@arangodb/request");
3535
const tasks = require("@arangodb/tasks");
3636

3737
const arango = internal.arango;
38-
const compareTicks = require("@arangodb/replication").compareTicks;
39-
const wait = internal.wait;
4038
const db = internal.db;
39+
const fs = require('fs');
40+
const path = require('path');
41+
const utils = require('@arangodb/foxx/manager-utils');
42+
const wait = internal.wait;
4143

44+
const compareTicks = require("@arangodb/replication").compareTicks;
4245
const suspendExternal = internal.suspendExternal;
4346
const continueExternal = internal.continueExternal;
4447

@@ -293,10 +296,136 @@ function waitUntilHealthStatusIs(isHealthy, isFailed) {
293296
return false;
294297
}
295298

299+
function loadFoxxIntoZip(path) {
300+
let zip = utils.zipDirectory(path);
301+
let content = fs.readFileSync(zip);
302+
fs.remove(zip);
303+
return {
304+
type: 'inlinezip',
305+
buffer: content
306+
};
307+
}
308+
function checkFoxxService(readOnly) {
309+
const onlyJson = {
310+
'accept': 'application/json',
311+
'accept-content-type': 'application/json'
312+
};
313+
let reply;
314+
db._useDatabase("_system");
315+
316+
[
317+
'/_db/_system/_admin/aardvark/index.html',
318+
'/_db/_system/itz/index',
319+
'/_db/_system/crud/xxx'
320+
].forEach(route => {
321+
for (let i=0; i < 200; i++) {
322+
try {
323+
reply = arango.GET_RAW(route, onlyJson);
324+
if (reply.code === 200) {
325+
print(route + " OK");
326+
return;
327+
}
328+
let msg = JSON.stringify(reply);
329+
if (reply.hasOwnProperty('parsedBody')) {
330+
msg = " '" + reply.parsedBody.errorNum + "' - " + reply.parsedBody.errorMessage;
331+
}
332+
print(route + " Not yet ready, retrying: " + msg);
333+
} catch (e) {
334+
print(route + " Caught - need to retry. " + JSON.stringify(e));
335+
}
336+
internal.sleep(3);
337+
}
338+
throw ("foxx route '" + route + "' not ready on time!");
339+
});
340+
341+
print("Foxx: Itzpapalotl getting the root of the gods");
342+
reply = arango.GET_RAW('/_db/_system/itz');
343+
assertEqual(reply.code, "307", JSON.stringify(reply));
344+
345+
print('Foxx: Itzpapalotl getting index html with list of gods');
346+
reply = arango.GET_RAW('/_db/_system/itz/index');
347+
assertEqual(reply.code, "200", JSON.stringify(reply));
348+
349+
print("Foxx: Itzpapalotl summoning Chalchihuitlicue");
350+
reply = arango.GET_RAW('/_db/_system/itz/Chalchihuitlicue/summon', onlyJson);
351+
assertEqual(reply.code, "200", JSON.stringify(reply));
352+
let parsedBody = JSON.parse(reply.body);
353+
assertEqual(parsedBody.name, "Chalchihuitlicue");
354+
assertTrue(parsedBody.summoned);
355+
356+
print("Foxx: crud testing get xxx");
357+
reply = arango.GET_RAW('/_db/_system/crud/xxx', onlyJson);
358+
assertEqual(reply.code, "200");
359+
parsedBody = JSON.parse(reply.body);
360+
assertEqual(parsedBody, []);
361+
362+
print("Foxx: crud testing POST xxx");
363+
364+
reply = arango.POST_RAW('/_db/_system/crud/xxx', {_key: "test"});
365+
if (readOnly) {
366+
assertEqual(reply.code, "400");
367+
} else {
368+
assertEqual(reply.code, "201");
369+
}
370+
371+
print("Foxx: crud testing get xxx");
372+
reply = arango.GET_RAW('/_db/_system/crud/xxx', onlyJson);
373+
assertEqual(reply.code, "200");
374+
parsedBody = JSON.parse(reply.body);
375+
if (readOnly) {
376+
assertEqual(parsedBody, []);
377+
} else {
378+
assertEqual(parsedBody.length, 1);
379+
}
380+
381+
print('Foxx: crud testing delete document');
382+
reply = arango.DELETE_RAW('/_db/_system/crud/xxx/' + 'test');
383+
if (readOnly) {
384+
assertEqual(reply.code, "400");
385+
} else {
386+
assertEqual(reply.code, "204");
387+
}
388+
}
389+
390+
function installFoxx(mountpoint, which, mode) {
391+
let headers = {};
392+
let content;
393+
if (which.type === 'js') {
394+
headers['content-type'] = 'application/javascript';
395+
content = which.buffer;
396+
} else if (which.type === 'dir') {
397+
headers['content-type'] = 'application/zip';
398+
var utils = require('@arangodb/foxx/manager-utils');
399+
let zip = utils.zipDirectory(which.buffer);
400+
content = fs.readFileSync(zip);
401+
fs.remove(zip);
402+
} else if (which.type === 'inlinezip') {
403+
content = which.buffer;
404+
headers['content-type'] = 'application/zip';
405+
} else if (which.type === 'url') {
406+
content = { source: which };
407+
} else if (which.type === 'file') {
408+
content = fs.readFileSync(which.buffer);
409+
}
410+
let devmode = '';
411+
if (typeof which.devmode === "boolean") {
412+
devmode = `&development=${which.devmode}`;
413+
}
414+
let crudResp;
415+
if (mode === "upgrade") {
416+
crudResp = arango.PATCH('/_api/foxx/service?mount=' + mountpoint + devmode, content, headers);
417+
} else if (mode === "replace") {
418+
crudResp = arango.PUT('/_api/foxx/service?mount=' + mountpoint + devmode, content, headers);
419+
} else {
420+
crudResp = arango.POST('/_api/foxx?mount=' + mountpoint + devmode, content, headers);
421+
}
422+
expect(crudResp).to.have.property('manifest');
423+
return crudResp;
424+
}
425+
296426
// Testsuite that quickly checks some of the basic premises of
297427
// the active failover functionality. It is designed as a quicker
298428
// variant of the node resilience tests (for active failover).
299-
// Things like Foxx resilience are not tested
300429
function ActiveFailoverSuite() {
301430
let servers = getClusterEndpoints();
302431
assertTrue(servers.length >= 4, "This test expects four single instances");
@@ -370,37 +499,76 @@ function ActiveFailoverSuite() {
370499
// Simple failover case: Leader is suspended, slave needs to
371500
// take over within a reasonable amount of time
372501
testFailover: function () {
502+
const itzpapalotlPath = path.resolve(internal.pathForTesting('common'), 'test-data', 'apps', 'itzpapalotl');
503+
const itzpapalotlZip = loadFoxxIntoZip(itzpapalotlPath);
504+
installFoxx("/itz", itzpapalotlZip);
505+
506+
const minimalWorkingServicePath = path.resolve(internal.pathForTesting('common'), 'test-data', 'apps', 'crud');
507+
const minimalWorkingZip = loadFoxxIntoZip(minimalWorkingServicePath);
508+
installFoxx('/crud', minimalWorkingZip);
373509

510+
checkFoxxService(false);
374511
assertTrue(checkInSync(currentLead, servers));
375512
assertEqual(checkData(currentLead), 10000);
376513

377-
suspended = instanceinfo.arangods.filter(arangod => arangod.endpoint === currentLead);
378-
suspended.forEach(arangod => {
379-
print("Suspending Leader: ", arangod.endpoint);
380-
assertTrue(suspendExternal(arangod.pid));
381-
});
382-
514+
let suspended;
383515
let oldLead = currentLead;
384-
// await failover and check that follower get in sync
385-
currentLead = checkForFailover(currentLead);
386-
assertNotEqual(currentLead, oldLead);
387-
print("Failover to new leader : ", currentLead);
388-
389-
internal.wait(5); // settle down, heartbeat interval is 1s
390-
assertEqual(checkData(currentLead), 10000);
391-
print("New leader has correct data");
392-
393-
// check the remaining followers get in sync
394-
assertTrue(checkInSync(currentLead, servers, oldLead));
395-
396-
// restart the old leader
397-
suspended.forEach(arangod => {
398-
print("Resuming: ", arangod.endpoint);
399-
assertTrue(continueExternal(arangod.pid));
400-
});
401-
suspended = [];
402-
403-
assertTrue(checkInSync(currentLead, servers));
516+
try {
517+
suspended = instanceinfo.arangods.filter(arangod => arangod.endpoint === currentLead);
518+
suspended.forEach(arangod => {
519+
print("Suspending Leader: ", arangod.endpoint);
520+
assertTrue(suspendExternal(arangod.pid));
521+
});
522+
523+
// await failover and check that follower get in sync
524+
currentLead = checkForFailover(currentLead);
525+
assertNotEqual(currentLead, oldLead);
526+
print("Failover to new leader : ", currentLead);
527+
528+
internal.wait(5); // settle down, heartbeat interval is 1s
529+
assertEqual(checkData(currentLead), 10000);
530+
print("New leader has correct data");
531+
532+
// check the remaining followers get in sync
533+
assertTrue(checkInSync(currentLead, servers, oldLead));
534+
535+
connectToServer(currentLead);
536+
checkFoxxService(false);
537+
538+
} finally {
539+
// restart the old leader
540+
suspended.forEach(arangod => {
541+
print("Resuming: ", arangod.endpoint);
542+
assertTrue(continueExternal(arangod.pid));
543+
});
544+
assertTrue(checkInSync(currentLead, servers));
545+
// after its in sync, halt all others so it becomes the leader again
546+
suspended = instanceinfo.arangods.filter(arangod =>
547+
(arangod.endpoint !== oldLead) && (arangod.role === 'single'));
548+
suspended.forEach(arangod => {
549+
print("Suspending all but old Leader: ", arangod.endpoint);
550+
assertTrue(suspendExternal(arangod.pid));
551+
});
552+
currentLead = checkForFailover(currentLead);
553+
assertEqual(currentLead, oldLead);
554+
connectToServer(currentLead);
555+
// restart the other followers so the system is all up and running again
556+
suspended.forEach(arangod => {
557+
print("Resuming: ", arangod.endpoint);
558+
assertTrue(continueExternal(arangod.pid));
559+
});
560+
assertTrue(checkInSync(currentLead, servers));
561+
let stati = [];
562+
["/itz", "/crud"].forEach(mount => {
563+
try {
564+
print("Uninstalling " + mount);
565+
let res = arango.DELETE(
566+
"/_db/_system/_admin/aardvark/foxxes?teardown=true&mount=" + mount);
567+
stati.push(res.error);
568+
} catch (e) {}
569+
});
570+
assertEqual(stati, [false, false]);
571+
}
404572
},
405573

406574
// More complex case: We want to get the most up to date follower
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# xx
2+
3+
xx
4+
5+
# License
6+
7+
Copyright (c) 2021 xx
8+
9+
License: xx
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
'use strict';
2+
3+
module.context.use('/xxx', require('./routes/xxx'), 'xxx');
4+
module.context.use('/yyyy', require('./routes/yyyy'), 'yyyy');
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"name": "xx",
3+
"version": "0.0.0",
4+
"description": "xx",
5+
"engines": {
6+
"arangodb": "^3.0.0"
7+
},
8+
"author": "xx",
9+
"license": " xx",
10+
"main": "main.js",
11+
"scripts": {
12+
"setup": "scripts/setup.js",
13+
"teardown": "scripts/teardown.js"
14+
},
15+
"tests": "test/**/*.js"
16+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
'use strict';
2+
const _ = require('lodash');
3+
const joi = require('joi');
4+
5+
module.exports = {
6+
schema: {
7+
// Describe the attributes with joi here
8+
_key: joi.string()
9+
},
10+
forClient(obj) {
11+
// Implement outgoing transformations here
12+
obj = _.omit(obj, ['_id', '_rev', '_oldRev']);
13+
return obj;
14+
},
15+
fromClient(obj) {
16+
// Implement incoming transformations here
17+
return obj;
18+
}
19+
};

0 commit comments

Comments
 (0)
0