[CINFRA-184] Replicated Log + Supervision Integration Test (#15451) · strogo/arangodb@975417c · GitHub
[go: up one dir, main page]

Skip to content

Commit 975417c

Browse files
author
Lars Maier
authored
[CINFRA-184] Replicated Log + Supervision Integration Test (arangodb#15451)
* Added first integration test for rlogs + supervision.
* Fixing code style.
* Fixing test. Reduce grace-period-time.
* Use unique ids for new replicated logs.
1 parent 5c0dd76 commit 975417c

File tree

5 files changed

+309
-64
lines changed

5 files changed

+309
-64
lines changed

arangod/Replication2/ReplicatedLog/Algorithms.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -447,14 +447,7 @@ algorithms::CalculateCommitIndexOptions::CalculateCommitIndexOptions(
447447
std::size_t replicationFactor)
448448
: _writeConcern(writeConcern),
449449
_softWriteConcern(softWriteConcern),
450-
_replicationFactor(replicationFactor) {
451-
TRI_ASSERT(_writeConcern <= _softWriteConcern)
452-
<< "writeConcern > softWriteConcern " << _writeConcern << " > "
453-
<< _softWriteConcern;
454-
TRI_ASSERT(_softWriteConcern <= _replicationFactor)
455-
<< "softWriteConcern > opt.replicationFactor " << _softWriteConcern
456-
<< " > " << _replicationFactor;
457-
}
450+
_replicationFactor(replicationFactor) {}
458451

459452
auto algorithms::calculateCommitIndex(
460453
std::vector<ParticipantStateTuple> const& indexes,

js/client/modules/@arangodb/testsuites/replication2.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@ function replication2Server(options) {
6060
const opts = _.clone(options);
6161
opts.dbServers = Math.max(opts.dbServers, 5);
6262

63-
return tu.performTests(opts, testCases, 'replication2_server', tu.runThere);
63+
return tu.performTests(opts, testCases, 'replication2_server', tu.runThere, {
64+
'javascript.allow-external-process-control': 'true',
65+
'javascript.allow-port-testing': 'true',
66+
'javascript.allow-admin-execute': 'true',
67+
'agency.supervision-grace-period': '3.0',
68+
'agency.supervision-ok-threshold': '1.5',
69+
});
6470
}
6571

6672

@@ -71,6 +77,8 @@ exports.setup = function (testFns, defaultFns, opts, fnDocs, optionsDoc, allTest
7177
for (const [key, value] of Object.entries(functionsDocumentation)) {
7278
fnDocs[key] = value;
7379
}
80+
defaultFns.push('replication2_client');
81+
defaultFns.push('replication2_server');
7482
for (let i = 0; i < optionsDocumentation.length; i++) {
7583
optionsDoc.push(optionsDocumentation[i]);
7684
}

js/server/modules/@arangodb/testutils/replicated-logs-helper.js

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ const waitFor = function (checkFn, maxTries = 100) {
3232
if (result === true) {
3333
return result;
3434
}
35+
console.log(result);
3536
if (!(result instanceof Error)) {
3637
throw Error("expected error");
3738
}
38-
console.log(result);
3939
count += 1;
4040
wait(0.5);
4141
}
@@ -128,7 +128,73 @@ const replicatedLogDeletePlan = function (database, logId) {
128128
global.ArangoAgency.increaseVersion(`Plan/Version`);
129129
};
130130

131+
/**
 * Builds a zero-argument check that reports whether a replicated log has
 * reached the expected term on all given participants (and, optionally, on
 * the expected leader).
 *
 * On every invocation the check re-reads the log via
 * `readReplicatedLogAgency(database, logId)` and inspects its `current` entry.
 * Problems are *returned* as `Error` objects rather than thrown, so a polling
 * helper can log the reason and retry.
 *
 * @param database      database passed through to `readReplicatedLogAgency`
 * @param logId         log id passed through to `readReplicatedLogAgency`
 * @param term          minimum term every participant (and the leader) must report
 * @param participants  iterable of server ids expected in `current.localStatus`
 * @param leader        optional; when not `undefined`, `current.leader.serverId`
 *                      must equal it and `current.leader.term` must be >= `term`
 * @returns {function(): (true|Error)} the check function
 */
const replicatedLogIsReady = function (database, logId, term, participants, leader) {
  return function () {
    const {current} = readReplicatedLogAgency(database, logId);
    if (current === undefined) {
      return new Error("current not yet defined");
    }

    for (const srv of participants) {
      // A missing `localStatus` map is treated the same as a missing entry.
      const status = current.localStatus ? current.localStatus[srv] : undefined;
      if (!status) {
        return new Error(`Participant ${srv} has not yet reported to current.`);
      }
      if (status.term < term) {
        return new Error(`Participant ${srv} has not yet acknowledged the current term; ` +
          `found = ${status.term}, expected = ${term}.`);
      }
    }

    // The leader checks are skipped entirely when no leader is expected.
    if (leader !== undefined) {
      if (!current.leader) {
        return new Error("Leader has not yet established its term");
      }
      if (current.leader.serverId !== leader) {
        return new Error(`Wrong leader in current; found = ${current.leader.serverId}, expected = ${leader}`);
      }
      if (current.leader.term < term) {
        return new Error(`Leader has not yet confirmed the term; found = ${current.leader.term}, expected = ${term}`);
      }
    }
    return true;
  };
};
162+
163+
/**
 * Resolves the process id of the arangod instance behind a cluster server id.
 *
 * The server id is translated into an endpoint through
 * `global.ArangoClusterInfo.getServerEndpoint` and then matched against the
 * `endpoint` field of the entries in `global.instanceInfo.arangods`.
 *
 * @param serverId  cluster server id to resolve
 * @returns the `pid` field of the matching instance entry
 * @throws {Error} when no listed instance has the resolved endpoint
 */
const getServerProcessID = function (serverId) {
  const endpoint = global.ArangoClusterInfo.getServerEndpoint(serverId);
  // Now look for instanceInfo:
  const instance = global.instanceInfo.arangods.find(x => x.endpoint === endpoint);
  if (instance === undefined) {
    // Fail with a clear message instead of dereferencing a missing entry.
    throw new Error(`No instance found for server ${serverId} with endpoint ${endpoint}`);
  }
  return instance.pid;
};
170+
171+
const stopServer = function (serverId) {
172+
console.log(`suspending server ${serverId}`);
173+
let result = require('internal').suspendExternal(getServerProcessID(serverId));
174+
if (!result) {
175+
throw Error("Failed to suspend server");
176+
}
177+
};
178+
179+
const continueServer = function (serverId) {
180+
console.log(`continuing server ${serverId}`);
181+
let result = require('internal').continueExternal(getServerProcessID(serverId));
182+
if (!result) {
183+
throw Error("Failed to continue server");
184+
}
185+
};
186+
187+
/**
 * Returns a fresh numeric id for a replicated log.
 *
 * The id is obtained from `global.ArangoClusterInfo.uniqid()` and parsed as a
 * base-10 integer.
 * NOTE(review): presumably `uniqid()` yields a decimal string — confirm; the
 * explicit radix keeps the parse independent of any prefix heuristics.
 *
 * @returns {number} the parsed id
 */
const nextUniqueLogId = function () {
  return parseInt(global.ArangoClusterInfo.uniqid(), 10);
};
190+
191+
/**
 * Records the start of a test in the agency.
 *
 * Writes the current time as an ISO-8601 string to the key
 * `Testing/<testName>/Begin` via `global.ArangoAgency.set`.
 *
 * @param testName  name of the test, used as part of the agency key
 */
const registerAgencyTestBegin = function (testName) {
  global.ArangoAgency.set(`Testing/${testName}/Begin`, (new Date()).toISOString());
};
194+
195+
/**
 * Records the end of a test in the agency.
 *
 * Writes the current time as an ISO-8601 string to the key
 * `Testing/<testName>/End` via `global.ArangoAgency.set`.
 *
 * @param testName  name of the test, used as part of the agency key
 */
const registerAgencyTestEnd = function (testName) {
  global.ArangoAgency.set(`Testing/${testName}/End`, (new Date()).toISOString());
};
132198

133199
exports.waitFor = waitFor;
134200
exports.readAgencyValueAt = readAgencyValueAt;
@@ -140,3 +206,9 @@ exports.replicatedLogUpdatePlanParticipantsFlags = replicatedLogUpdatePlanPartic
140206
exports.replicatedLogSetPlanTerm = replicatedLogSetPlanTerm;
141207
exports.replicatedLogSetPlan = replicatedLogSetPlan;
142208
exports.replicatedLogDeletePlan = replicatedLogDeletePlan;
209+
exports.replicatedLogIsReady = replicatedLogIsReady;
210+
exports.stopServer = stopServer;
211+
exports.continueServer = continueServer;
212+
exports.nextUniqueLogId = nextUniqueLogId;
213+
exports.registerAgencyTestBegin = registerAgencyTestBegin;
214+
exports.registerAgencyTestEnd = registerAgencyTestEnd;

tests/js/server/replication2/replication2-maintenance-replicated-log-cluster.js

Lines changed: 31 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -34,37 +34,14 @@ const {
3434
replicatedLogUpdatePlanParticipantsFlags,
3535
replicatedLogSetPlanTerm,
3636
createTermSpecification,
37-
dbservers
37+
replicatedLogIsReady,
38+
dbservers,
39+
nextUniqueLogId,
40+
registerAgencyTestBegin, registerAgencyTestEnd
3841
} = require("@arangodb/testutils/replicated-logs-helper");
3942

4043
const database = 'ReplLogsMaintenanceTest';
4144

42-
const replicatedLogIsReady = function (logId, term, participants, leader) {
43-
return function () {
44-
let {current} = readReplicatedLogAgency(database, logId);
45-
if (current === undefined) {
46-
return Error("current not yet defined");
47-
}
48-
49-
for (const srv of participants) {
50-
if (!current.localStatus || !current.localStatus[srv]) {
51-
return Error(`Participant ${srv} has not yet reported to current.`);
52-
}
53-
if (current.localStatus[srv].term < term) {
54-
return Error(`Participant ${srv} has not yet acknowledged the current term; ` +
55-
`found = ${current.localStatus[srv].term}, expected = ${term}.`);
56-
}
57-
}
58-
59-
if (leader !== undefined) {
60-
if (!current.leader || current.leader.term < term || current.leader.serverId !== leader) {
61-
return Error("Leader has not yet established its term");
62-
}
63-
}
64-
return true;
65-
};
66-
};
67-
6845
const replicatedLogParticipantGeneration = function (logId, generation) {
6946
return function () {
7047
let {current} = readReplicatedLogAgency(database, logId);
@@ -74,9 +51,14 @@ const replicatedLogParticipantGeneration = function (logId, generation) {
7451
if (!current.leader) {
7552
return Error("Leader has not yet established its term");
7653
}
77-
if (!current.leader.committedParticipantsConfig || current.leader.committedParticipantsConfig.generation < generation) {
78-
return Error("Leader has not yet acked new generation");
54+
if (!current.leader.committedParticipantsConfig) {
55+
return Error("Leader has not yet committed any participants config");
56+
}
57+
if (current.leader.committedParticipantsConfig.generation < generation) {
58+
return Error("Leader has not yet acked new generation; "
59+
+ `found ${current.leader.committedParticipantsConfig.generation}, expected = ${generation}`);
7960
}
61+
8062
return true;
8163
};
8264
};
@@ -119,13 +101,6 @@ const replicatedLogParticipantsFlag = function (logId, flags, generation = undef
119101

120102
const replicatedLogSuite = function () {
121103

122-
const nextLogId = (function () {
123-
let logId = 100;
124-
return function () {
125-
return logId++;
126-
};
127-
}());
128-
129104
const targetConfig = {
130105
writeConcern: 2,
131106
softWriteConcern: 2,
@@ -150,15 +125,17 @@ const replicatedLogSuite = function () {
150125
if (!databaseExisted) {
151126
db._dropDatabase(database);
152127
}
153-
}
128+
},
154129
};
155130
}());
156131

157132
return {
158133
setUpAll, tearDownAll,
134+
setUp: registerAgencyTestBegin,
135+
tearDown: registerAgencyTestEnd,
159136

160137
testCreateReplicatedLog: function () {
161-
const logId = nextLogId();
138+
const logId = nextUniqueLogId();
162139
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
163140
const leader = servers[0];
164141
const term = 1;
@@ -169,13 +146,13 @@ const replicatedLogSuite = function () {
169146
});
170147

171148
// wait for all servers to have reported in current
172-
waitFor(replicatedLogIsReady(logId, term, servers, leader));
149+
waitFor(replicatedLogIsReady(database, logId, term, servers, leader));
173150

174151
replicatedLogDeletePlan(database, logId);
175152
},
176153

177154
testCreateReplicatedLogWithoutLeader: function () {
178-
const logId = nextLogId();
155+
const logId = nextUniqueLogId();
179156
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
180157
const term = 1;
181158
replicatedLogSetPlan(database, logId, {
@@ -185,13 +162,13 @@ const replicatedLogSuite = function () {
185162
});
186163

187164
// wait for all servers to have reported in current
188-
waitFor(replicatedLogIsReady(logId, term, servers));
165+
waitFor(replicatedLogIsReady(database, logId, term, servers));
189166

190167
replicatedLogDeletePlan(database, logId);
191168
},
192169

193170
testAddParticipantFlag: function () {
194-
const logId = nextLogId();
171+
const logId = nextUniqueLogId();
195172
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
196173
const leader = servers[0];
197174
const term = 1;
@@ -202,7 +179,7 @@ const replicatedLogSuite = function () {
202179
});
203180

204181
// wait for all servers to have reported in current
205-
waitFor(replicatedLogIsReady(logId, term, servers, leader));
182+
waitFor(replicatedLogIsReady(database, logId, term, servers, leader));
206183

207184
// now update the excluded flag for one participant
208185
const follower = servers[1];
@@ -217,7 +194,7 @@ const replicatedLogSuite = function () {
217194
},
218195

219196
testUpdateTermInPlanLog: function () {
220-
const logId = nextLogId();
197+
const logId = nextUniqueLogId();
221198
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
222199
const leader = servers[0];
223200
const term = 1;
@@ -228,16 +205,16 @@ const replicatedLogSuite = function () {
228205
});
229206

230207
// wait for all servers to have reported in current
231-
waitFor(replicatedLogIsReady(logId, term, servers, leader));
208+
waitFor(replicatedLogIsReady(database, logId, term, servers, leader));
232209
replicatedLogSetPlanTerm(database, logId, createTermSpecification(term + 1, servers, targetConfig, leader));
233210

234211
// wait again for all servers to have acked term
235-
waitFor(replicatedLogIsReady(logId, term + 1, servers, leader));
212+
waitFor(replicatedLogIsReady(database, logId, term + 1, servers, leader));
236213
replicatedLogDeletePlan(database, logId);
237214
},
238215

239216
testUpdateTermInPlanLogWithNewLeader: function () {
240-
const logId = nextLogId();
217+
const logId = nextUniqueLogId();
241218
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
242219
const leader = servers[0];
243220
const term = 1;
@@ -248,16 +225,16 @@ const replicatedLogSuite = function () {
248225
});
249226

250227
// wait for all servers to have reported in current
251-
waitFor(replicatedLogIsReady(logId, term, servers));
228+
waitFor(replicatedLogIsReady(database, logId, term, servers));
252229
// wait again for all servers to have acked term
253230
const otherLeader = servers[1];
254231
replicatedLogSetPlanTerm(database, logId, createTermSpecification(term + 1, servers, targetConfig, otherLeader));
255-
waitFor(replicatedLogIsReady(logId, term + 1, servers, otherLeader));
232+
waitFor(replicatedLogIsReady(database, logId, term + 1, servers, otherLeader));
256233
replicatedLogDeletePlan(database, logId);
257234
},
258235

259236
testUpdateTermAddParticipant: function () {
260-
const logId = nextLogId();
237+
const logId = nextUniqueLogId();
261238
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
262239
const leader = servers[0];
263240
const remaining = _.difference(dbservers, servers);
@@ -269,16 +246,16 @@ const replicatedLogSuite = function () {
269246
});
270247

271248
// wait for all servers to have reported in current
272-
waitFor(replicatedLogIsReady(logId, term, servers));
249+
waitFor(replicatedLogIsReady(database, logId, term, servers));
273250
// now rewrite the term with an additional participant
274251
const newServers = [...servers, _.sample(remaining)];
275252
replicatedLogSetPlanTerm(database, logId, createTermSpecification(term, newServers, targetConfig, leader));
276-
waitFor(replicatedLogIsReady(logId, term, newServers, leader));
253+
waitFor(replicatedLogIsReady(database, logId, term, newServers, leader));
277254
replicatedLogDeletePlan(database, logId);
278255
},
279256

280257
testUpdateTermRemoveParticipant: function () {
281-
const logId = nextLogId();
258+
const logId = nextUniqueLogId();
282259
const servers = _.sampleSize(dbservers, targetConfig.replicationFactor);
283260
const remaining = _.difference(dbservers, servers);
284261
const toBeRemoved = _.sample(remaining);
@@ -292,7 +269,7 @@ const replicatedLogSuite = function () {
292269
});
293270

294271
// wait for all servers to have reported in current
295-
waitFor(replicatedLogIsReady(logId, term, newServers));
272+
waitFor(replicatedLogIsReady(database, logId, term, newServers));
296273
// now rewrite the term without the participant that is to be removed
297274
replicatedLogSetPlanTerm(database, logId, createTermSpecification(term, servers, targetConfig, leader));
298275
// TODO waitFor(replicatedLogParticipantsFlag(logId, {[toBeRemoved]: null})); -- doesn't work yet

0 commit comments

Comments
 (0)
0