8000 Add a new failover option prioritising nodes · phpredis/phpredis@84f44c6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 84f44c6

Browse files
committed
Add a new failover option prioritising nodes
This makes it possible to define a list of nodes you'd prefer to communicate with in cluster mode. This behaves similarly to FAILOVER_DISTRIBUTE, where it randomises the list of nodes before trying them in sequence - however it takes that random list and sorts any preferred nodes to the top first. All candidate nodes are still in the list, but the preferred ones get tried first. This is extremely helpful for setups where you know which nodes are closer (i.e. not crossing an AZ boundary).
1 parent f827ea0 commit 84f44c6

7 files changed

+138
-5
lines changed

cluster_library.c

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,10 @@ PHP_REDIS_API redisCluster *cluster_create(double timeout, double read_timeout,
843843
ALLOC_HASHTABLE(c->nodes);
844844
zend_hash_init(c->nodes, 0, NULL, ht_free_node, 0);
845845

846+
/* Allocate our preferred nodes HashTable */
847+
ALLOC_HASHTABLE(c->preferred_nodes);
848+
zend_hash_init(c->preferred_nodes, 0, NULL, NULL, 0);
849+
846850
return c;
847851
}
848852

@@ -861,10 +865,12 @@ cluster_free(redisCluster *c, int free_ctx)
861865
/* Call hash table destructors */
862866
zend_hash_destroy(c->seeds);
863867
zend_hash_destroy(c->nodes);
868+
zend_hash_destroy(c->preferred_nodes);
864869

865870
/* Free hash tables themselves */
866871
efree(c->seeds);
867872
efree(c->nodes);
873+
efree(c->preferred_nodes);
868874

869875
/* Free any error we've got */
870876
if (c->err) zend_string_release(c->err);
@@ -1235,10 +1241,74 @@ PHP_REDIS_API void cluster_disconnect(redisCluster *c, int force) {
12351241
} ZEND_HASH_FOREACH_END();
12361242
}
12371243

1244+
int preferred_compare(const void *const first, const void *const second)
1245+
{
1246+
const preferredNode* a = (const preferredNode*)first;
1247+
const preferredNode* b = (const preferredNode*)second;
1248+
if (a->preferred == 1 && b->preferred == 0)
1249+
return -1;
1250+
else if (a->preferred == 0 && b->preferred == 1)
1251+
return 1;
1252+
else if (a->original_order < b->original_order)
1253+
return -1;
1254+
else if (a->original_order > b->original_order)
1255+
return 1;
1256+
else
1257+
return 0;
1258+
}
1259+
1260+
/* This method takes the randomised list of nodes and sorts preferred nodes to
1261+
* the top. */
1262+
static void preferredsort(int *array, size_t len, redisCluster *c,
1263+
unsigned short slot)
1264+
{
1265+
int i, temp, key_len, *prefnodes;
1266+
size_t r;
1267+
RedisSock *redis_sock;
1268+
char key[1024];
1269+
zval *node;
1270+
1271+
struct preferredNode ab[len];
1272+
1273+
// array: key => order; value => node-idx (0=master)
1274+
for (i = 0; i < len; i++) {
1275+
// Get node host+port string.
1276+
redis_sock = cluster_slot_sock(c, c->cmd_slot, array[i]);
1277+
1278+
ab[i].idx = array[i];
1279+
ab[i].original_order = i;
1280+
ab[i].preferred = 0;
1281+
if (!redis_sock) {
1282+
continue;
1283+
}
1284+
1285+
// Is it in the preferred_nodes map?
1286+
key_len = snprintf(key, sizeof(key), "%s:%d", ZSTR_VAL(redis_sock->host), redis_sock->port);
1287+
// Perhaps the preferred_nodes table should be keyed on the host:port string to make this
1288+
// easier and faster.
1289+
ZEND_HASH_FOREACH_VAL(c->preferred_nodes, node) {
1290+
ZVAL_DEREF(node);
1291+
1292+
if (strcmp(Z_STRVAL_P(node), key) == 0) {
1293+
ab[i].preferred = 1;
1294+
break;
1295+
}
1296+
1297+
} ZEND_HASH_FOREACH_END();
1298+
}
1299+
1300+
// Sort preferred nodes to the top of the list.
1301+
qsort(ab, len, sizeof(*ab), preferred_compare);
1302+
for (i = 0; i < len; i++) {
1303+
array[i] = ab[i].idx;
1304+
}
1305+
}
1306+
1307+
12381308
/* This method attempts to write our command at random to the master and any
12391309
* attached slaves, until we either successfully do so, or fail. */
12401310
static int cluster_dist_write(redisCluster *c, const char *cmd, size_t sz,
1241-
int nomaster)
1311+
int nomaster, int preferred)
12421312
{
12431313
int i, count = 1, *nodes;
12441314
RedisSock *redis_sock;
@@ -1256,6 +1326,11 @@ static int cluster_dist_write(redisCluster *c, const char *cmd, size_t sz,
12561326
for (i = 0; i < count; i++) nodes[i] = i;
12571327
fyshuffle(nodes, count);
12581328

1329+
/* Shift preferred nodes to the top of the list if we're in preferred
1330+
* mode */
1331+
if (preferred && zend_hash_num_elements(c->preferred_nodes) > 0)
1332+
preferredsort(nodes, count, c, c->cmd_slot);
1333+
12591334
/* Iterate through our nodes until we find one we can write to or fail */
12601335
for (i = 0; i < count; i++) {
12611336
/* Skip if this is the master node and we don't want to query that */
@@ -1306,6 +1381,8 @@ static int cluster_dist_write(redisCluster *c, const char *cmd, size_t sz,
13061381
* REDIS_FAILOVER_DISTRIBUTE_SLAVES:
13071382
* We pick at random from slave nodes of a given master. This option is
13081383
* used to load balance read queries against N slaves.
1384+
* REDIS_FAILOVER_PREFERRED:
1385+
* Similar to DISTRIBUTE, but with a list of nodes we prefer over others.
13091386
*
13101387
* Once we are able to find a node we can write to, we check for MOVED or
13111388
* ASKING redirection, such that the keyspace can be updated.
@@ -1315,7 +1392,7 @@ static int cluster_sock_write(redisCluster *c, const char *cmd, size_t sz,
13151392
{
13161393
redisClusterNode *seed_node;
13171394
RedisSock *redis_sock;
1318-
int failover, nomaster;
1395+
int failover, nomaster, preferred;
13191396

13201397
/* First try the socket requested */
13211398
redis_sock = c->cmd_sock;
@@ -1343,12 +1420,13 @@ static int cluster_sock_write(redisCluster *c, const char *cmd, size_t sz,
13431420
} else if (failover == REDIS_FAILOVER_ERROR) {
13441421
/* Try the master, then fall back to any slaves we may have */
13451422
if (CLUSTER_SEND_PAYLOAD(redis_sock, cmd, sz) ||
1346-
!cluster_dist_write(c, cmd, sz, 1)) return 0;
1423+
!cluster_dist_write(c, cmd, sz, 1, 0)) return 0;
13471424
} else {
13481425
/* Include or exclude master node depending on failover option and
13491426
* attempt to make our write */
13501427
nomaster = failover == REDIS_FAILOVER_DISTRIBUTE_SLAVES;
1351-
if (!cluster_dist_write(c, cmd, sz, nomaster)) {
1428+
preferred = failover == REDIS_FAILOVER_PREFERRED;
1429+
if (!cluster_dist_write(c, cmd, sz, nomaster, preferred)) {
13521430
/* We were able to write to a master or slave at random */
13531431
return 0;
13541432
}

cluster_library.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,9 @@ typedef struct redisCluster {
208208
/* All RedisCluster objects we've created/are connected to */
209209
HashTable *nodes;
210210

211+
/* A list of nodes we prefer to talk to when in FAILOVER_PREFERRED mode */
212+
HashTable *preferred_nodes;
213+
211214
/* Transaction handling linked list, and where we are as we EXEC */
212215
clusterFoldItem *multi_head;
213216
clusterFoldItem *multi_curr;
@@ -259,6 +262,12 @@ typedef struct redisCluster {
259262
/* RedisCluster response processing callback */
260263
typedef void (*cluster_cb)(INTERNAL_FUNCTION_PARAMETERS, redisCluster*, void*);
261264

265+
/* Scratch record used while sorting a slot's candidate nodes so that
 * preferred nodes are tried first.
 *
 *   idx             node index within the slot, copied from the candidate
 *                   list being sorted
 *   preferred       1 when the node's "host:port" matched an entry in the
 *                   preferred nodes list, 0 otherwise
 *   original_order  position the node had in the candidate list before
 *                   sorting; used as the tie-break so order is retained */
typedef struct preferredNode {
266+
int idx;
267+
int preferred;
268+
int original_order;
269+
} preferredNode;
270+
262271
/* Context for processing transactions */
263272
struct clusterFoldItem {
264273
/* Response processing callback */

common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,14 @@ typedef enum _PUBSUB_TYPE {
8282
#define REDIS_OPT_REPLY_LITERAL 8
8383
#define REDIS_OPT_COMPRESSION_LEVEL 9
8484
#define REDIS_OPT_NULL_MBULK_AS_NULL 10
85+
#define REDIS_OPT_PREFERRED_NODES 11
8586

8687
/* cluster options */
8788
#define REDIS_FAILOVER_NONE 0
8889
#define REDIS_FAILOVER_ERROR 1
8990
#define REDIS_FAILOVER_DISTRIBUTE 2
9091
#define REDIS_FAILOVER_DISTRIBUTE_SLAVES 3
92+
#define REDIS_FAILOVER_PREFERRED 4
9193
/* serializers */
9294
typedef enum {
9395
REDIS_SERIALIZER_NONE,

redis.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,7 @@ static void add_class_constants(zend_class_entry *ce, int is_cluster) {
688688
zend_declare_class_constant_long(ce, ZEND_STRL("OPT_REPLY_LITERAL"), REDIS_OPT_REPLY_LITERAL);
689689
zend_declare_class_constant_long(ce, ZEND_STRL("OPT_COMPRESSION_LEVEL"), REDIS_OPT_COMPRESSION_LEVEL);
690690
zend_declare_class_constant_long(ce, ZEND_STRL("OPT_NULL_MULTIBULK_AS_NULL"), REDIS_OPT_NULL_MBULK_AS_NULL);
691+
zend_declare_class_constant_long(ce, ZEND_STRL("OPT_PREFERRED_NODES"), REDIS_OPT_PREFERRED_NODES);
691692

692693
/* serializer */
693694
zend_declare_class_constant_long(ce, ZEND_STRL("SERIALIZER_NONE"), REDIS_SERIALIZER_NONE);
@@ -737,6 +738,7 @@ static void add_class_constants(zend_class_entry *ce, int is_cluster) {
737738
zend_declare_class_constant_long(ce, ZEND_STRL("FAILOVER_ERROR"), REDIS_FAILOVER_ERROR);
738739
zend_declare_class_constant_long(ce, ZEND_STRL("FAILOVER_DISTRIBUTE"), REDIS_FAILOVER_DISTRIBUTE);
739740
zend_declare_class_constant_long(ce, ZEND_STRL("FAILOVER_DISTRIBUTE_SLAVES"), REDIS_FAILOVER_DISTRIBUTE_SLAVES);
741+
zend_declare_class_constant_long(ce, ZEND_STRL("FAILOVER_PREFERRED"), REDIS_FAILOVER_PREFERRED);
740742
} else {
741743
/* Cluster doesn't support pipelining at this time */
742744
zend_declare_class_constant_long(ce, ZEND_STRL("PIPELINE"), PIPELINE);

redis_cluster.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ zend_object * create_cluster_context(zend_class_entry *class_type) {
326326
ALLOC_HASHTABLE(cluster->nodes);
327327
zend_hash_init(cluster->nodes, 0, NULL, ht_free_node, 0);
328328

329+
/* Allocate our preferred nodes HashTable */
330+
ALLOC_HASHTABLE(cluster->preferred_nodes);
331+
zend_hash_init(cluster->preferred_nodes, 0, NULL, NULL, 0);
332+
329333
// Initialize it
330334
zend_object_std_init(&cluster->std, class_type);
331335

redis_commands.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4041,6 +4041,8 @@ void redis_getoption_handler(INTERNAL_FUNCTION_PARAMETERS,
40414041
RETURN_LONG(redis_sock->null_mbulk_as_null);
40424042
case REDIS_OPT_FAILOVER:
40434043
RETURN_LONG(c->failover);
4044+
case REDIS_OPT_PREFERRED_NODES:
4045+
RETURN_ARR(zend_array_dup(c->preferred_nodes));
40444046
default:
40454047
RETURN_FALSE;
40464048
}
@@ -4168,12 +4170,22 @@ void redis_setoption_handler(INTERNAL_FUNCTION_PARAMETERS,
41684170
if (val_long == REDIS_FAILOVER_NONE ||
41694171
val_long == REDIS_FAILOVER_ERROR ||
41704172
val_long == REDIS_FAILOVER_DISTRIBUTE ||
4171-
val_long == REDIS_FAILOVER_DISTRIBUTE_SLAVES)
4173+
val_long == REDIS_FAILOVER_DISTRIBUTE_SLAVES ||
4174+
val_long == REDIS_FAILOVER_PREFERRED)
41724175
{
41734176
c->failover = val_long;
41744177
RETURN_TRUE;
41754178
}
41764179
break;
4180+
case REDIS_OPT_PREFERRED_NODES:
4181+
if (Z_TYPE_P(val) != IS_ARRAY)
4182+
RETURN_FALSE;
4183+
if (c->preferred_nodes) {
4184+
zend_hash_destroy(c->preferred_nodes);
4185+
FREE_HASHTABLE(c->preferred_nodes);
4186+
}
4187+
c->preferred_nodes = zend_array_dup(Z_ARRVAL_P(val));
4188+
RETURN_TRUE;
41774189
EMPTY_SWITCH_DEFAULT_CASE()
41784190
}
41794191
RETURN_FALSE;

tests/RedisClusterTest.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,32 @@ public function testFailOver() {
654654
}
655655
}
656656

657+
/**
 * Exercise FAILOVER_PREFERRED with a single preferred node.
 *
 * Currently skipped on entry: the reads below can reach a replica before
 * it has caught up with its master, which makes the test racy.
 */
public function testFailOverPreferred() {
    return $this->markTestSkipped(); // this test is racy.

    $expected_values = [];
    $key_types = [];

    /* Write one key for every redis type we know about */
    for ($round = 0; $round < 1; $round++) {
        foreach ($this->_arr_redis_types as $redis_type) {
            $key = $this->setKeyVals($round, $redis_type, $expected_values);
            $key_types[$key] = $redis_type;
        }
    }

    // Let the slaves catch up with the master before reading back.
    // This is racy... which is probably why the failover test above skips
    // the failover modes other than 'NONE'.
    usleep(1000);

    // Prefer one node, then read every key back through it.
    $this->redis->setOption(RedisCluster::OPT_SLAVE_FAILOVER, RedisCluster::FAILOVER_PREFERRED);
    $this->redis->setOption(RedisCluster::OPT_PREFERRED_NODES, ['127.0.0.1:7000']);

    foreach ($expected_values as $key => $expected) {
        $this->checkKeyValue($key, $key_types[$key], $expected);
    }
}
682+
657683
/* Test a 'raw' command */
658684
public function testRawCommand() {
659685
$this->redis->rawCommand('mykey', 'set', 'mykey', 'my-value');

0 commit comments

Comments
 (0)
0