Support connection load balancing in libpq · postgres/postgres@7f5b198 · GitHub
[go: up one dir, main page]

Skip to content

File tree

10 files changed

+431
-3
lines changed

10 files changed

+431
-3
lines changed

.cirrus.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ env:
2525
MTEST_ARGS: --print-errorlogs --no-rebuild -C build
2626
PGCTLTIMEOUT: 120 # avoids spurious failures during parallel tests
2727
TEMP_CONFIG: ${CIRRUS_WORKING_DIR}/src/tools/ci/pg_ci_base.conf
28-
PG_TEST_EXTRA: kerberos ldap ssl
28+
PG_TEST_EXTRA: kerberos ldap ssl load_balance
2929

3030

3131
# What files to preserve in case tests fail
@@ -313,6 +313,14 @@ task:
313313
mkdir -m 770 /tmp/cores
314314
chown root:postgres /tmp/cores
315315
sysctl kernel.core_pattern='/tmp/cores/%e-%s-%p.core'
316+
317+
setup_hosts_file_script: |
318+
cat >> /etc/hosts <<-EOF
319+
127.0.0.1 pg-loadbalancetest
320+
127.0.0.2 pg-loadbalancetest
321+
127.0.0.3 pg-loadbalancetest
322+
EOF
323+
316324
setup_additional_packages_script: |
317325
#apt-get update
318326
#DEBIAN_FRONTEND=noninteractive apt-get -y install ...
@@ -564,6 +572,12 @@ task:
564572
setup_additional_packages_script: |
565573
REM choco install -y --no-progress ...
566574
575+
setup_hosts_file_script: |
576+
echo 127.0.0.1 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
577+
echo 127.0.0.2 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
578+
echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
579+
type c:\Windows\System32\Drivers\etc\hosts
580+
567581
# Use /DEBUG:FASTLINK to avoid high memory usage during linking
568582
configure_script: |
569583
vcvarsall x64

doc/src/sgml/libpq.sgml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,6 +2115,67 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
21152115
</para>
21162116
</listitem>
21172117
</varlistentry>
2118+
2119+
<varlistentry id="libpq-connect-load-balance-hosts" xreflabel="load_balance_hosts">
2120+
<term><literal>load_balance_hosts</literal></term>
2121+
<listitem>
2122+
<para>
2123+
Controls the order in which the client tries to connect to the available
2124+
hosts and addresses. Once a connection attempt is successful no other
2125+
hosts and addresses will be tried. This parameter is typically used in
2126+
combination with multiple host names or a DNS record that returns
2127+
multiple IPs. This parameter can be used in combination with
2128+
<xref linkend="libpq-connect-target-session-attrs"/>
2129+
to, for example, load balance over standby servers only. Once successfully
2130+
connected, subsequent queries on the returned connection will all be
2131+
sent to the same server. There are currently two modes:
2132+
<variablelist>
2133+
<varlistentry>
2134+
<term><literal>disable</literal> (default)</term>
2135+
<listitem>
2136+
<para>
2137+
No load balancing across hosts is performed. Hosts are tried in
2138+
the order in which they are provided and addresses are tried in
2139+
the order they are received from DNS or a hosts file.
2140+
</para>
2141+
</listitem>
2142+
</varlistentry>
2143+
2144+
<varlistentry>
2145+
<term><literal>random</literal></term>
2146+
<listitem>
2147+
<para>
2148+
Hosts and addresses are tried in random order. This value is mostly
2149+
useful when opening multiple connections at the same time, possibly
2150+
from different machines. This way connections can be load balanced
2151+
across multiple <productname>PostgreSQL</productname> servers.
2152+
</para>
2153+
<para>
2154+
While random load balancing, due to its random nature, will almost
2155+
never result in a completely uniform distribution, it statistically
2156+
gets quite close. One important aspect here is that this algorithm
2157+
uses two levels of random choices: First the hosts
2158+
will be resolved in random order. Then secondly, before resolving
2159+
the next host, all resolved addresses for the current host will be
2160+
tried in random order. This behavior can skew the number of
2161+
connections each node gets greatly in certain cases, for instance
2162+
when some hosts resolve to more addresses than others. But such a
2163+
skew can also be used on purpose, e.g. to increase the number of
2164+
connections a larger server gets by providing its hostname multiple
2165+
times in the host string.
2166+
</para>
2167+
<para>
2168+
When using this value it's recommended to also configure a reasonable
2169+
value for <xref linkend="libpq-connect-connect-timeout"/>, so that
2170+
if one of the nodes that are used for load balancing is not responding,
2171+
a new node will be tried.
2172+
</para>
2173+
</listitem>
2174+
</varlistentry>
2175+
</variablelist>
2176+
</para>
2177+
</listitem>
2178+
</varlistentry>
21182179
</variablelist>
21192180
</para>
21202181
</sect2>

doc/src/sgml/regress.sgml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ make check-world -j8 >/dev/null
256256
<varname>PG_TEST_EXTRA</varname> to a whitespace-separated list, for
257257
example:
258258
<programlisting>
259-
make check-world PG_TEST_EXTRA='kerberos ldap ssl'
259+
make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance'
260260
</programlisting>
261261
The following values are currently supported:
262262
<variablelist>
@@ -290,6 +290,17 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl'
290290
</listitem>
291291
</varlistentry>
292292

293+
<varlistentry>
294+
<term><literal>load_balance</literal></term>
295+
<listitem>
296+
<para>
297+
Runs the test <filename>src/interfaces/libpq/t/004_load_balance_dns.pl</filename>.
298+
This requires editing the system <filename>hosts</filename> file and
299+
opens TCP/IP listen sockets.
300+
</para>
301+
</listitem>
302+
</varlistentry>
303+
293304
<varlistentry>
294305
<term><literal>wal_consistency_checking</literal></term>
295306
<listitem>

src/interfaces/libpq/fe-connect.c

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
123123
#define DefaultChannelBinding "disable"
124124
#endif
125125
#define DefaultTargetSessionAttrs "any"
126+
#define DefaultLoadBalanceHosts "disable"
126127
#ifdef USE_SSL
127128
#define DefaultSSLMode "prefer"
128129
#define DefaultSSLCertMode "allow"
@@ -351,6 +352,11 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
351352
"Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */
352353
offsetof(struct pg_conn, target_session_attrs)},
353354

355+
{"load_balance_hosts", "PGLOADBALANCEHOSTS",
356+
DefaultLoadBalanceHosts, NULL,
357+
"Load-Balance-Hosts", "", 8, /* sizeof("disable") = 8 */
358+
offsetof(struct pg_conn, load_balance_hosts)},
359+
354360
/* Terminating entry --- MUST BE LAST */
355361
{NULL, NULL, NULL, NULL,
356362
NULL, NULL, 0}
@@ -435,6 +441,8 @@ static void pgpassfileWarning(PGconn *conn);
435441
static void default_threadlock(int acquire);
436442
static bool sslVerifyProtocolVersion(const char *version);
437443
static bool sslVerifyProtocolRange(const char *min, const char *max);
444+
static bool parse_int_param(const char *value, int *result, PGconn *conn,
445+
const char *context);
438446

439447

440448
/* global variable because fe-auth.c needs to access it */
@@ -1020,6 +1028,31 @@ parse_comma_separated_list(char **startptr, bool *more)
10201028
return p;
10211029
}
10221030

1031+
/*
1032+
* Initializes the prng_state field of the connection. We want something
1033+
* unpredictable, so if possible, use high-quality random bits for the
1034+
* seed. Otherwise, fall back to a seed based on the connection address,
1035+
* timestamp and PID.
1036+
*/
1037+
static void
1038+
libpq_prng_init(PGconn *conn)
1039+
{
1040+
uint64 rseed;
1041+
struct timeval tval = {0};
1042+
1043+
if (pg_prng_strong_seed(&conn->prng_state))
1044+
return;
1045+
1046+
gettimeofday(&tval, NULL);
1047+
1048+
rseed = ((uint64) conn) ^
1049+
((uint64) getpid()) ^
1050+
((uint64) tval.tv_usec) ^
1051+
((uint64) tval.tv_sec);
1052+
1053+
pg_prng_seed(&conn->prng_state, rseed);
1054+
}
1055+
10231056
/*
10241057
* connectOptions2
10251058
*
@@ -1619,6 +1652,49 @@ connectOptions2(PGconn *conn)
16191652
else
16201653
conn->target_server_type = SERVER_TYPE_ANY;
16211654

1655+
/*
1656+
* validate load_balance_hosts option, and set load_balance_type
1657+
*/
1658+
if (conn->load_balance_hosts)
1659+
{
1660+
if (strcmp(conn->load_balance_hosts, "disable") == 0)
1661+
conn->load_balance_type = LOAD_BALANCE_DISABLE;
1662+
else if (strcmp(conn->load_balance_hosts, "random") == 0)
1663+
conn->load_balance_type = LOAD_BALANCE_RANDOM;
1664+
else
1665+
{
1666+
conn->status = CONNECTION_BAD;
1667+
libpq_append_conn_error(conn, "invalid %s value: \"%s\"",
1668+
"load_balance_hosts",
1669+
conn->load_balance_hosts);
1670+
return false;
1671+
}
1672+
}
1673+
else
1674+
conn->load_balance_type = LOAD_BALANCE_DISABLE;
1675+
1676+
if (conn->load_balance_type == LOAD_BALANCE_RANDOM)
1677+
{
1678+
libpq_prng_init(conn);
1679+
1680+
/*
1681+
* This is the "inside-out" variant of the Fisher-Yates shuffle
1682+
* algorithm. Notionally, we append each new value to the array and
1683+
* then swap it with a randomly-chosen array element (possibly
1684+
* including itself, else we fail to generate permutations with the
1685+
* last integer last). The swap step can be optimized by combining it
1686+
* with the insertion.
1687+
*/
1688+
for (i = 1; i < conn->nconnhost; i++)
1689+
{
1690+
int j = pg_prng_uint64_range(&conn->prng_state, 0, i);
1691+
pg_conn_host temp = conn->connhost[j];
1692+
1693+
conn->connhost[j] = conn->connhost[i];
1694+
conn->connhost[i] = temp;
1695+
}
1696+
}
1697+
16221698
/*
16231699
* Resolve special "auto" client_encoding from the locale
16241700
*/
@@ -2626,6 +2702,32 @@ PQconnectPoll(PGconn *conn)
26262702
if (ret)
26272703
goto error_return; /* message already logged */
26282704

2705+
/*
2706+
* If random load balancing is enabled we shuffle the addresses.
2707+
*/
2708+
if (conn->load_balance_type == LOAD_BALANCE_RANDOM)
2709+
{
2710+
/*
2711+
* This is the "inside-out" variant of the Fisher-Yates shuffle
2712+
* algorithm. Notionally, we append each new value to the array
2713+
* and then swap it with a randomly-chosen array element (possibly
2714+
* including itself, else we fail to generate permutations with
2715+
* the last integer last). The swap step can be optimized by
2716+
* combining it with the insertion.
2717+
*
2718+
* We don't need to initialize conn->prng_state here, because that
2719+
* already happened in connectOptions2.
2720+
*/
2721+
for (int i = 1; i < conn->naddr; i++)
2722+
{
2723+
int j = pg_prng_uint64_range(&conn->prng_state, 0, i);
2724+
AddrInfo temp = conn->addr[j];
2725+
2726+
conn->addr[j] = conn->addr[i];
2727+
conn->addr[i] = temp;
2728+
}
2729+
}
2730+
26292731
reset_connection_state_machine = true;
26302732
conn->try_next_host = false;
26312733
}
@@ -4320,6 +4422,7 @@ freePGconn(PGconn *conn)
43204422
free(conn->outBuffer);
43214423
free(conn->rowBuf);
43224424
free(conn->target_session_attrs);
4425+
free(conn->load_balance_hosts);
43234426
termPQExpBuffer(&conn->errorMessage);
43244427
termPQExpBuffer(&conn->workBuffer);
43254428

src/interfaces/libpq/libpq-int.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
#include <netdb.h>
2727
#include <sys/socket.h>
2828
#include <time.h>
29-
#ifndef WIN32
29+
/* MinGW has sys/time.h, but MSVC doesn't */
30+
#ifndef _MSC_VER
3031
#include <sys/time.h>
3132
#endif
3233

@@ -82,6 +83,8 @@ typedef struct
8283
#endif
8384
#endif /* USE_OPENSSL */
8485

86+
#include "common/pg_prng.h"
87+
8588
/*
8689
* POSTGRES backend dependent Constants.
8790
*/
@@ -242,6 +245,13 @@ typedef enum
242245
SERVER_TYPE_PREFER_STANDBY_PASS2 /* second pass - behaves same as ANY */
243246
} PGTargetServerType;
244247

248+
/* Load balancing type (decoded value of load_balance_hosts) */
249+
typedef enum
250+
{
251+
LOAD_BALANCE_DISABLE = 0, /* Use the existing host order (default) */
252+
LOAD_BALANCE_RANDOM, /* Randomly shuffle the hosts */
253+
} PGLoadBalanceType;
254+
245255
/* Boolean value plus a not-known state, for GUCs we might have to fetch */
246256
typedef enum
247257
{
@@ -398,6 +408,7 @@ struct pg_conn
398408
char *ssl_max_protocol_version; /* maximum TLS protocol version */
399409
char *target_session_attrs; /* desired session properties */
400410
char *require_auth; /* name of the expected auth method */
411+
char *load_balance_hosts; /* load balance over hosts */
401412

402413
/* Optional file to write trace info to */
403414
FILE *Pfdebug;
@@ -469,6 +480,8 @@ struct pg_conn
469480

470481
/* Transient state needed while establishing connection */
471482
PGTargetServerType target_server_type; /* desired session properties */
483+
PGLoadBalanceType load_balance_type; /* desired load balancing
484+
* algorithm */
472485
bool try_next_addr; /* time to advance to next address/host? */
473486
bool try_next_host; /* time to advance to next connhost[]? */
474487
int naddr; /* number of addresses returned by getaddrinfo */
@@ -488,6 +501,8 @@ struct pg_conn
488501
PGVerbosity verbosity; /* error/notice message verbosity */
489502
PGContextVisibility show_context; /* whether to show CONTEXT field */
490503
PGlobjfuncs *lobjfuncs; /* private state for large-object access fns */
504+
pg_prng_state prng_state; /* prng state for load balancing connections */
505+
491506

492507
/* Buffer for data received from backend and not yet processed */
493508
char *inBuffer; /* currently allocated buffer */

src/interfaces/libpq/meson.build

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ tests += {
116116
'tests': [
117117
't/001_uri.pl',
118118
't/002_api.pl',
119+
't/003_load_balance_host_list.pl',
120+
't/004_load_balance_dns.pl',
119121
],
120122
'env': {'with_ssl': ssl_library},
121123
},

0 commit comments

Comments
 (0)
0