8000 Add smart statement timeout for learning AQO. · postgrespro/aqo@442b89d · GitHub
[go: up one dir, main page]

Skip to content

Commit 442b89d

Browse files
author
Alena Rybakina
committed
Add smart statement timeout for learning AQO.
AQO evaluates from the current aqo.statement_timeout value whether it has enough time to train. If not, then it increases this time exponentially. We can evaluate whether there was enough time to train the AQO by the average integral error. If the internal error has not practically changed compared to the previous iteration, then the training time increases. If the user needs an upper limit on query execution time, they can set a vanilla statement timeout. The aqo.statement_timeout value will be stored in shmem, next to the training data. Initially it = 0 (default value). The user has the ability to specify the GUC aqo.statement_timeout and terminates the request by analogy with statement timeout.
1 parent 8a99337 commit 442b89d

File tree

9 files changed

+269
-23
lines changed

9 files changed

+269
-23
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ REGRESS = aqo_disabled \
2525
plancache \
2626
statement_timeout \
2727
temp_tables \
28-
top_queries
28+
top_queries \
29+
smart_statement_timeout
2930

3031
fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw
3132
stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements

aqo--1.3--1.4.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
\echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit
55

66
ALTER TABLE public.aqo_data ADD COLUMN reliability double precision [];
7+
ALTER TABLE public.aqo_queries ADD COLUMN flex_timeout bigint;
8+
ALTER TABLE public.aqo_queries ADD COLUMN count_increase_timeout bigint;
79

810
DROP FUNCTION public.top_error_queries(int);
911

aqo.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ int aqo_mode;
3636
bool aqo_enabled = false; /* Signals that CREATE EXTENSION have executed and
3737
all extension tables is ready for use. */
3838
bool force_collect_stat;
39+
int aqo_statement_timeout;
3940

4041
/*
4142
* Show special info in EXPLAIN mode.
@@ -49,7 +50,7 @@ bool force_collect_stat;
4950
*/
5051
bool aqo_show_hash;
5152
bool aqo_show_details;
52-
53+
bool change_flex_timeout;
5354
/* GUC variables */
5455
static const struct config_enum_entry format_options[] = {
5556
{"intelligent", AQO_MODE_INTELLIGENT, false},
@@ -75,7 +76,6 @@ int auto_tuning_infinite_loop = 8;
7576
/* The number of nearest neighbors which will be chosen for ML-operations */
7677
int aqo_k = 3;
7778
double log_selectivity_lower_bound = -30;
78-
7979
/*
8080
* Currently we use it only to store query_text string which is initialized
8181
* after a query parsing and is used during the query planning.
@@ -210,6 +210,17 @@ _PG_init(void)
210210
NULL,
211211
NULL,
212212
NULL);
213+
DefineCustomIntVariable("aqo.statement_timeout",
214+
"Time limit on learning.",
215+
NULL,
216+
&aqo_statement_timeout,
217+
0,
218+
0, INT_MAX,
219+
PGC_USERSET,
220+
0,
221+
NULL,
222+
NULL,
223+
NULL);
213224

214225
prev_shmem_startup_hook = shmem_startup_hook;
215226
shmem_startup_hook = aqo_init_shmem;

aqo.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,17 @@ typedef struct QueryContextData
224224

225225
instr_time start_execution_time;
226226
double planning_time;
227+
228+
int64 flex_timeout;
229+
int64 count_increase_timeout;
227230
} QueryContextData;
228231

232+
/*
233+
* Indicator for understading update or not
234+
* flexible statement timeout for query
235+
*/
236+
extern bool change_flex_timeout;
237+
229238
extern double predicted_ppi_rows;
230239
extern double fss_ppi_hash;
231240

@@ -245,6 +254,7 @@ extern double log_selectivity_lower_bound;
245254
/* Parameters for current query */
246255
extern QueryContextData query_context;
247256
extern int njoins;
257+
extern int aqo_statement_timeout;
248258

249259
/* Memory context for long-live data */
250260
extern MemoryContext AQOMemoryContext;
@@ -278,6 +288,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash);
278288

279289
/* Storage interaction */
280290
extern bool find_query(uint64 qhash, QueryContextData *ctx);
291+
extern bool update_query_timeout(uint64 qhash, int64 flex_timeout, int64 count_increase_timeout);
281292
extern bool update_query(uint64 qhash, uint64 fhash,
282293
bool learn_aqo, bool use_aqo, bool auto_tuning);
283294
extern bool add_query_text(uint64 query_hash, const char *query_string);
@@ -313,6 +324,7 @@ extern double predict_for_relation(List *restrict_clauses, List *selectivities,
313324
void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags);
314325
void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction,
315326
uint64 count, bool execute_once);
327+
void increase_flex_timeout(uint64 query_hash, int64 flex_timeout_fin_time);
316328
void aqo_ExecutorEnd(QueryDesc *queryDesc);
317329

318330
/* Automatic query tuning */
@@ -338,6 +350,8 @@ extern void selectivity_cache_clear(void);
338350
extern Oid get_aqo_schema(void);
339351
extern void init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2);
340352
extern bool IsQueryDisabled(void);
341-
353+
extern double get_mean(double *elems, int nelems);
342354
extern List *cur_classes;
355+
extern bool get_flex_timeout(uint64 qhash, QueryContextData *query_context);
356+
343357
#endif

auto_tuning.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,10 @@
2525
*/
2626
double auto_tuning_convergence_error = 0.01;
2727

28-
static double get_mean(double *elems, int nelems);
2928
static double get_estimation(double *elems, int nelems);
3029
static bool is_stable(double *elems, int nelems);
31-
static bool converged_cq(double *elems, int nelems);
3230
static bool is_in_infinite_loop_cq(double *elems, int nelems);
33-
31+
static bool converged_cq(double *elems, int nelems);
3432

3533
/*
3634
* Returns mean value of the array of doubles.

expected/smart_statement_timeout.out

Whitespace-only changes.

postprocessing.c

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ static void update_query_stat_row(double *et, int *et_size,
8181
static void StoreToQueryEnv(QueryDesc *queryDesc);
8282
static void StorePlanInternals(QueryDesc *queryDesc);
8383
static bool ExtractFromQueryEnv(QueryDesc *queryDesc);
84+
static int64 max_timeout_value;
85+
static int64 growth_rate = 3;
8486

8587

8688
/*
@@ -692,14 +694,44 @@ aqo_timeout_handler(void)
692694
ctx.learn = query_context.learn_aqo;
693695
ctx.isTimedOut = true;
694696

695-
elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data.");
697+
elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %d",(int) get_timeout_finish_time(timeoutCtl.id));
696698
learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx);
697699
}
698700

701+
/*
702+
* Function to get the value of a variable with exponential growth
703+
*/
704+
static int64
705+
get_increment()
706+
{
707+
return pow(1 + growth_rate, query_context.count_increase_timeout);
708+
}
709+
710+
/*
711+
* Function to update flexible timeout
712+
* with value of STETAMENT_TIMEOUT - 1 nano sec
713+
* or lower
714+
*
715+
*/
716+
void
717+
increase_flex_timeout(uint64 query_hash, int64 flex_timeout_fin_time)
718+
{
719+
LOCKTAG tag;
720+
flex_timeout_fin_time = (flex_timeout_fin_time + 1) * get_increment();
721+
722+
init_lock_tag(&tag, query_context.query_hash, 0);
723+ LockAcquire(&tag, ExclusiveLock, false, false);
724+
if (!update_query_timeout(query_hash, flex_timeout_fin_time, query_context.count_increase_timeout + 1))
725+
elog(NOTICE, "timeout is not updated");
726+
LockRelease(&tag, ExclusiveLock, false);
727+
}
728+
699729
static bool
700730
set_timeout_if_need(QueryDesc *queryDesc)
701731
{
702-
TimestampTz fin_time;
732+
max_timeout_value = Min((int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1, (int64) aqo_statement_timeout);
733+
if (max_timeout_value == 0)
734+
max_timeout_value = Min((int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1, query_context.flex_timeout);
703735

704736
if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout)
705737
return false;
@@ -711,6 +743,8 @@ set_timeout_if_need(QueryDesc *queryDesc)
711743
!(query_context.use_aqo || query_context.learn_aqo))
712744
return false;
713745

746+
get_flex_timeout(query_context.query_hash, &query_context);
747+
714748
/*
715749
* Statement timeout exists. AQO should create user timeout right before the
716750
* timeout.
@@ -722,8 +756,8 @@ set_timeout_if_need(QueryDesc *queryDesc)
722756
else
723757
Assert(!get_timeout_active(timeoutCtl.id));
724758

725-
fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT);
726-
enable_timeout_at(timeoutCtl.id, fin_time - 1);
759+
760+
enable_timeout_at(timeoutCtl.id, (TimestampTz) max_timeout_value);
727761

728762
/* Save pointer to queryDesc to use at learning after a timeout interruption. */
729763
timeoutCtl.queryDesc = queryDesc;
@@ -822,20 +856,25 @@ aqo_ExecutorEnd(QueryDesc *queryDesc)
822856
list_free(ctx.selectivities);
823857
}
824858

825-
if (query_context.collect_stat)
826-
stat = get_aqo_stat(query_context.query_hash);
859+
stat = get_aqo_stat(query_context.query_hash);
860+
if (cardinality_num_objects > 0)
861+
cardinality_error = cardinality_sum_errors / cardinality_num_objects;
862+
else
863+
cardinality_error = -1;
827864

865+
if (stat && aqo_learn_statement_timeout &&
866+
stat->cardinality_error_with_aqo[stat->cardinality_error_with_aqo_size-1] - cardinality_sum_errors/cardinality_num_objects <= 0.1)
867+
{
868+
increase_flex_timeout(query_context.query_hash, (int64) get_timeout_finish_time(timeoutCtl.id));
869+
}
870+
871+
if (query_context.collect_stat)
828872
{
829873
/* Calculate execution time. */
830874
INSTR_TIME_SET_CURRENT(endtime);
831875
INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time);
832876
execution_time = INSTR_TIME_GET_DOUBLE(endtime);
833877

834-
if (cardinality_num_objects > 0)
835-
cardinality_error = cardinality_sum_errors / cardinality_num_objects;
836-
else
837-
cardinality_error = -1;
838-
839878
/* Prevent concurrent updates. */
840879
init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash);
841880
LockAcquire(&tag, ExclusiveLock, false, false);

sql/smart_statement_timeout.sql

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
-- The function just copied from stats_ext.sql
2+
create function check_estimated_rows(text) returns table (estimated int, actual int)
3+
language plpgsql as
4+
$$
5+
declare
6+
ln text;
7+
tmp text[];
8+
first_row bool := true;
9+
begin
10+
for ln in
11+
execute format('explain analyze %s', $1)
12+
loop
13+
if first_row then
14+
first_row := false;
15+
tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
16+
return query select tmp[1]::int, tmp[2]::int;
17+
end if;
18+
end loop;
19+
end;
20+
$$;
21+
22+
SET statement_timeout = 5000; -- [0.8s]
23+
24+
DROP TABLE IF EXISTS a,b CASCADE;
25+
CREATE TABLE a (x int);
26+
INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival;
27+
28+
CREATE TABLE b (y int);
29+
INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival;
30+
31+
CREATE EXTENSION IF NOT EXISTS aqo;
32+
SET aqo.join_threshold = 0;
33+
SET aqo.mode = 'learn';
34+
SET aqo.show_details = 'off';
35+
SET aqo.learn_statement_timeout = 'on';
36+
SET aqo.statement_timeout = 4; -- [0.8s]
37+
SELECT check_estimated_rows('SELECT x FROM A,B where x < 10 and y > 10 group by(x);'); -- haven't any partial data
38+
select flex_timeout, count_increase_timeout from aqo_queries where query_hash <> 0;
39+
SELECT check_estimated_rows('SELECT x FROM A,B where x < 10 and y > 10 group by(x);'); -- haven't any partial data
40+
select flex_timeout, count_increase_timeout from aqo_queries where query_hash <> 0;
41+
SELECT check_estimated_rows('SELECT x FROM A,B where x < 10 and y > 10 group by(x);'); -- haven't any partial data
42+
select flex_timeout, count_increase_timeout from aqo_queries where query_hash <> 0;
43+
SELECT check_estimated_rows('SELECT x FROM A,B where x < 10 and y > 10 group by(x);'); -- haven't any partial data

0 commit comments

Comments
 (0)
0