diff --git a/.gitignore b/.gitignore index cbf8d7996a01a..6bb65ccd45ebd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ # Global excludes across all subdirectories +*.swp +*.DS_Store *.o *.obj *.so diff --git a/conf/postgresql.conf b/conf/postgresql.conf new file mode 100644 index 0000000000000..762574f9d79e9 --- /dev/null +++ b/conf/postgresql.conf @@ -0,0 +1,634 @@ +# ----------------------------- +# PostgreSQL configuration file +# ----------------------------- +# +# This file consists of lines of the form: +# +# name = value +# +# (The "=" is optional.) Whitespace may be used. Comments are introduced with +# "#" anywhere on a line. The complete list of parameter names and allowed +# values can be found in the PostgreSQL documentation. +# +# The commented-out settings shown in this file represent the default values. +# Re-commenting a setting is NOT sufficient to revert it to the default value; +# you need to reload the server. +# +# This file is read on server startup and when the server receives a SIGHUP +# signal. If you edit the file on a running system, you have to SIGHUP the +# server for the changes to take effect, or use "pg_ctl reload". Some +# parameters, which are marked below, require a server shutdown and restart to +# take effect. +# +# Any parameter can also be given as a command-line option to the server, e.g., +# "postgres -c log_connections=on". Some parameters can be changed at run time +# with the "SET" SQL command. +# +# Memory units: kB = kilobytes Time units: ms = milliseconds +# MB = megabytes s = seconds +# GB = gigabytes min = minutes +# TB = terabytes h = hours +# d = days + + +#------------------------------------------------------------------------------ +# FILE LOCATIONS +#------------------------------------------------------------------------------ + +# The default values of these variables are driven from the -D command-line +# option or PGDATA environment variable, represented here as ConfigDir. 
+ +#data_directory = 'ConfigDir' # use data in another directory + # (change requires restart) +#hba_file = 'ConfigDir/pg_hba.conf' # host-based authentication file + # (change requires restart) +#ident_file = 'ConfigDir/pg_ident.conf' # ident configuration file + # (change requires restart) + +# If external_pid_file is not explicitly set, no extra PID file is written. +#external_pid_file = '' # write an extra PID file + # (change requires restart) + + +#------------------------------------------------------------------------------ +# CONNECTIONS AND AUTHENTICATION +#------------------------------------------------------------------------------ + +# - Connection Settings - + +#listen_addresses = 'localhost' # what IP address(es) to listen on; + # comma-separated list of addresses; + # defaults to 'localhost'; use '*' for all + # (change requires restart) +#port = 5432 # (change requires restart) +max_connections = 100 # (change requires restart) +# Note: Increasing max_connections costs ~400 bytes of shared memory per +# connection slot, plus lock space (see max_locks_per_transaction). 
+#superuser_reserved_connections = 3 # (change requires restart) +#unix_socket_directories = '/tmp' # comma-separated list of directories + # (change requires restart) +#unix_socket_group = '' # (change requires restart) +#unix_socket_permissions = 0777 # begin with 0 to use octal notation + # (change requires restart) +#bonjour = off # advertise server via Bonjour + # (change requires restart) +#bonjour_name = '' # defaults to the computer name + # (change requires restart) + +# - Security and Authentication - + +#authentication_timeout = 1min # 1s-600s +#ssl = off # (change requires restart) +#ssl_ciphers = 'HIGH:MEDIUM:+3DES:!aNULL' # allowed SSL ciphers + # (change requires restart) +#ssl_prefer_server_ciphers = on # (change requires restart) +#ssl_ecdh_curve = 'prime256v1' # (change requires restart) +#ssl_cert_file = 'server.crt' # (change requires restart) +#ssl_key_file = 'server.key' # (change requires restart) +#ssl_ca_file = '' # (change requires restart) +#ssl_crl_file = '' # (change requires restart) +#password_encryption = on +#db_user_namespace = off +#row_security = on + +# GSSAPI using Kerberos +#krb_server_keyfile = '' +#krb_caseins_users = off + +# - TCP Keepalives - +# see "man 7 tcp" for details + +#tcp_keepalives_idle = 0 # TCP_KEEPIDLE, in seconds; + # 0 selects the system default +#tcp_keepalives_interval = 0 # TCP_KEEPINTVL, in seconds; + # 0 selects the system default +#tcp_keepalives_count = 0 # TCP_KEEPCNT; + # 0 selects the system default + + +#------------------------------------------------------------------------------ +# RESOURCE USAGE (except WAL) +#------------------------------------------------------------------------------ + +# - Memory - + +shared_buffers = 128MB # min 128kB + # (change requires restart) +#huge_pages = try # on, off, or try + # (change requires restart) +#temp_buffers = 8MB # min 800kB +#max_prepared_transactions = 0 # zero disables the feature + # (change requires restart) +# Note: Increasing 
max_prepared_transactions costs ~600 bytes of shared memory +# per transaction slot, plus lock space (see max_locks_per_transaction). +# It is not advisable to set max_prepared_transactions nonzero unless you +# actively intend to use prepared transactions. +#work_mem = 4MB # min 64kB +#maintenance_work_mem = 64MB # min 1MB +#autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem +#max_stack_depth = 2MB # min 100kB +dynamic_shared_memory_type = posix # the default is the first option + # supported by the operating system: + # posix + # sysv + # windows + # mmap + # use none to disable dynamic shared memory + +# - Disk - + +#temp_file_limit = -1 # limits per-session temp file space + # in kB, or -1 for no limit + +# - Kernel Resource Usage - + +#max_files_per_process = 1000 # min 25 + # (change requires restart) +#shared_preload_libraries = '' # (change requires restart) + +# - Cost-Based Vacuum Delay - + +#vacuum_cost_delay = 0 # 0-100 milliseconds +#vacuum_cost_page_hit = 1 # 0-10000 credits +#vacuum_cost_page_miss = 10 # 0-10000 credits +#vacuum_cost_page_dirty = 20 # 0-10000 credits +#vacuum_cost_limit = 200 # 1-10000 credits + +# - Background Writer - + +#bgwriter_delay = 200ms # 10-10000ms between rounds +#bgwriter_lru_maxpages = 100 # 0-1000 max buffers written/round +#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round + +# - Asynchronous Behavior - + +#effective_io_concurrency = 0 # 1-1000; 0 disables prefetching +#max_worker_processes = 8 +#max_parallel_degree = 0 # max number of worker processes per node + + +#------------------------------------------------------------------------------ +# WRITE AHEAD LOG +#------------------------------------------------------------------------------ + +# - Settings - + +#wal_level = minimal # minimal, archive, hot_standby, or logical + # (change requires restart) +#fsync = on # turns forced synchronization on or off +#synchronous_commit = on # synchronization level; + # off, local, 
remote_write, or on +#wal_sync_method = fsync # the default is the first option + # supported by the operating system: + # open_datasync + # fdatasync (default on Linux) + # fsync + # fsync_writethrough + # open_sync +#full_page_writes = on # recover from partial page writes +#wal_compression = off # enable compression of full-page writes +#wal_log_hints = off # also do full page writes of non-critical updates + # (change requires restart) +#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers + # (change requires restart) +#wal_writer_delay = 200ms # 1-10000 milliseconds + +#commit_delay = 0 # range 0-100000, in microseconds +#commit_siblings = 5 # range 1-1000 + +# - Checkpoints - + +#checkpoint_timeout = 5min # range 30s-1h +#max_wal_size = 1GB +#min_wal_size = 80MB +#checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0 +#checkpoint_warning = 30s # 0 disables + +# - Archiving - + +#archive_mode = off # enables archiving; off, on, or always + # (change requires restart) +#archive_command = '' # command to use to archive a logfile segment + # placeholders: %p = path of file to archive + # %f = file name only + # e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f' +#archive_timeout = 0 # force a logfile segment switch after this + # number of seconds; 0 disables + + +#------------------------------------------------------------------------------ +# REPLICATION +#------------------------------------------------------------------------------ + +# - Sending Server(s) - + +# Set these on the master and on any standby that will send replication data. 
+ +#max_wal_senders = 0 # max number of walsender processes + # (change requires restart) +#wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables +#wal_sender_timeout = 60s # in milliseconds; 0 disables + +#max_replication_slots = 0 # max number of replication slots + # (change requires restart) +#track_commit_timestamp = off # collect timestamp of transaction commit + # (change requires restart) + +# - Master Server - + +# These settings are ignored on a standby server. + +#synchronous_standby_names = '' # standby servers that provide sync rep + # comma-separated list of application_name + # from standby(s); '*' = all +#vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed + +# - Standby Servers - + +# These settings are ignored on a master server. + +#hot_standby = off # "on" allows queries during recovery + # (change requires restart) +#max_standby_archive_delay = 30s # max delay before canceling queries + # when reading WAL from archive; + # -1 allows indefinite delay +#max_standby_streaming_delay = 30s # max delay before canceling queries + # when reading streaming WAL; + # -1 allows indefinite delay +#wal_receiver_status_interval = 10s # send replies at least this often + # 0 disables +#hot_standby_feedback = off # send info from standby to prevent + # query conflicts +#wal_receiver_timeout = 60s # time that receiver waits for + # communication from master + # in milliseconds; 0 disables +#wal_retrieve_retry_interval = 5s # time to wait before retrying to + # retrieve WAL after a failed attempt + + +#------------------------------------------------------------------------------ +# QUERY TUNING +#------------------------------------------------------------------------------ + +# - Planner Method Configuration - + +enable_bitmapscan = off +enable_hashagg = off +enable_hashjoin = off +enable_indexscan = off +enable_indexonlyscan = off +enable_material = off +enable_mergejoin = off +#enable_nestloop = on +#enable_seqscan = on 
+#enable_sort = on +enable_tidscan = off + +# - Planner Cost Constants - + +#seq_page_cost = 1.0 # measured on an arbitrary scale +#random_page_cost = 4.0 # same scale as above +#cpu_tuple_cost = 0.01 # same scale as above +#cpu_index_tuple_cost = 0.005 # same scale as above +#cpu_operator_cost = 0.0025 # same scale as above +#parallel_tuple_cost = 0.1 # same scale as above +#parallel_setup_cost = 1000.0 # same scale as above +#effective_cache_size = 4GB + +# - Genetic Query Optimizer - + +#geqo = on +#geqo_threshold = 12 +#geqo_effort = 5 # range 1-10 +#geqo_pool_size = 0 # selects default based on effort +#geqo_generations = 0 # selects default based on effort +#geqo_selection_bias = 2.0 # range 1.5-2.0 +#geqo_seed = 0.0 # range 0.0-1.0 + +# - Other Planner Options - + +#default_statistics_target = 100 # range 1-10000 +#constraint_exclusion = partition # on, off, or partition +#cursor_tuple_fraction = 0.1 # range 0.0-1.0 +#from_collapse_limit = 8 +#join_collapse_limit = 8 # 1 disables collapsing of explicit + # JOIN clauses + + +#------------------------------------------------------------------------------ +# ERROR REPORTING AND LOGGING +#------------------------------------------------------------------------------ + +# - Where to Log - + +#log_destination = 'stderr' # Valid values are combinations of + # stderr, csvlog, syslog, and eventlog, + # depending on platform. csvlog + # requires logging_collector to be on. + +# This is used when logging to stderr: +#logging_collector = off # Enable capturing of stderr and csvlog + # into log files. Required to be on for + # csvlogs. 
+ # (change requires restart) + +# These are only used if logging_collector is on: +#log_directory = 'pg_log' # directory where log files are written, + # can be absolute or relative to PGDATA +#log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern, + # can include strftime() escapes +#log_file_mode = 0600 # creation mode for log files, + # begin with 0 to use octal notation +#log_truncate_on_rotation = off # If on, an existing log file with the + # same name as the new log file will be + # truncated rather than appended to. + # But such truncation only occurs on + # time-driven rotation, not on restarts + # or size-driven rotation. Default is + # off, meaning append to existing files + # in all cases. +#log_rotation_age = 1d # Automatic rotation of logfiles will + # happen after that time. 0 disables. +#log_rotation_size = 10MB # Automatic rotation of logfiles will + # happen after that much log output. + # 0 disables. + +# These are relevant when logging to syslog: +#syslog_facility = 'LOCAL0' +#syslog_ident = 'postgres' + +# This is only relevant when logging to eventlog (win32): +#event_source = 'PostgreSQL' + +# - When to Log - + +#client_min_messages = notice # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # log + # notice + # warning + # error + +#log_min_messages = warning # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # info + # notice + # warning + # error + # log + # fatal + # panic + +#log_min_error_statement = error # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # info + # notice + # warning + # error + # log + # fatal + # panic (effectively off) + +#log_min_duration_statement = -1 # -1 is disabled, 0 logs all statements + # and their durations, > 0 logs only + # statements running at least this number + # of milliseconds + + +# - What to Log - + +#debug_print_parse = off 
+#debug_print_rewritten = off +#debug_print_plan = off +#debug_pretty_print = on +#log_checkpoints = off +#log_connections = off +#log_disconnections = off +#log_duration = off +#log_error_verbosity = default # terse, default, or verbose messages +#log_hostname = off +#log_line_prefix = '' # special values: + # %a = application name + # %u = user name + # %d = database name + # %r = remote host and port + # %h = remote host + # %p = process ID + # %t = timestamp without milliseconds + # %m = timestamp with milliseconds + # %n = timestamp with milliseconds (as a Unix epoch) + # %i = command tag + # %e = SQL state + # %c = session ID + # %l = session line number + # %s = session start timestamp + # %v = virtual transaction ID + # %x = transaction ID (0 if none) + # %q = stop here in non-session + # processes + # %% = '%' + # e.g. '<%u%%%d> ' +#log_lock_waits = off # log lock waits >= deadlock_timeout +#log_statement = 'none' # none, ddl, mod, all +#log_replication_commands = off +#log_temp_files = -1 # log temporary files equal or larger + # than the specified size in kilobytes; + # -1 disables, 0 logs all temp files +log_timezone = 'US/Michigan' + + +# - Process Title - + +#cluster_name = '' # added to process titles if nonempty + # (change requires restart) +#update_process_title = on + + +#------------------------------------------------------------------------------ +# RUNTIME STATISTICS +#------------------------------------------------------------------------------ + +# - Query/Index Statistics Collector - + +#track_activities = on +#track_counts = on +#track_io_timing = off +#track_functions = none # none, pl, all +#track_activity_query_size = 1024 # (change requires restart) +#stats_temp_directory = 'pg_stat_tmp' + + +# - Statistics Monitoring - + +#log_parser_stats = off +#log_planner_stats = off +#log_executor_stats = off +#log_statement_stats = off + + +#------------------------------------------------------------------------------ +# AUTOVACUUM PARAMETERS 
+#------------------------------------------------------------------------------ + +#autovacuum = on # Enable autovacuum subprocess? 'on' + # requires track_counts to also be on. +#log_autovacuum_min_duration = -1 # -1 disables, 0 logs all actions and + # their durations, > 0 logs only + # actions running at least this number + # of milliseconds. +#autovacuum_max_workers = 3 # max number of autovacuum subprocesses + # (change requires restart) +#autovacuum_naptime = 1min # time between autovacuum runs +#autovacuum_vacuum_threshold = 50 # min number of row updates before + # vacuum +#autovacuum_analyze_threshold = 50 # min number of row updates before + # analyze +#autovacuum_vacuum_scale_factor = 0.2 # fraction of table size before vacuum +#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze +#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum + # (change requires restart) +#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age + # before forced vacuum + # (change requires restart) +#autovacuum_vacuum_cost_delay = 20ms # default vacuum cost delay for + # autovacuum, in milliseconds; + # -1 means use vacuum_cost_delay +#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for + # autovacuum, -1 means use + # vacuum_cost_limit + + +#------------------------------------------------------------------------------ +# CLIENT CONNECTION DEFAULTS +#------------------------------------------------------------------------------ + +# - Statement Behavior - + +#search_path = '"$user", public' # schema names +#default_tablespace = '' # a tablespace name, '' uses the default +#temp_tablespaces = '' # a list of tablespace names, '' uses + # only default tablespace +#check_function_bodies = on +#default_transaction_isolation = 'read committed' +#default_transaction_read_only = off +#default_transaction_deferrable = off +#session_replication_role = 'origin' +#statement_timeout = 0 # in milliseconds, 0 is 
disabled +#lock_timeout = 0 # in milliseconds, 0 is disabled +#vacuum_freeze_min_age = 50000000 +#vacuum_freeze_table_age = 150000000 +#vacuum_multixact_freeze_min_age = 5000000 +#vacuum_multixact_freeze_table_age = 150000000 +#bytea_output = 'hex' # hex, escape +#xmlbinary = 'base64' +#xmloption = 'content' +#gin_fuzzy_search_limit = 0 +#gin_pending_list_limit = 4MB + +# - Locale and Formatting - + +datestyle = 'iso, mdy' +#intervalstyle = 'postgres' +timezone = 'US/Michigan' +#timezone_abbreviations = 'Default' # Select the set of available time zone + # abbreviations. Currently, there are + # Default + # Australia (historical usage) + # India + # You can create your own file in + # share/timezonesets/. +#extra_float_digits = 0 # min -15, max 3 +#client_encoding = sql_ascii # actually, defaults to database + # encoding + +# These settings are initialized by initdb, but they can be changed. +lc_messages = 'en_US.UTF-8' # locale for system error message + # strings +lc_monetary = 'en_US.UTF-8' # locale for monetary formatting +lc_numeric = 'en_US.UTF-8' # locale for number formatting +lc_time = 'en_US.UTF-8' # locale for time formatting + +# default configuration for text search +default_text_search_config = 'pg_catalog.english' + +# - Other Defaults - + +#dynamic_library_path = '$libdir' +#local_preload_libraries = '' +#session_preload_libraries = '' + + +#------------------------------------------------------------------------------ +# LOCK MANAGEMENT +#------------------------------------------------------------------------------ + +#deadlock_timeout = 1s +#max_locks_per_transaction = 64 # min 10 + # (change requires restart) +# Note: Each lock table slot uses ~270 bytes of shared memory, and there are +# max_locks_per_transaction * (max_connections + max_prepared_transactions) +# lock table slots. 
+#max_pred_locks_per_transaction = 64 # min 10 + # (change requires restart) + + +#------------------------------------------------------------------------------ +# VERSION/PLATFORM COMPATIBILITY +#------------------------------------------------------------------------------ + +# - Previous PostgreSQL Versions - + +#array_nulls = on +#backslash_quote = safe_encoding # on, off, or safe_encoding +#default_with_oids = off +#escape_string_warning = on +#lo_compat_privileges = off +#operator_precedence_warning = off +#quote_all_identifiers = off +#sql_inheritance = on +#standard_conforming_strings = on +#synchronize_seqscans = on + +# - Other Platforms and Clients - + +#transform_null_equals = off + + +#------------------------------------------------------------------------------ +# ERROR HANDLING +#------------------------------------------------------------------------------ + +#exit_on_error = off # terminate session on any error? +#restart_after_crash = on # reinitialize after backend crash? + + +#------------------------------------------------------------------------------ +# CONFIG FILE INCLUDES +#------------------------------------------------------------------------------ + +# These options allow settings to be loaded from files other than the +# default postgresql.conf. 
+ +#include_dir = 'conf.d' # include files ending in '.conf' from + # directory 'conf.d' +#include_if_exists = 'exists.conf' # include file only if it exists +#include = 'special.conf' # include file + + +#------------------------------------------------------------------------------ +# CUSTOMIZED OPTIONS +#------------------------------------------------------------------------------ + +# Add settings for extensions here diff --git a/contrib/ctidscan/Makefile b/contrib/ctidscan/Makefile new file mode 100644 index 0000000000000..6811a92f827bb --- /dev/null +++ b/contrib/ctidscan/Makefile @@ -0,0 +1,11 @@ +# contrib/ctidscan/Makefile + +MODULES = ctidscan + +EXTENSION = ctidscan + +REGRESS = ctidscan + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/contrib/ctidscan/README.md b/contrib/ctidscan/README.md new file mode 100644 index 0000000000000..15a901c5eceb9 --- /dev/null +++ b/contrib/ctidscan/README.md @@ -0,0 +1,4 @@ +ctidscan +======== + +Example module of custom-scan interface diff --git a/contrib/ctidscan/ctidscan.c b/contrib/ctidscan/ctidscan.c new file mode 100644 index 0000000000000..cbe64ed55adeb --- /dev/null +++ b/contrib/ctidscan/ctidscan.c @@ -0,0 +1,814 @@ +/* + * ctidscan.c + * + * A custom-scan provider that uses inequality operators on the ctid system + * column to skip reading blocks that can never be referenced. + * + * It is designed to demonstrate the Custom Scan APIs, which allow an + * extension to override part of an executor node. This extension focuses on + * workloads that fetch records whose tid is larger or smaller than a given + * value. When inequality operators are given, this module constructs a + * custom scan path that can skip records that need not be read. Then, + * if it is the cheapest one, it is used to run the query. 
+ * Custom Scan APIs callbacks this extension when executor tries to fetch
+ * underlying records, then it utilizes existing heap_getnext() but seek
+ * the records to be read prior to fetching the first record.
+ *
+ * Portions Copyright (c) 2014, PostgreSQL Global Development Group
+ */
+#include "postgres.h"
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "executor/executor.h"
+#include "executor/nodeCustom.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/paths.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planmain.h"
+#include "optimizer/placeholder.h"
+#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
+#include "parser/parsetree.h"
+#include "storage/bufmgr.h"
+#include "storage/itemptr.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/spccache.h"
+
+/*
+ * Identifiers of the tid comparison operators that this module compares
+ * against OpExpr.opno.  Each one is defined here only when no included
+ * header already provides the macro.
+ * NOTE(review): the previous comment said these declarations are missing
+ * from pg_proc.h; operator macros would normally be expected in
+ * catalog/pg_operator.h -- confirm which header is meant.
+ */
+#ifndef TIDGreaterOperator
+#define TIDGreaterOperator 2800
+#endif
+#ifndef TIDLessEqualOperator
+#define TIDLessEqualOperator 2801
+#endif
+#ifndef TIDGreaterEqualOperator
+#define TIDGreaterEqualOperator 2802
+#endif
+
+PG_MODULE_MAGIC;
+
+/*
+ * NOTE: We don't use any special data type to save the private data.
+ * All we want to save in the private fields is the expression list that
+ * shall be adjusted by setrefs.c/subselect.c, so we put it on the
+ * custom_exprs field of the CustomScan structure, not on custom_private.
+ * Due to the interface contract, only expression nodes may be put on
+ * custom_exprs, and we have to keep in mind that the core backend may
+ * adjust those expression items.
+ */
+
+/*
+ * CtidScanState - executor state object of ctidscan.
+ * It extends CustomScanState with one additional field: 'ctid_quals' holds
+ * the list of ExprState for the inequality operators that involve the ctid
+ * system column.
+ */
+typedef struct {
+	CustomScanState css;	/* base executor state; first member */
+	List *ctid_quals;	/* list of ExprState for inequality ops */
+} CtidScanState;
+
+/* static variables */
+static bool enable_ctidscan;	/* when false, SetCtidScanPath adds no path */
+static set_rel_pathlist_hook_type set_rel_pathlist_next = NULL;	/* presumably the previously installed hook -- set outside this view; confirm */
+
+/* function declarations */
+void _PG_init(void);
+
+static void SetCtidScanPath(PlannerInfo *root,
+							RelOptInfo *rel,
+							Index rti,
+							RangeTblEntry *rte);
+/* CustomPathMethods */
+static Plan *PlanCtidScanPath(PlannerInfo *root,
+							  RelOptInfo *rel,
+							  CustomPath *best_path,
+							  List *tlist,
+							  List *clauses);
+
+/* CustomScanMethods */
+static Node *CreateCtidScanState(CustomScan *custom_plan);
+
+/* CustomScanExecMethods */
+static void BeginCtidScan(CustomScanState *node, EState *estate, int eflags);
+static void ReScanCtidScan(CustomScanState *node);
+static TupleTableSlot *ExecCtidScan(CustomScanState *node);
+static void EndCtidScan(CustomScanState *node);
+static void ExplainCtidScan(CustomScanState *node, List *ancestors,
+							ExplainState *es);
+
+/*
+ * Static tables of custom-scan callbacks, one per stage (path, plan, exec).
+ * A NULL entry means this module supplies no callback for that slot.
+ */
+static CustomPathMethods ctidscan_path_methods = {
+	"ctidscan",				/* CustomName */
+	PlanCtidScanPath,		/* PlanCustomPath */
+	NULL,					/* TextOutCustomPath */
+};
+
+static CustomScanMethods ctidscan_scan_methods = {
+	"ctidscan",				/* CustomName */
+	CreateCtidScanState,	/* CreateCustomScanState */
+	NULL,					/* TextOutCustomScan */
+};
+
+static CustomExecMethods ctidscan_exec_methods = {
+	"ctidscan",				/* CustomName */
+	BeginCtidScan,			/* BeginCustomScan */
+	ExecCtidScan,			/* ExecCustomScan */
+	EndCtidScan,			/* EndCustomScan */
+	ReScanCtidScan,			/*
ReScanCustomScan */ + NULL, /* MarkPosCustomScan */ + NULL, /* RestrPosCustomScan */ + ExplainCtidScan, /* ExplainCustomScan */ +}; + +#define IsCTIDVar(node,rtindex) \ + ((node) != NULL && \ + IsA((node), Var) && \ + ((Var *) (node))->varno == (rtindex) && \ + ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ + ((Var *) (node))->varlevelsup == 0) + +/* + * CTidQualFromExpr + * + * It checks whether the given restriction clauses enables to determine + * the zone to be scanned, or not. If one or more restriction clauses are + * available, it returns a list of them, or NIL elsewhere. + * The caller can consider all the conditions are chained with AND- + * boolean operator, so all the operator works for narrowing down the + * scope of custom tid scan. + */ +static List * +CTidQualFromExpr(Node *expr, int varno) +{ + if (is_opclause(expr)) + { + OpExpr *op = (OpExpr *) expr; + Node *arg1; + Node *arg2; + Node *other = NULL; + + /* only inequality operators are candidate */ + if (op->opno != TIDLessOperator && + op->opno != TIDLessEqualOperator && + op->opno != TIDGreaterOperator && + op->opno != TIDGreaterEqualOperator) + return NULL; + + if (list_length(op->args) != 2) + return false; /* should not happen */ + + arg1 = linitial(op->args); + arg2 = lsecond(op->args); + + if (IsCTIDVar(arg1, varno)) + other = arg2; + else if (IsCTIDVar(arg2, varno)) + other = arg1; + else + return NULL; + if (exprType(other) != TIDOID) + return NULL; /* should not happen */ + /* The other argument must be a pseudoconstant */ + if (!is_pseudo_constant_clause(other)) + return NULL; + + return list_make1(copyObject(op)); + } + else if (and_clause(expr)) + { + List *rlst = NIL; + ListCell *lc; + + foreach(lc, ((BoolExpr *) expr)->args) + { + List *temp = CTidQualFromExpr((Node *) lfirst(lc), varno); + + rlst = list_concat(rlst, temp); + } + return rlst; + } + return NIL; +} + +/* + * CTidEstimateCosts + * + * It estimates cost to scan the target relation according to the 
given
+ * restriction clauses. Its logic to scan the relation is almost the same as
+ * what SeqScan does, because it uses regular heap_getnext(), except for
+ * the number of tuples to be scanned if the restriction clauses work well.
+ *
+ * The constant bounds found in the ctid quals (cpath->custom_private) are
+ * turned into a block range; the relation's tuple count is scaled by the
+ * fraction of pages inside that range, and those pages are charged as
+ * sequential reads. Results are stored in path->rows, path->startup_cost
+ * and path->total_cost of the supplied CustomPath.
+ */
+static void
+CTidEstimateCosts(PlannerInfo *root,
+				  RelOptInfo *baserel,
+				  CustomPath *cpath)
+{
+	Path	   *path = &cpath->path;
+	List	   *ctid_quals = cpath->custom_private;
+	ListCell   *lc;
+	double		ntuples;
+	ItemPointerData ip_min;
+	ItemPointerData ip_max;
+	bool		has_min_val = false;
+	bool		has_max_val = false;
+	BlockNumber rel_pages = baserel->pages;
+	BlockNumber num_pages;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	QualCost	qpqual_cost;
+	QualCost	ctid_qual_cost;
+	double		spc_seq_page_cost;
+
+	/* Should only be applied to base relations */
+	Assert(baserel->relid > 0);
+	Assert(baserel->rtekind == RTE_RELATION);
+
+	/* Mark the path with the correct row estimate */
+	if (path->param_info)
+		path->rows = path->param_info->ppi_rows;
+	else
+		path->rows = baserel->rows;
+
+	/* Collect the tightest constant bounds the quals put on ctid */
+	ItemPointerSet(&ip_min, 0, 0);
+	ItemPointerSet(&ip_max, MaxBlockNumber, MaxOffsetNumber);
+	foreach(lc, ctid_quals)
+	{
+		OpExpr	   *op = lfirst(lc);
+		Oid			opno;
+		Node	   *other;
+		ItemPointer ip;
+
+		Assert(is_opclause(op));
+		if (IsCTIDVar(linitial(op->args), baserel->relid))
+		{
+			opno = op->opno;
+			other = lsecond(op->args);
+		}
+		else if (IsCTIDVar(lsecond(op->args), baserel->relid))
+		{
+			/* To simplify, treat the clause as if the Var were the 1st argument */
+			opno = get_commutator(op->opno);
+			other = linitial(op->args);
+		}
+		else
+			elog(ERROR, "could not identify CTID variable");
+
+		/*
+		 * Only non-NULL constants give a bound at plan time. A NULL Const
+		 * carries no pointer to dereference, so it is skipped as well.
+		 */
+		if (!IsA(other, Const) || ((Const *) other)->constisnull)
+			continue;
+		ip = (ItemPointer) DatumGetPointer(((Const *) other)->constvalue);
+
+		/*
+		 * Just a rough estimation: strict and non-strict inequality
+		 * operators are not distinguished from the scan-size perspective.
+		 */
+		switch (opno)
+		{
+			case TIDLessOperator:
+			case TIDLessEqualOperator:
+				if (ItemPointerCompare(ip, &ip_max) < 0)
+					ItemPointerCopy(ip, &ip_max);
+				has_max_val = true;
+				break;
+			case TIDGreaterOperator:
+			case TIDGreaterEqualOperator:
+				if (ItemPointerCompare(ip, &ip_min) > 0)
+					ItemPointerCopy(ip, &ip_min);
+				has_min_val = true;
+				break;
+			default:
+				elog(ERROR, "unexpected operator code: %u", op->opno);
+				break;
+		}
+	}
+
+	/* estimated number of tuples in this relation */
+	ntuples = baserel->tuples;
+
+	if (has_min_val || has_max_val)
+	{
+		/* valid block numbers are 0 .. rel_pages - 1 */
+		BlockNumber bnum_min = 0;
+		BlockNumber bnum_max = (rel_pages > 0) ? rel_pages - 1 : 0;
+
+		if (has_min_val)
+			bnum_min = BlockIdGetBlockNumber(&ip_min.ip_blkid);
+		if (has_max_val)
+			bnum_max = Min(bnum_max,
+						   BlockIdGetBlockNumber(&ip_max.ip_blkid));
+
+		/*
+		 * BlockNumber is unsigned, so never subtract when the range is
+		 * empty (lower bound above the upper bound or beyond the end of
+		 * the relation); assume a single page in that case.
+		 */
+		if (bnum_min > bnum_max)
+			num_pages = 1;
+		else
+			num_pages = bnum_max - bnum_min + 1;
+	}
+	else
+	{
+		/*
+		 * Just a rough estimation. No bound is known at plan time, so
+		 * assume half of the relation is read; the real number is not
+		 * determined until the executor evaluates the clauses.
+		 */
+		num_pages = Max((rel_pages + 1) / 2, 1);
+	}
+
+	/* scale the tuple count by the fraction of pages to be read */
+	if (rel_pages > 0)
+		ntuples *= ((double) num_pages) / ((double) rel_pages);
+
+	/*
+	 * The TID qual expressions will be computed once, any other baserestrict
+	 * quals once per retrieved tuple.
+	 */
+	cost_qual_eval(&ctid_qual_cost, ctid_quals, root);
+
+	/* fetch estimated page cost for tablespace containing table */
+	get_tablespace_page_costs(baserel->reltablespace,
+							  NULL,
+							  &spc_seq_page_cost);
+
+	/* disk costs --- the block range is read sequentially via heap_getnext */
+	run_cost += spc_seq_page_cost * num_pages;
+
+	/*
+	 * Add scanning CPU costs
+	 * (logic copied from get_restriction_qual_cost)
+	 */
+	if (path->param_info)
+	{
+		/* Include costs of pushed-down clauses */
+		cost_qual_eval(&qpqual_cost, path->param_info->ppi_clauses, root);
+
+		qpqual_cost.startup += baserel->baserestrictcost.startup;
+		qpqual_cost.per_tuple += baserel->baserestrictcost.per_tuple;
+	}
+	else
+		qpqual_cost = baserel->baserestrictcost;
+
+	/*
+	 * The ctid quals are charged once at startup and taken out of the
+	 * per-tuple charge, on the assumption that they are a subset of the
+	 * qpquals.
+	 * NOTE(review): PlanCtidScanPath keeps every clause in the plan qual,
+	 * so this may undercount per-tuple CPU cost -- confirm the intent.
+	 */
+	startup_cost += qpqual_cost.startup + ctid_qual_cost.per_tuple;
+	cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+		ctid_qual_cost.per_tuple;
+	/* accumulate: a plain assignment here would discard the disk cost */
+	run_cost += cpu_per_tuple * ntuples;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * SetCtidScanPath - entrypoint of the series of custom-scan execution.
+ * It adds a CustomPath if the referenced relation has inequality
+ * expressions on the ctid system column. It is installed as
+ * set_rel_pathlist_hook, so the hook that was installed before this module
+ * (if any) is invoked first.
+ */
+static void
+SetCtidScanPath(PlannerInfo *root, RelOptInfo *baserel,
+				Index rtindex, RangeTblEntry *rte)
+{
+	char		relkind;
+	ListCell   *lc;
+	List	   *ctid_quals = NIL;
+
+	/* keep the hook chain intact: run the previously installed hook */
+	if (set_rel_pathlist_next)
+		set_rel_pathlist_next(root, baserel, rtindex, rte);
+
+	/* only plain relations are supported */
+	if (rte->rtekind != RTE_RELATION)
+		return;
+	relkind = get_rel_relkind(rte->relid);
+	if (relkind != RELKIND_RELATION &&
+		relkind != RELKIND_MATVIEW &&
+		relkind != RELKIND_TOASTVALUE)
+		return;
+
+	/*
+	 * NOTE: Unlike the built-in execution paths, a core path is always
+	 * available even when ctid scan is not. So we simply add no path here,
+	 * instead of adding one with disable_cost.
+	 */
+	if (!enable_ctidscan)
+		return;
+
+	/* walk on the restrict info */
+	foreach(lc, baserel->baserestrictinfo)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+		List	   *temp;
+
+		if (!IsA(rinfo, RestrictInfo))
+			continue;		/* probably should never happen */
+		temp = CTidQualFromExpr((Node *) rinfo->clause, baserel->relid);
+		ctid_quals = list_concat(ctid_quals, temp);
+	}
+
+	/*
+	 * OK, this is the case where a part of the restriction clauses can
+	 * reduce the number of tuples to scan, so add the custom scan path
+	 * provided by this module.
+	 */
+	if (ctid_quals != NIL)
+	{
+		CustomPath *cpath;
+		Relids		required_outer;
+
+		/*
+		 * We don't support pushing join clauses into the quals of a ctidscan,
+		 * but it could still have required parameterization due to LATERAL
+		 * refs in its tlist.
+		 */
+		required_outer = baserel->lateral_relids;
+
+		cpath = palloc0(sizeof(CustomPath));
+		cpath->path.type = T_CustomPath;
+		cpath->path.pathtype = T_CustomScan;
+		cpath->path.parent = baserel;
+		cpath->path.param_info
+			= get_baserel_parampathinfo(root, baserel, required_outer);
+		cpath->flags = CUSTOMPATH_SUPPORT_BACKWARD_SCAN;
+		cpath->custom_private = ctid_quals;
+		cpath->methods = &ctidscan_path_methods;
+
+		CTidEstimateCosts(root, baserel, cpath);
+
+		add_path(baserel, &cpath->path);
+	}
+}
+
+/*
+ * PlanCtidScanPlan - A method of CustomPath; that populate a custom
+ * object being delivered from CustomScan type, according to the supplied
+ * CustomPath object.
+ */
+static Plan *
+PlanCtidScanPath(PlannerInfo *root,
+				 RelOptInfo *rel,
+				 CustomPath *best_path,
+				 List *tlist,
+				 List *clauses)
+{
+	CustomScan *plan_node = makeNode(CustomScan);
+
+	/* generic Scan/Plan part: target relation, target list, bare quals */
+	plan_node->scan.scanrelid = rel->relid;
+	plan_node->scan.plan.targetlist = tlist;
+	plan_node->scan.plan.qual = extract_actual_clauses(clauses, false);
+
+	/* custom-scan part: flags, callbacks and the ctid quals of the path */
+	plan_node->flags = best_path->flags;
+	plan_node->methods = &ctidscan_scan_methods;
+	plan_node->custom_exprs = best_path->custom_private;
+
+	return &plan_node->scan.plan;
+}
+
+/*
+ * CreateCtidScanState - A method of CustomScan; allocates a zero-filled
+ * CtidScanState, tags it as a CustomScanState and copies the flags of
+ * the supplied CustomScan plan node into it.
+ */
+static Node *
+CreateCtidScanState(CustomScan *custom_plan)
+{
+	CtidScanState *state;
+
+	state = palloc0(sizeof(CtidScanState));
+	NodeSetTag(state, T_CustomScanState);
+	state->css.methods = &ctidscan_exec_methods;
+	state->css.flags = custom_plan->flags;
+
+	return (Node *) &state->css;
+}
+
+/*
+ * BeginCtidScan - A method of CustomScanState; initializes the private
+ * part of the supplied CtidScanState at executor startup.
+ */
+static void
+BeginCtidScan(CustomScanState *node, EState *estate, int eflags)
+{
+	CtidScanState *state = (CtidScanState *) node;
+	PlanState  *parent = &node->ss.ps;
+	CustomScan *plan_node = (CustomScan *) parent->plan;
+	Expr	   *quals = (Expr *) plan_node->custom_exprs;
+
+	/*
+	 * A custom-scan provider that offers an alternative way to scan a
+	 * particular relation gets most of its initialization (opening the
+	 * relation, assigning the scan tuple slot and projection info) from
+	 * the core implementation. Only the private expression state of the
+	 * ctid quals has to be set up here.
+	 */
+	state->ctid_quals = (List *) ExecInitExpr(quals, parent);
+}
+
+/*
+ * ReScanCtidScan - A method of CustomScanState; that rewind the current
+ * seek position.
+ */
+static void
+ReScanCtidScan(CustomScanState *node)
+{
+	CtidScanState *ctss = (CtidScanState *) node;
+	HeapScanDesc scan = ctss->css.ss.ss_currentScanDesc;
+	EState	   *estate = node->ss.ps.state;
+	ScanDirection direction = estate->es_direction;
+	Relation	relation = ctss->css.ss.ss_currentRelation;
+	ExprContext *econtext = ctss->css.ss.ps.ps_ExprContext;
+	Index		scanrelid = ((Scan *) ctss->css.ss.ps.plan)->scanrelid;
+	ScanKeyData keys[2];	/* [0] = upper bound, [1] = lower bound */
+	bool		has_ubound = false;
+	bool		has_lbound = false;
+	ItemPointerData ip_max;
+	ItemPointerData ip_min;
+	ListCell   *lc;
+
+	/* close the existing scan descriptor first, if any */
+	if (scan)
+	{
+		heap_endscan(scan);
+		scan = ctss->css.ss.ss_currentScanDesc = NULL;
+	}
+
+	/* walk on the inequality operators, keeping the tightest bounds */
+	foreach(lc, ctss->ctid_quals)
+	{
+		FuncExprState *fexstate = (FuncExprState *) lfirst(lc);
+		OpExpr	   *op = (OpExpr *) fexstate->xprstate.expr;
+		Node	   *arg1 = linitial(op->args);
+		Node	   *arg2 = lsecond(op->args);
+		Oid			opno;
+		ExprState  *exstate;
+		ItemPointer itemptr;
+		bool		isnull;
+
+		if (IsCTIDVar(arg1, scanrelid))
+		{
+			exstate = (ExprState *) lsecond(fexstate->args);
+			opno = op->opno;
+		}
+		else if (IsCTIDVar(arg2, scanrelid))
+		{
+			/* Var is the 2nd argument: use the commuted operator */
+			exstate = (ExprState *) linitial(fexstate->args);
+			opno = get_commutator(op->opno);
+		}
+		else
+			elog(ERROR, "could not identify CTID variable");
+
+		itemptr = (ItemPointer)
+			DatumGetPointer(ExecEvalExprSwitchContext(exstate,
+													  econtext,
+													  &isnull,
+													  NULL));
+		if (isnull)
+		{
+			/*
+			 * The restriction clauses are chained with AND, so the whole
+			 * condition cannot be true once one of them yields NULL.
+			 * Stop here to tell the caller there is nothing to scan:
+			 * the scan descriptor is left NULL in this case.
+			 */
+			return;
+		}
+
+		switch (opno)
+		{
+			case TIDLessOperator:
+				/* on a tie, "<" replaces "<=" because it is stricter */
+				if (!has_ubound ||
+					ItemPointerCompare(itemptr, &ip_max) <= 0)
+				{
+					ScanKeyInit(&keys[0],
+								SelfItemPointerAttributeNumber,
+								BTLessStrategyNumber,
+								F_TIDLT,
+								PointerGetDatum(itemptr));
+					ItemPointerCopy(itemptr, &ip_max);
+					has_ubound = true;
+				}
+				break;
+
+			case TIDLessEqualOperator:
+				if (!has_ubound ||
+					ItemPointerCompare(itemptr, &ip_max) < 0)
+				{
+					ScanKeyInit(&keys[0],
+								SelfItemPointerAttributeNumber,
+								BTLessEqualStrategyNumber,
+								F_TIDLE,
+								PointerGetDatum(itemptr));
+					ItemPointerCopy(itemptr, &ip_max);
+					has_ubound = true;
+				}
+				break;
+
+			case TIDGreaterOperator:
+				/* on a tie, ">" replaces ">=" because it is stricter */
+				if (!has_lbound ||
+					ItemPointerCompare(itemptr, &ip_min) >= 0)
+				{
+					ScanKeyInit(&keys[1],
+								SelfItemPointerAttributeNumber,
+								BTGreaterStrategyNumber,
+								F_TIDGT,
+								PointerGetDatum(itemptr));
+					ItemPointerCopy(itemptr, &ip_min);
+					has_lbound = true;
+				}
+				break;
+
+			case TIDGreaterEqualOperator:
+				if (!has_lbound ||
+					ItemPointerCompare(itemptr, &ip_min) > 0)
+				{
+					ScanKeyInit(&keys[1],
+								SelfItemPointerAttributeNumber,
+								BTGreaterEqualStrategyNumber,
+								F_TIDGE,
+								PointerGetDatum(itemptr));
+					ItemPointerCopy(itemptr, &ip_min);
+					has_lbound = true;
+				}
+				break;
+
+			default:
+				elog(ERROR, "unsupported operator");
+				break;
+		}
+	}
+
+	/* begin heapscan with the key(s) built above */
+	if (has_ubound && has_lbound)
+		scan = heap_beginscan(relation, estate->es_snapshot, 2, &keys[0]);
+	else if (has_ubound)
+		scan = heap_beginscan(relation, estate->es_snapshot, 1, &keys[0]);
+	else if (has_lbound)
+		scan = heap_beginscan(relation, estate->es_snapshot, 1, &keys[1]);
+	else
+		scan = heap_beginscan(relation, estate->es_snapshot, 0, NULL);
+
+	/*
+	 * Seek the starting position, if possible. rs_nblocks is unsigned, so
+	 * "rs_nblocks - 1" is only meaningful for a non-empty relation.
+	 */
+	if (scan->rs_nblocks > 0)
+	{
+		BlockNumber last_block = scan->rs_nblocks - 1;
+
+		if (direction == ForwardScanDirection && has_lbound)
+			scan->rs_startblock =
+				Min(BlockIdGetBlockNumber(&ip_min.ip_blkid), last_block);
+		else if (direction == BackwardScanDirection && has_ubound)
+			scan->rs_startblock =
+				Min(BlockIdGetBlockNumber(&ip_max.ip_blkid), last_block);
+	}
+	ctss->css.ss.ss_currentScanDesc = scan;
+}
+
+/*
+ * CTidAccessCustomScan
+ *
+ * Access method of ExecCtidScan below. It fetches a tuple from the underlying
+ * heap scan that was started from the point according to the tid clauses.
+ * Returns NULL when the scan is exhausted, or when ReScanCtidScan could not
+ * start a scan because one of the bounds evaluated to NULL.
+ */
+static TupleTableSlot *
+CTidAccessCustomScan(CustomScanState *node)
+{
+	CtidScanState *ctss = (CtidScanState *) node;
+	HeapScanDesc scan;
+	TupleTableSlot *slot;
+	EState	   *estate = node->ss.ps.state;
+	ScanDirection direction = estate->es_direction;
+	HeapTuple	tuple;
+
+	if (!ctss->css.ss.ss_currentScanDesc)
+		ReScanCtidScan(node);
+	scan = ctss->css.ss.ss_currentScanDesc;
+
+	/*
+	 * ReScanCtidScan leaves the descriptor NULL when a bound is NULL;
+	 * no tuple can qualify then, so report end of scan.
+	 */
+	if (scan == NULL)
+		return NULL;
+
+	/*
+	 * get the next tuple from the table
+	 */
+	tuple = heap_getnext(scan, direction);
+	if (!HeapTupleIsValid(tuple))
+		return NULL;
+
+	slot = ctss->css.ss.ss_ScanTupleSlot;
+	ExecStoreTuple(tuple, slot, scan->rs_cbuf, false);
+
+	return slot;
+}
+
+/*
+ * CTidRecheckCustomScan - recheck method handed to ExecScan; it accepts
+ * every tuple.
+ */
+static bool
+CTidRecheckCustomScan(CustomScanState *node, TupleTableSlot *slot)
+{
+	return true;
+}
+
+/*
+ * ExecCtidScan - A method of CustomScanState; that fetches a tuple
+ * from the relation, if any remain.
+ */
+static TupleTableSlot *
+ExecCtidScan(CustomScanState *node)
+{
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) CTidAccessCustomScan,
+					(ExecScanRecheckMtd) CTidRecheckCustomScan);
+}
+
+/*
+ * EndCtidScan - A method of CustomScanState; that closes the heap scan
+ * descriptor, if one is open.
+ */
+static void
+EndCtidScan(CustomScanState *node)
+{
+	CtidScanState *ctss = (CtidScanState *) node;
+
+	if (ctss->css.ss.ss_currentScanDesc)
+	{
+		heap_endscan(ctss->css.ss.ss_currentScanDesc);
+		/* do not leave a dangling descriptor behind */
+		ctss->css.ss.ss_currentScanDesc = NULL;
+	}
+}
+
+/*
+ * ExplainCtidScan - A method of CustomScanState; that shows extra info
+ * on EXPLAIN command.
+ */
+static void
+ExplainCtidScan(CustomScanState *node, List *ancestors, ExplainState *es)
+{
+	CustomScan *plan_node = (CustomScan *) node->ss.ps.plan;
+	Node	   *and_expr;
+	List	   *deparse_cxt;
+	char	   *text;
+
+	/* nothing extra to show when the plan carries no ctid quals */
+	if (!plan_node->custom_exprs)
+		return;
+
+	/* logic copied from show_qual and show_expression */
+
+	/* Convert AND list to explicit AND */
+	and_expr = (Node *) make_ands_explicit(plan_node->custom_exprs);
+
+	/* Set up deparsing context */
+	deparse_cxt = set_deparse_context_planstate(es->deparse_cxt,
+												(Node *) node,
+												ancestors);
+
+	/* Deparse the expression; prefix column names only in VERBOSE mode */
+	text = deparse_expression(and_expr, deparse_cxt, es->verbose, false);
+
+	/* And add to es->str */
+	ExplainPropertyText("ctid quals", text, es);
+}
+
+/*
+ * Entrypoint of this extension: defines the enable_ctidscan GUC and
+ * installs SetCtidScanPath as set_rel_pathlist_hook, remembering the hook
+ * that was installed before.
+ */
+void
+_PG_init(void)
+{
+	DefineCustomBoolVariable("enable_ctidscan",
+							 "Enables the planner's use of ctid-scan plans.",
+							 NULL,
+							 &enable_ctidscan,
+							 true,
+							 PGC_USERSET,
+							 GUC_NOT_IN_SAMPLE,
+							 NULL, NULL, NULL);
+
+	/* registration of the hook to add alternative path */
+	set_rel_pathlist_next = set_rel_pathlist_hook;
+	set_rel_pathlist_hook = SetCtidScanPath;
+}
diff --git a/contrib/ctidscan/ctidscan.control b/contrib/ctidscan/ctidscan.control
new file mode 100644
index 0000000000000..ad63432d4c41f
--- /dev/null
+++ b/contrib/ctidscan/ctidscan.control
@@ -0,0 +1,5 @@
+# ctidscan extension
+comment = 'example implementation for custom-plan interface'
+default_version = '1.0'
+module_pathname = '$libdir/ctidscan'
+relocatable = true
diff --git a/contrib/ctidscan/ctidscan.sgml b/contrib/ctidscan/ctidscan.sgml
new file mode 100644
index 0000000000000..ce49e77e1a322
--- /dev/null
+++ b/contrib/ctidscan/ctidscan.sgml
@@ -0,0 +1,50 @@
+
+ ctidscan
+
+
+ ctidscan
+
+
+
+ This module implements a custom-scan provider that utilizes
inequality + operator that involves the ctid system column. + + + + This module provides no SQL accessible interface. For installation, + all you need to do is just load the module to the server. + + You can load it an individual session using: + +LOAD 'ctidscan'; + + + or, you can also take more typical usage with extension preloading + using or + in + postgresql.conf. + + Then, planner may consider more cheap execution path if supplied query + involves above operators. + + + + + enable_ctidscan (bool) + + enable_ctidscan configuration parameter + + + + + enable_ctidscan turns on/off functionality of + ctidscan custom-scan provider. + If turned off, it does not offer alternative scan path even if + supplied query is sufficient to run by ctidscan plan. + Its default is true. + Anybody can change using SET command. + + + + + diff --git a/contrib/ctidscan/expected/ctidscan.out b/contrib/ctidscan/expected/ctidscan.out new file mode 100644 index 0000000000000..5b289240acce6 --- /dev/null +++ b/contrib/ctidscan/expected/ctidscan.out @@ -0,0 +1,332 @@ +-- +-- Regression Tests for Custom Plan APIs +-- +-- construction of test data +SET client_min_messages TO 'warning'; +CREATE SCHEMA regtest_custom_scan; +SET search_path TO regtest_custom_scan, public; +CREATE TABLE t1 ( + a int primary key, + b text +); +INSERT INTO t1 (SELECT s, md5(s::text) FROM generate_series(1,400) s); +VACUUM ANALYZE t1; +CREATE TABLE t2 ( + x int primary key, + y text +); +INSERT INTO t2 (SELECT s, md5(s::text)||md5(s::text) FROM generate_series(1,400) s); +VACUUM ANALYZE t2; +RESET client_min_messages; +-- +-- Check Plans if no special extension is loaded. 
+-- +EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40; + QUERY PLAN +-------------------------------- + Index Scan using t1_pkey on t1 + Index Cond: (a = 40) +(2 rows) + +EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%'; + QUERY PLAN +-------------------------------- + Seq Scan on t1 + Filter: (b ~~ '%789%'::text) +(2 rows) + +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid; + QUERY PLAN +------------------------------------ + Tid Scan on t1 + TID Cond: (ctid = '(2,10)'::tid) +(2 rows) + +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid; + QUERY PLAN +------------------------------------------------------------------ + Seq Scan on t1 + Filter: ((ctid >= '(2,115)'::tid) AND (ctid <= '(3,10)'::tid)) +(2 rows) + +-- +-- Plan for same query but ctidscan was loaded +-- +LOAD '$libdir/ctidscan'; +EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40; + QUERY PLAN +-------------------------------- + Index Scan using t1_pkey on t1 + Index Cond: (a = 40) +(2 rows) + +EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%'; + QUERY PLAN +-------------------------------- + Seq Scan on t1 + Filter: (b ~~ '%789%'::text) +(2 rows) + +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid; + QUERY PLAN +------------------------------------ + Tid Scan on t1 + TID Cond: (ctid = '(2,10)'::tid) +(2 rows) + +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid; + QUERY PLAN +---------------------------------------------------------------------- + Custom Scan (ctidscan) on t1 + Filter: ((ctid >= '(2,115)'::tid) AND (ctid <= '(3,10)'::tid)) + ctid quals: ((ctid >= '(2,115)'::tid) AND (ctid <= '(3,10)'::tid)) +(3 rows) + +EXPLAIN (costs off) SELECT * FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (t1.ctid = t2.ctid) + -> Sort + Sort Key: 
t1.ctid + -> Custom Scan (ctidscan) on t1 + Filter: (ctid < '(2,10)'::tid) + ctid quals: (ctid < '(2,10)'::tid) + -> Sort + Sort Key: t2.ctid + -> Custom Scan (ctidscan) on t2 + Filter: (ctid > '(1,75)'::tid) + ctid quals: (ctid > '(1,75)'::tid) +(12 rows) + +SELECT ctid,* FROM t1 WHERE ctid < '(1,20)'::tid; + ctid | a | b +---------+-----+---------------------------------- + (0,1) | 1 | c4ca4238a0b923820dcc509a6f75849b + (0,2) | 2 | c81e728d9d4c2f636f067f89cc14862c + (0,3) | 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 + (0,4) | 4 | a87ff679a2f3e71d9181a67b7542122c + (0,5) | 5 | e4da3b7fbbce2345d7772b0674a318d5 + (0,6) | 6 | 1679091c5a880faf6fb5e6087eb1b2dc + (0,7) | 7 | 8f14e45fceea167a5a36dedd4bea2543 + (0,8) | 8 | c9f0f895fb98ab9159f51fd0297e236d + (0,9) | 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + (0,10) | 10 | d3d9446802a44259755d38e6d163e820 + (0,11) | 11 | 6512bd43d9caa6e02c990b0a82652dca + (0,12) | 12 | c20ad4d76fe97759aa27a0c99bff6710 + (0,13) | 13 | c51ce410c124a10e0db5e4b97fc2af39 + (0,14) | 14 | aab3238922bcc25a6f606eb525ffdc56 + (0,15) | 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + (0,16) | 16 | c74d97b01eae257e44aa9d5bade97baf + (0,17) | 17 | 70efdf2ec9b086079795c442636b55fb + (0,18) | 18 | 6f4922f45568161a8cdf4ad2299f6d23 + (0,19) | 19 | 1f0e3dad99908345f7439f8ffabdffc4 + (0,20) | 20 | 98f13708210194c475687be6106a3b84 + (0,21) | 21 | 3c59dc048e8850243be8079a5c74d079 + (0,22) | 22 | b6d767d2f8ed5d21a44b0e5886680cb9 + (0,23) | 23 | 37693cfc748049e45d87b8c7d8b9aacd + (0,24) | 24 | 1ff1de774005f8da13f42943881c655f + (0,25) | 25 | 8e296a067a37563370ded05f5a3bf3ec + (0,26) | 26 | 4e732ced3463d06de0ca9a15b6153677 + (0,27) | 27 | 02e74f10e0327ad868d138f2b4fdd6f0 + (0,28) | 28 | 33e75ff09dd601bbe69f351039152189 + (0,29) | 29 | 6ea9ab1baa0efb9e19094440c317e21b + (0,30) | 30 | 34173cb38f07f89ddbebc2ac9128303f + (0,31) | 31 | c16a5320fa475530d9583c34fd356ef5 + (0,32) | 32 | 6364d3f0f495b6ab9dcf8d3b5c6e0b01 + (0,33) | 33 | 182be0c5cdcd5072bb1864cdee4d3d6e + (0,34) | 34 | 
e369853df766fa44e1ed0ff613f563bd + (0,35) | 35 | 1c383cd30b7c298ab50293adfecb7b18 + (0,36) | 36 | 19ca14e7ea6328a42e0eb13d585e4c22 + (0,37) | 37 | a5bfc9e07964f8dddeb95fc584cd965d + (0,38) | 38 | a5771bce93e200c36f7cd9dfd0e5deaa + (0,39) | 39 | d67d8ab4f4c10bf22aa353e27879133c + (0,40) | 40 | d645920e395fedad7bbbed0eca3fe2e0 + (0,41) | 41 | 3416a75f4cea9109507cacd8e2f2aefc + (0,42) | 42 | a1d0c6e83f027327d8461063f4ac58a6 + (0,43) | 43 | 17e62166fc8586dfa4d1bc0e1742c08b + (0,44) | 44 | f7177163c833dff4b38fc8d2872f1ec6 + (0,45) | 45 | 6c8349cc7260ae62e3b1396831a8398f + (0,46) | 46 | d9d4f495e875a2e075a1a4a6e1b9770f + (0,47) | 47 | 67c6a1e7ce56d3d6fa748ab6d9af3fd7 + (0,48) | 48 | 642e92efb79421734881b53e1e1b18b6 + (0,49) | 49 | f457c545a9ded88f18ecee47145a72c0 + (0,50) | 50 | c0c7c76d30bd3dcaefc96f40275bdc0a + (0,51) | 51 | 2838023a778dfaecdc212708f721b788 + (0,52) | 52 | 9a1158154dfa42caddbd0694a4e9bdc8 + (0,53) | 53 | d82c8d1619ad8176d665453cfb2e55f0 + (0,54) | 54 | a684eceee76fc522773286a895bc8436 + (0,55) | 55 | b53b3a3d6ab90ce0268229151c9bde11 + (0,56) | 56 | 9f61408e3afb633e50cdf1b20de6f466 + (0,57) | 57 | 72b32a1f754ba1c09b3695e0cb6cde7f + (0,58) | 58 | 66f041e16a60928b05a7e228a89c3799 + (0,59) | 59 | 093f65e080a295f8076b1c5722a46aa2 + (0,60) | 60 | 072b030ba126b2f4b2374f342be9ed44 + (0,61) | 61 | 7f39f8317fbdb1988ef4c628eba02591 + (0,62) | 62 | 44f683a84163b3523afe57c2e008bc8c + (0,63) | 63 | 03afdbd66e7929b125f8597834fa83a4 + (0,64) | 64 | ea5d2f1c4608232e07d3aa3d998e5135 + (0,65) | 65 | fc490ca45c00b1249bbe3554a4fdf6fb + (0,66) | 66 | 3295c76acbf4caaed33c36b1b5fc2cb1 + (0,67) | 67 | 735b90b4568125ed6c3f678819b6e058 + (0,68) | 68 | a3f390d88e4c41f2747bfa2f1b5f87db + (0,69) | 69 | 14bfa6bb14875e45bba028a21ed38046 + (0,70) | 70 | 7cbbc409ec990f19c78c75bd1e06f215 + (0,71) | 71 | e2c420d928d4bf8ce0ff2ec19b371514 + (0,72) | 72 | 32bb90e8976aab5298d5da10fe66f21d + (0,73) | 73 | d2ddea18f00665ce8623e36bd4e3c7c5 + (0,74) | 74 | ad61ab143223efbc24c7d2583be69251 + 
(0,75) | 75 | d09bf41544a3365a46c9077ebb5e35c3 + (0,76) | 76 | fbd7939d674997cdb4692d34de8633c4 + (0,77) | 77 | 28dd2c7955ce926456240b2ff0100bde + (0,78) | 78 | 35f4a8d465e6e1edc05f3d8ab658c551 + (0,79) | 79 | d1fe173d08e959397adf34b1d77e88d7 + (0,80) | 80 | f033ab37c30201f73f142449d037028d + (0,81) | 81 | 43ec517d68b6edd3015b3edc9a11367b + (0,82) | 82 | 9778d5d219c5080b9a6a17bef029331c + (0,83) | 83 | fe9fc289c3ff0af142b6d3bead98a923 + (0,84) | 84 | 68d30a9594728bc39aa24be94b319d21 + (0,85) | 85 | 3ef815416f775098fe977004015c6193 + (0,86) | 86 | 93db85ed909c13838ff95ccfa94cebd9 + (0,87) | 87 | c7e1249ffc03eb9ded908c236bd1996d + (0,88) | 88 | 2a38a4a9316c49e5a833517c45d31070 + (0,89) | 89 | 7647966b7343c29048673252e490f736 + (0,90) | 90 | 8613985ec49eb8f757ae6439e879bb2a + (0,91) | 91 | 54229abfcfa5649e7003b83dd4755294 + (0,92) | 92 | 92cc227532d17e56e07902b254dfad10 + (0,93) | 93 | 98dce83da57b0395e163467c9dae521b + (0,94) | 94 | f4b9ec30ad9f68f89b29639786cb62ef + (0,95) | 95 | 812b4ba287f5ee0bc9d43bbf5bbe87fb + (0,96) | 96 | 26657d5ff9020d2abefe558796b99584 + (0,97) | 97 | e2ef524fbf3d9fe611d5a8e90fefdc9c + (0,98) | 98 | ed3d2c21991e3bef5e069713af9fa6ca + (0,99) | 99 | ac627ab1ccbdb62ec96e702f07f6425b + (0,100) | 100 | f899139df5e1059396431415e770c6dd + (0,101) | 101 | 38b3eff8baf56627478ec76a704e9b52 + (0,102) | 102 | ec8956637a99787bd197eacd77acce5e + (0,103) | 103 | 6974ce5ac660610b44d9b9fed0ff9548 + (0,104) | 104 | c9e1074f5b3f9fc8ea15d152add07294 + (0,105) | 105 | 65b9eea6e1cc6bb9f0cd2a47751a186f + (0,106) | 106 | f0935e4cd5920aa6c7c996a5ee53a70f + (0,107) | 107 | a97da629b098b75c294dffdc3e463904 + (0,108) | 108 | a3c65c2974270fd093ee8a9bf8ae7d0b + (0,109) | 109 | 2723d092b63885e0d7c260cc007e8b9d + (0,110) | 110 | 5f93f983524def3dca464469d2cf9f3e + (0,111) | 111 | 698d51a19d8a121ce581499d7b701668 + (0,112) | 112 | 7f6ffaa6bb0b408017b62254211691b5 + (0,113) | 113 | 73278a4a86960eeb576a8fd4c9ec6997 + (0,114) | 114 | 5fd0b37cd7dbbb00f97ba6ce92bf5add + (0,115) | 
115 | 2b44928ae11fb9384c4cf38708677c48 + (0,116) | 116 | c45147dee729311ef5b5c3003946c48f + (0,117) | 117 | eb160de1de89d9058fcb0b968dbbbd68 + (0,118) | 118 | 5ef059938ba799aaa845e1c2e8a762bd + (0,119) | 119 | 07e1cd7dca89a1678042477183b7ac3f + (0,120) | 120 | da4fb5c6e93e74d3df8527599fa62642 + (1,1) | 121 | 4c56ff4ce4aaf9573aa5dff913df997a + (1,2) | 122 | a0a080f42e6f13b3a2df133f073095dd + (1,3) | 123 | 202cb962ac59075b964b07152d234b70 + (1,4) | 124 | c8ffe9a587b126f152ed3d89a146b445 + (1,5) | 125 | 3def184ad8f4755ff269862ea77393dd + (1,6) | 126 | 069059b7ef840f0c74a814ec9237b6ec + (1,7) | 127 | ec5decca5ed3d6b8079e2e7e7bacc9f2 + (1,8) | 128 | 76dc611d6ebaafc66cc0879c71b5db5c + (1,9) | 129 | d1f491a404d6854880943e5c3cd9ca25 + (1,10) | 130 | 9b8619251a19057cff70779273e95aa6 + (1,11) | 131 | 1afa34a7f984eeabdbb0a7d494132ee5 + (1,12) | 132 | 65ded5353c5ee48d0b7d48c591b8f430 + (1,13) | 133 | 9fc3d7152ba9336a670e36d0ed79bc43 + (1,14) | 134 | 02522a2b2726fb0a03bb19f2d8d9524d + (1,15) | 135 | 7f1de29e6da19d22b51c68001e7e0e54 + (1,16) | 136 | 42a0e188f5033bc65bf8d78622277c4e + (1,17) | 137 | 3988c7f88ebcb58c6ce932b957b6f332 + (1,18) | 138 | 013d407166ec4fa56eb1e1f8cbe183b9 + (1,19) | 139 | e00da03b685a0dd18fb6a08af0923de0 +(139 rows) + +SELECT ctid,* FROM t1 WHERE ctid > '(4,0)'::tid; + ctid | a | b +------+---+--- +(0 rows) + +SELECT ctid,* FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid; + ctid | a | b +---------+-----+---------------------------------- + (2,115) | 355 | 82cec96096d4281b7c95cd7e74623496 + (2,116) | 356 | 6c524f9d5d7027454a783c841250ba71 + (2,117) | 357 | fb7b9ffa5462084c5f4e7e85a093e6d7 + (2,118) | 358 | aa942ab2bfa6ebda4840e7360ce6e7ef + (2,119) | 359 | c058f544c737782deacefa532d9add4c + (2,120) | 360 | e7b24b112a44fdd9ee93bdf998c6ca0e + (3,1) | 361 | 52720e003547c70561bf5e03b95aa99f + (3,2) | 362 | c3e878e27f52e2a57ace4d9a76fd9acf + (3,3) | 363 | 00411460f7c92d2124a67ea0f4cb5f85 + (3,4) | 364 | bac9162b47c56fc8a4d2a519803d51b3 + (3,5) | 
365 | 9be40cee5b0eee1462c82c6964087ff9 + (3,6) | 366 | 5ef698cd9fe650923ea331c15af3b160 + (3,7) | 367 | 05049e90fa4f5039a8cadc6acbb4b2cc + (3,8) | 368 | cf004fdc76fa1a4f25f62e0eb5261ca3 + (3,9) | 369 | 0c74b7f78409a4022a2c4c5a5ca3ee19 + (3,10) | 370 | d709f38ef758b5066ef31b18039b8ce5 +(16 rows) + +SELECT t1.ctid,* FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid; + ctid | a | b | x | y +--------+-----+----------------------------------+-----+------------------------------------------------------------------ + (1,76) | 196 | 084b6fbb10729ed4da8c3d3f5a3ae7c9 | 157 | 6c4b761a28b734fe93831e3fb400ce876c4b761a28b734fe93831e3fb400ce87 + (1,77) | 197 | 85d8ce590ad8981ca2c8286f79f59954 | 158 | 06409663226af2f3114485aa4e0a23b406409663226af2f3114485aa4e0a23b4 + (1,78) | 198 | 0e65972dce68dad4d52d063967f0a705 | 159 | 140f6969d5213fd0ece03148e62e461e140f6969d5213fd0ece03148e62e461e + (1,79) | 199 | 84d9ee44e457ddef7f2c4f25dc8fa865 | 160 | b73ce398c39f506af761d2277d853a92b73ce398c39f506af761d2277d853a92 + (1,80) | 200 | 3644a684f98ea8fe223c713b77189a77 | 161 | bd4c9ab730f5513206b999ec0d90d1fbbd4c9ab730f5513206b999ec0d90d1fb + (1,81) | 201 | 757b505cfd34c64c85ca5b5690ee5293 | 162 | 82aa4b0af34c2313a562076992e50aa382aa4b0af34c2313a562076992e50aa3 + (2,1) | 241 | f340f1b1f65b6df5b5e3f94d95b11daf | 163 | 0777d5c17d4066b82ab86dff8a46af6f0777d5c17d4066b82ab86dff8a46af6f + (2,2) | 242 | e4a6222cdb5b34375400904f03d8e6a5 | 164 | fa7cdfad1a5aaf8370ebeda47a1ff1c3fa7cdfad1a5aaf8370ebeda47a1ff1c3 + (2,3) | 243 | cb70ab375662576bd1ac5aaf16b3fca4 | 165 | 9766527f2b5d3e95d4a733fcfb77bd7e9766527f2b5d3e95d4a733fcfb77bd7e + (2,4) | 244 | 9188905e74c28e489b44e954ec0b9bca | 166 | 7e7757b1e12abcb736ab9a754ffb617a7e7757b1e12abcb736ab9a754ffb617a + (2,5) | 245 | 0266e33d3f546cb5436a10798e657d97 | 167 | 5878a7ab84fb43402106c575658472fa5878a7ab84fb43402106c575658472fa + (2,6) | 246 | 38db3aed920cf82ab059bfccbd02be6a | 168 | 
006f52e9102a8d3be2fe5614f42ba989006f52e9102a8d3be2fe5614f42ba989 + (2,7) | 247 | 3cec07e9ba5f5bb252d13f5f431e4bbb | 169 | 3636638817772e42b59d74cff571fbb33636638817772e42b59d74cff571fbb3 + (2,8) | 248 | 621bf66ddb7c962aa0d22ac97d69b793 | 170 | 149e9677a5989fd342ae44213df68868149e9677a5989fd342ae44213df68868 + (2,9) | 249 | 077e29b11be80ab57e1a2ecabb7da330 | 171 | a4a042cf4fd6bfb47701cbc8a1653adaa4a042cf4fd6bfb47701cbc8a1653ada +(15 rows) + +PREPARE p1(tid, tid) AS SELECT ctid,* FROM t1 + WHERE b like '%abc%' AND ctid BETWEEN $1 AND $2; +EXPLAIN (costs off) EXECUTE p1('(5,0)'::tid, '(10,0)'::tid); + QUERY PLAN +----------------------------------------------------------------------------------------- + Custom Scan (ctidscan) on t1 + Filter: ((b ~~ '%abc%'::text) AND (ctid >= '(5,0)'::tid) AND (ctid <= '(10,0)'::tid)) + ctid quals: ((ctid >= '(5,0)'::tid) AND (ctid <= '(10,0)'::tid)) +(3 rows) + +EXPLAIN (costs off) EXECUTE p1('(10,0)'::tid, '(5,0)'::tid); + QUERY PLAN +----------------------------------------------------------------------------------------- + Custom Scan (ctidscan) on t1 + Filter: ((b ~~ '%abc%'::text) AND (ctid >= '(10,0)'::tid) AND (ctid <= '(5,0)'::tid)) + ctid quals: ((ctid >= '(10,0)'::tid) AND (ctid <= '(5,0)'::tid)) +(3 rows) + +-- Also, EXPLAIN with none-text format +EXPLAIN (costs off, format xml) EXECUTE p1('(0,0)'::tid, '(5,0)'::tid); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + + + + + + + Custom Scan + + ctidscan + + t1 + + t1 + + ((b ~~ '%abc%'::text) AND (ctid >= '(0,0)'::tid) AND (ctid <= '(5,0)'::tid))+ + ((ctid >= '(0,0)'::tid) AND (ctid <= '(5,0)'::tid)) + + + + + + +(1 row) + +-- Test cleanup +DROP SCHEMA regtest_custom_scan CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table t1 +drop cascades to table t2 diff --git a/contrib/ctidscan/sql/ctidscan.sql b/contrib/ctidscan/sql/ctidscan.sql new file mode 100644 index 
0000000000000..26c22c2bf40ea --- /dev/null +++ b/contrib/ctidscan/sql/ctidscan.sql @@ -0,0 +1,59 @@ +-- +-- Regression Tests for Custom Plan APIs +-- + +-- construction of test data +SET client_min_messages TO 'warning'; + +CREATE SCHEMA regtest_custom_scan; + +SET search_path TO regtest_custom_scan, public; + +CREATE TABLE t1 ( + a int primary key, + b text +); +INSERT INTO t1 (SELECT s, md5(s::text) FROM generate_series(1,400) s); +VACUUM ANALYZE t1; + +CREATE TABLE t2 ( + x int primary key, + y text +); +INSERT INTO t2 (SELECT s, md5(s::text)||md5(s::text) FROM generate_series(1,400) s); +VACUUM ANALYZE t2; + +RESET client_min_messages; +-- +-- Check Plans if no special extension is loaded. +-- +EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40; +EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%'; +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid; +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid; + +-- +-- Plan for same query but ctidscan was loaded +-- +LOAD '$libdir/ctidscan'; +EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40; +EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%'; +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid; +EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid; +EXPLAIN (costs off) SELECT * FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid; + +SELECT ctid,* FROM t1 WHERE ctid < '(1,20)'::tid; +SELECT ctid,* FROM t1 WHERE ctid > '(4,0)'::tid; +SELECT ctid,* FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid; +SELECT t1.ctid,* FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid; + +PREPARE p1(tid, tid) AS SELECT ctid,* FROM t1 + WHERE b like '%abc%' AND ctid BETWEEN $1 AND $2; +EXPLAIN (costs off) EXECUTE p1('(5,0)'::tid, '(10,0)'::tid); +EXPLAIN (costs off) EXECUTE p1('(10,0)'::tid, '(5,0)'::tid); + +-- Also, EXPLAIN with 
none-text format
+EXPLAIN (costs off, format xml) EXECUTE p1('(0,0)'::tid, '(5,0)'::tid);
+
+-- Test cleanup
+DROP SCHEMA regtest_custom_scan CASCADE;
diff --git a/script/graph.sql b/script/graph.sql
new file mode 100644
index 0000000000000..86dd558c654ed
--- /dev/null
+++ b/script/graph.sql
@@ -0,0 +1,23 @@
+CREATE TABLE GRAPH(
+ V1 INTEGER NOT NULL,
+ V2 INTEGER NOT NULL,
+ PRIMARY KEY(V1, V2)
+);
+
+/*
+
+ 1 4
+ / \ / \
+ / \ / \
+ 2---3 --- 5
+
+*/
+
+
+INSERT INTO GRAPH (V1, V2) VALUES (1, 2), (2, 1), (2, 3), (3, 2), (1, 3), (3, 1), (3, 4), (4, 3), (3, 5), (5, 3), (4, 5), (5, 4);
+
+/*
+ 2 * 6 = 12
+*/
+
+select COUNT(*) from GRAPH as T1, GRAPH as T2, GRAPH as T3 where T1.V2 = T2.V1 and T2.V2 = T3.V1 and T3.V2 = T1.V1;
\ No newline at end of file
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index e66bcdade716a..030849317b969 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -26,6 +26,76 @@
 #include "utils/memutils.h"
 
 
+bool rootNestLoopNode = true;
+/**
+ * Depth-first search to find all the relations in the entire query.
+ * Each SeqScan/IndexScan state found is stored at relations[*counter] and
+ * *counter is advanced; no capacity is passed in, so the caller must make
+ * relations[] large enough for every scan node in the plan.
+ */
+void
+findAllRelations(PlanState ** relations, PlanState * node, int * counter)
+{
+	if (node == NULL)
+		return;
+
+	switch (nodeTag(node))
+	{
+		case T_SeqScanState:
+			elog(INFO, "T_SeqScan");
+			relations[(*counter)++] = node;
+			break;
+		case T_IndexScanState:
+			elog(INFO, "T_IndexScan");
+			relations[(*counter)++] = node;
+			break;
+		case T_NestLoopState:
+			elog(INFO, "T_NestLoopScan");
+			findAllRelations(relations, outerPlanState(node), counter);
+			findAllRelations(relations, innerPlanState(node), counter);
+			break;
+		default:
+			elog(INFO, "Only NestLoop, SeqScan, and IndexScan are supported! NodeValue: %d", nodeTag(node));
+	}
+}
+
+/**
+ * Debug aid: runs every SeqScan in relations[0..size-1] to completion and
+ * logs the first two attributes of each tuple (read as int4), then the
+ * tuple count. Other node types are only reported as unsupported.
+ */
+void
+printAllRelations(PlanState ** relations, int size)
+{
+	for (int i = 0; i < size; i++)
+	{
+		PlanState  *planNode = relations[i];
+		int			count = 0;
+		switch (nodeTag(planNode))
+		{
+			case T_SeqScanState:
+				for (;;)
+				{
+					TupleTableSlot *slot = ExecProcNode(planNode);
+					Datum		attr1, attr2;
+					bool		isnull;
+					if (TupIsNull(slot))
+						break;
+					count++;
+					/* attribute numbers are 1-based: V1 is 1, V2 is 2 */
+					attr1 = slot_getattr(slot, 1, &isnull);
+					attr2 = slot_getattr(slot, 2, &isnull);
+					elog(INFO, "V1= %d, V2= %d", DatumGetInt32(attr1), DatumGetInt32(attr2));
+				}
+				elog(DEBUG1, "table %s, size = %d", "unknown", count);
+				break;
+			default:
+				elog(DEBUG1, "Only SeqScan is supported");
+		}
+	}
+}
+
+
 /* ----------------------------------------------------------------
  *		ExecNestLoop(node)
  *
@@ -56,335 +126,345 @@
  *		are prepared to return the first tuple.
  * ----------------------------------------------------------------
  */
-TupleTableSlot *
+ TupleTableSlot *
 ExecNestLoop(NestLoopState *node)
 {
-	NestLoop   *nl;
-	PlanState  *innerPlan;
-	PlanState  *outerPlan;
-	TupleTableSlot *outerTupleSlot;
-	TupleTableSlot *innerTupleSlot;
-	List	   *joinqual;
-	List	   *otherqual;
-	ExprContext *econtext;
-	ListCell   *lc;
-
-	/*
-	 * get information from the node
-	 */
-	ENL1_printf("getting info from node");
-
-	nl = (NestLoop *) node->js.ps.plan;
-	joinqual = node->js.joinqual;
-	otherqual = node->js.ps.qual;
-	outerPlan = outerPlanState(node);
-	innerPlan = innerPlanState(node);
-	econtext = node->js.ps.ps_ExprContext;
-
-	/*
-	 * Check to see if we're still projecting out tuples from a previous join
-	 * tuple (because there is a function-returning-set in the projection
-	 * expressions). If so, try to project another one.
-	 */
-	if (node->js.ps.ps_TupFromTlist)
-	{
-		TupleTableSlot *result;
-		ExprDoneCond isDone;
-
-		result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
-		if (isDone == ExprMultipleResult)
-			return result;
-		/* Done with that source tuple...
*/ - node->js.ps.ps_TupFromTlist = false; - } - - /* - * Reset per-tuple memory context to free any expression evaluation - * storage allocated in the previous tuple cycle. Note this can't happen - * until we're done projecting out tuples from a join tuple. - */ - ResetExprContext(econtext); - - /* - * Ok, everything is setup for the join so now loop until we return a - * qualifying join tuple. - */ - ENL1_printf("entering main loop"); - - for (;;) - { - /* - * If we don't have an outer tuple, get the next one and reset the - * inner scan. - */ - if (node->nl_NeedNewOuter) - { - ENL1_printf("getting new outer tuple"); - outerTupleSlot = ExecProcNode(outerPlan); - - /* - * if there are no more outer tuples, then the join is complete.. - */ - if (TupIsNull(outerTupleSlot)) - { - ENL1_printf("no outer tuple, ending join"); - return NULL; - } - - ENL1_printf("saving new outer tuple information"); - econtext->ecxt_outertuple = outerTupleSlot; - node->nl_NeedNewOuter = false; - node->nl_MatchedOuter = false; - - /* - * fetch the values of any outer Vars that must be passed to the - * inner scan, and store them in the appropriate PARAM_EXEC slots. - */ - foreach(lc, nl->nestParams) - { - NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); - int paramno = nlp->paramno; - ParamExecData *prm; - - prm = &(econtext->ecxt_param_exec_vals[paramno]); - /* Param value should be an OUTER_VAR var */ - Assert(IsA(nlp->paramval, Var)); - Assert(nlp->paramval->varno == OUTER_VAR); - Assert(nlp->paramval->varattno > 0); - prm->value = slot_getattr(outerTupleSlot, - nlp->paramval->varattno, - &(prm->isnull)); - /* Flag parameter value as changed */ - innerPlan->chgParam = bms_add_member(innerPlan->chgParam, - paramno); - } - - /* - * now rescan the inner plan - */ - ENL1_printf("rescanning inner plan"); - ExecReScan(innerPlan); - } - - /* - * we have an outerTuple, try to get the next inner tuple. 
- */ - ENL1_printf("getting new inner tuple"); - - innerTupleSlot = ExecProcNode(innerPlan); - econtext->ecxt_innertuple = innerTupleSlot; - - if (TupIsNull(innerTupleSlot)) - { - ENL1_printf("no inner tuple, need new outer tuple"); - - node->nl_NeedNewOuter = true; - - if (!node->nl_MatchedOuter && - (node->js.jointype == JOIN_LEFT || - node->js.jointype == JOIN_ANTI)) - { - /* - * We are doing an outer join and there were no join matches - * for this outer tuple. Generate a fake join tuple with - * nulls for the inner tuple, and return it if it passes the - * non-join quals. - */ - econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot; - - ENL1_printf("testing qualification for outer-join tuple"); - - if (otherqual == NIL || ExecQual(otherqual, econtext, false)) - { - /* - * qualification was satisfied so we project and return - * the slot containing the result tuple using - * ExecProject(). - */ - TupleTableSlot *result; - ExprDoneCond isDone; - - ENL1_printf("qualification succeeded, projecting tuple"); - - result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); - - if (isDone != ExprEndResult) - { - node->js.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; - } - } - else - InstrCountFiltered2(node, 1); - } - - /* - * Otherwise just return to top of loop for a new outer tuple. - */ - continue; - } - - /* - * at this point we have a new pair of inner and outer tuples so we - * test the inner and outer tuples to see if they satisfy the node's - * qualification. - * - * Only the joinquals determine MatchedOuter status, but all quals - * must pass to actually return the tuple. 
- */ - ENL1_printf("testing qualification"); - - if (ExecQual(joinqual, econtext, false)) - { - node->nl_MatchedOuter = true; - - /* In an antijoin, we never return a matched tuple */ - if (node->js.jointype == JOIN_ANTI) - { - node->nl_NeedNewOuter = true; - continue; /* return to top of loop */ - } - - /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. - */ - if (node->js.jointype == JOIN_SEMI) - node->nl_NeedNewOuter = true; - - if (otherqual == NIL || ExecQual(otherqual, econtext, false)) - { - /* - * qualification was satisfied so we project and return the - * slot containing the result tuple using ExecProject(). - */ - TupleTableSlot *result; - ExprDoneCond isDone; - - ENL1_printf("qualification succeeded, projecting tuple"); - - result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); - - if (isDone != ExprEndResult) - { - node->js.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; - } - } - else - InstrCountFiltered2(node, 1); - } - else - InstrCountFiltered1(node, 1); - - /* - * Tuple fails qual, so free per-tuple memory and try again. 
- */ - ResetExprContext(econtext); - - ENL1_printf("qualification failed, looping"); - } + NestLoop *nl; + PlanState *innerPlan; + PlanState *outerPlan; + TupleTableSlot *outerTupleSlot; + TupleTableSlot *innerTupleSlot; + List *joinqual; + List *otherqual; + ExprContext *econtext; + ListCell *lc; + + /* + * get information from the node + */ + ENL1_printf("getting info from node"); + + nl = (NestLoop *) node->js.ps.plan; + joinqual = node->js.joinqual; + otherqual = node->js.ps.qual; + outerPlan = outerPlanState(node); + innerPlan = innerPlanState(node); + if(rootNestLoopNode == true){ /* debug hook: dump the scans below this join the first time any NestLoop node is executed */ + rootNestLoopNode = false; /* NOTE(review): the flag is file-global and nothing in this file sets it back to true, so later queries in the same backend appear to skip the dump - confirm */ + const int numRelations = 10; + PlanState * relations[numRelations]; /* fixed capacity of numRelations entries; this size is not passed to findAllRelations */ + int counter = 0; + findAllRelations(relations, (PlanState *) node, &counter); /* fills relations[] and leaves the number found in counter */ + elog(INFO, "There are %d relations", counter); + printAllRelations(relations, counter); /* NOTE(review): this calls ExecProcNode on the collected scans before the join below runs - confirm the scans are rewound */ + } + + econtext = node->js.ps.ps_ExprContext; + + /* + * Check to see if we're still projecting out tuples from a previous join + * tuple (because there is a function-returning-set in the projection + * expressions). If so, try to project another one. + */ + if (node->js.ps.ps_TupFromTlist) + { + TupleTableSlot *result; + ExprDoneCond isDone; + + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + if (isDone == ExprMultipleResult) + return result; + /* Done with that source tuple... */ + node->js.ps.ps_TupFromTlist = false; + } + + /* + * Reset per-tuple memory context to free any expression evaluation + * storage allocated in the previous tuple cycle. Note this can't happen + * until we're done projecting out tuples from a join tuple. + */ + ResetExprContext(econtext); + + /* + * Ok, everything is setup for the join so now loop until we return a + * qualifying join tuple. + */ + ENL1_printf("entering main loop"); + + for (;;) + { + /* + * If we don't have an outer tuple, get the next one and reset the + * inner scan. 
+ */ + if (node->nl_NeedNewOuter) + { + ENL1_printf("getting new outer tuple"); + outerTupleSlot = ExecProcNode(outerPlan); + + /* + * if there are no more outer tuples, then the join is complete.. + */ + if (TupIsNull(outerTupleSlot)) + { + ENL1_printf("no outer tuple, ending join"); + return NULL; + } + + ENL1_printf("saving new outer tuple information"); + econtext->ecxt_outertuple = outerTupleSlot; + node->nl_NeedNewOuter = false; + node->nl_MatchedOuter = false; + + /* + * fetch the values of any outer Vars that must be passed to the + * inner scan, and store them in the appropriate PARAM_EXEC slots. + */ + foreach(lc, nl->nestParams) + { + NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); + int paramno = nlp->paramno; + ParamExecData *prm; + + prm = &(econtext->ecxt_param_exec_vals[paramno]); + /* Param value should be an OUTER_VAR var */ + Assert(IsA(nlp->paramval, Var)); + Assert(nlp->paramval->varno == OUTER_VAR); + Assert(nlp->paramval->varattno > 0); + prm->value = slot_getattr(outerTupleSlot, + nlp->paramval->varattno, + &(prm->isnull)); + /* Flag parameter value as changed */ + innerPlan->chgParam = bms_add_member(innerPlan->chgParam, + paramno); + } + + /* + * now rescan the inner plan + */ + ENL1_printf("rescanning inner plan"); + ExecReScan(innerPlan); + } + + /* + * we have an outerTuple, try to get the next inner tuple. + */ + ENL1_printf("getting new inner tuple"); + + innerTupleSlot = ExecProcNode(innerPlan); + econtext->ecxt_innertuple = innerTupleSlot; + + if (TupIsNull(innerTupleSlot)) + { + ENL1_printf("no inner tuple, need new outer tuple"); + + node->nl_NeedNewOuter = true; + + if (!node->nl_MatchedOuter && + (node->js.jointype == JOIN_LEFT || + node->js.jointype == JOIN_ANTI)) + { + /* + * We are doing an outer join and there were no join matches + * for this outer tuple. Generate a fake join tuple with + * nulls for the inner tuple, and return it if it passes the + * non-join quals. 
+ */ + econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot; + + ENL1_printf("testing qualification for outer-join tuple"); + + if (otherqual == NIL || ExecQual(otherqual, econtext, false)) + { + /* + * qualification was satisfied so we project and return + * the slot containing the result tuple using + * ExecProject(). + */ + TupleTableSlot *result; + ExprDoneCond isDone; + + ENL1_printf("qualification succeeded, projecting tuple"); + + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + else + InstrCountFiltered2(node, 1); + } + + /* + * Otherwise just return to top of loop for a new outer tuple. + */ + continue; + } + + /* + * at this point we have a new pair of inner and outer tuples so we + * test the inner and outer tuples to see if they satisfy the node's + * qualification. + * + * Only the joinquals determine MatchedOuter status, but all quals + * must pass to actually return the tuple. + */ + ENL1_printf("testing qualification"); + + if (ExecQual(joinqual, econtext, false)) + { + node->nl_MatchedOuter = true; + + /* In an antijoin, we never return a matched tuple */ + if (node->js.jointype == JOIN_ANTI) + { + node->nl_NeedNewOuter = true; + continue; /* return to top of loop */ + } + + /* + * In a semijoin, we'll consider returning the first match, but + * after that we're done with this outer tuple. + */ + if (node->js.jointype == JOIN_SEMI) + node->nl_NeedNewOuter = true; + + if (otherqual == NIL || ExecQual(otherqual, econtext, false)) + { + /* + * qualification was satisfied so we project and return the + * slot containing the result tuple using ExecProject(). 
+ */ + TupleTableSlot *result; + ExprDoneCond isDone; + + ENL1_printf("qualification succeeded, projecting tuple"); + + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + else + InstrCountFiltered2(node, 1); + } + else + InstrCountFiltered1(node, 1); + + /* + * Tuple fails qual, so free per-tuple memory and try again. + */ + ResetExprContext(econtext); + + ENL1_printf("qualification failed, looping"); + } } /* ---------------------------------------------------------------- * ExecInitNestLoop * ---------------------------------------------------------------- */ -NestLoopState * + NestLoopState * ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) { - NestLoopState *nlstate; - - /* check for unsupported flags */ - Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); - - NL1_printf("ExecInitNestLoop: %s\n", - "initializing node"); - - /* - * create state structure - */ - nlstate = makeNode(NestLoopState); - nlstate->js.ps.plan = (Plan *) node; - nlstate->js.ps.state = estate; - - /* - * Miscellaneous initialization - * - * create expression context for node - */ - ExecAssignExprContext(estate, &nlstate->js.ps); - - /* - * initialize child expressions - */ - nlstate->js.ps.targetlist = (List *) - ExecInitExpr((Expr *) node->join.plan.targetlist, - (PlanState *) nlstate); - nlstate->js.ps.qual = (List *) - ExecInitExpr((Expr *) node->join.plan.qual, - (PlanState *) nlstate); - nlstate->js.jointype = node->join.jointype; - nlstate->js.joinqual = (List *) - ExecInitExpr((Expr *) node->join.joinqual, - (PlanState *) nlstate); - - /* - * initialize child nodes - * - * If we have no parameters to pass into the inner rel from the outer, - * tell the inner child that cheap rescans would be good. 
If we do have - * such parameters, then there is no point in REWIND support at all in the - * inner child, because it will always be rescanned with fresh parameter - * values. - */ - outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags); - if (node->nestParams == NIL) - eflags |= EXEC_FLAG_REWIND; - else - eflags &= ~EXEC_FLAG_REWIND; - innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags); - - /* - * tuple table initialization - */ - ExecInitResultTupleSlot(estate, &nlstate->js.ps); - - switch (node->join.jointype) - { - case JOIN_INNER: - case JOIN_SEMI: - break; - case JOIN_LEFT: - case JOIN_ANTI: - nlstate->nl_NullInnerTupleSlot = - ExecInitNullTupleSlot(estate, - ExecGetResultType(innerPlanState(nlstate))); - break; - default: - elog(ERROR, "unrecognized join type: %d", - (int) node->join.jointype); - } - - /* - * initialize tuple type and projection info - */ - ExecAssignResultTypeFromTL(&nlstate->js.ps); - ExecAssignProjectionInfo(&nlstate->js.ps, NULL); - - /* - * finally, wipe the current outer tuple clean. 
- */ - nlstate->js.ps.ps_TupFromTlist = false; - nlstate->nl_NeedNewOuter = true; - nlstate->nl_MatchedOuter = false; - - NL1_printf("ExecInitNestLoop: %s\n", - "node initialized"); - - return nlstate; + NestLoopState *nlstate; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + NL1_printf("ExecInitNestLoop: %s\n", + "initializing node"); + + /* + * create state structure + */ + nlstate = makeNode(NestLoopState); + nlstate->js.ps.plan = (Plan *) node; + nlstate->js.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &nlstate->js.ps); + + /* + * initialize child expressions + */ + nlstate->js.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->join.plan.targetlist, + (PlanState *) nlstate); + nlstate->js.ps.qual = (List *) + ExecInitExpr((Expr *) node->join.plan.qual, + (PlanState *) nlstate); + nlstate->js.jointype = node->join.jointype; + nlstate->js.joinqual = (List *) + ExecInitExpr((Expr *) node->join.joinqual, + (PlanState *) nlstate); + + /* + * initialize child nodes + * + * If we have no parameters to pass into the inner rel from the outer, + * tell the inner child that cheap rescans would be good. If we do have + * such parameters, then there is no point in REWIND support at all in the + * inner child, because it will always be rescanned with fresh parameter + * values. 
+ */ + outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags); + if (node->nestParams == NIL) + eflags |= EXEC_FLAG_REWIND; + else + eflags &= ~EXEC_FLAG_REWIND; + innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &nlstate->js.ps); + + switch (node->join.jointype) + { + case JOIN_INNER: + case JOIN_SEMI: + break; + case JOIN_LEFT: + case JOIN_ANTI: + nlstate->nl_NullInnerTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(innerPlanState(nlstate))); + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) node->join.jointype); + } + + /* + * initialize tuple type and projection info + */ + ExecAssignResultTypeFromTL(&nlstate->js.ps); + ExecAssignProjectionInfo(&nlstate->js.ps, NULL); + + /* + * finally, wipe the current outer tuple clean. + */ + nlstate->js.ps.ps_TupFromTlist = false; + nlstate->nl_NeedNewOuter = true; + nlstate->nl_MatchedOuter = false; + + NL1_printf("ExecInitNestLoop: %s\n", + "node initialized"); + + return nlstate; } /* ---------------------------------------------------------------- @@ -393,55 +473,55 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) * closes down scans and frees allocated storage * ---------------------------------------------------------------- */ -void + void ExecEndNestLoop(NestLoopState *node) { - NL1_printf("ExecEndNestLoop: %s\n", - "ending node processing"); - - /* - * Free the exprcontext - */ - ExecFreeExprContext(&node->js.ps); - - /* - * clean out the tuple table - */ - ExecClearTuple(node->js.ps.ps_ResultTupleSlot); - - /* - * close down subplans - */ - ExecEndNode(outerPlanState(node)); - ExecEndNode(innerPlanState(node)); - - NL1_printf("ExecEndNestLoop: %s\n", - "node processing ended"); + NL1_printf("ExecEndNestLoop: %s\n", + "ending node processing"); + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->js.ps); + + /* + * clean out the tuple 
table + */ + ExecClearTuple(node->js.ps.ps_ResultTupleSlot); + + /* + * close down subplans + */ + ExecEndNode(outerPlanState(node)); + ExecEndNode(innerPlanState(node)); + + NL1_printf("ExecEndNestLoop: %s\n", + "node processing ended"); } /* ---------------------------------------------------------------- * ExecReScanNestLoop * ---------------------------------------------------------------- */ -void + void ExecReScanNestLoop(NestLoopState *node) { - PlanState *outerPlan = outerPlanState(node); - - /* - * If outerPlan->chgParam is not null then plan will be automatically - * re-scanned by first ExecProcNode. - */ - if (outerPlan->chgParam == NULL) - ExecReScan(outerPlan); - - /* - * innerPlan is re-scanned for each new outer tuple and MUST NOT be - * re-scanned from here or you'll get troubles from inner index scans when - * outer Vars are used as run-time keys... - */ - - node->js.ps.ps_TupFromTlist = false; - node->nl_NeedNewOuter = true; - node->nl_MatchedOuter = false; + PlanState *outerPlan = outerPlanState(node); + + /* + * If outerPlan->chgParam is not null then plan will be automatically + * re-scanned by first ExecProcNode. + */ + if (outerPlan->chgParam == NULL) + ExecReScan(outerPlan); + + /* + * innerPlan is re-scanned for each new outer tuple and MUST NOT be + * re-scanned from here or you'll get troubles from inner index scans when + * outer Vars are used as run-time keys... + */ + + node->js.ps.ps_TupFromTlist = false; + node->nl_NeedNewOuter = true; + node->nl_MatchedOuter = false; }