diff --git a/.gitignore b/.gitignore
index cbf8d7996a01a..6bb65ccd45ebd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
# Global excludes across all subdirectories
+*.swp
+.DS_Store
*.o
*.obj
*.so
diff --git a/conf/postgresql.conf b/conf/postgresql.conf
new file mode 100644
index 0000000000000..762574f9d79e9
--- /dev/null
+++ b/conf/postgresql.conf
@@ -0,0 +1,634 @@
+# -----------------------------
+# PostgreSQL configuration file
+# -----------------------------
+#
+# This file consists of lines of the form:
+#
+# name = value
+#
+# (The "=" is optional.) Whitespace may be used. Comments are introduced with
+# "#" anywhere on a line. The complete list of parameter names and allowed
+# values can be found in the PostgreSQL documentation.
+#
+# The commented-out settings shown in this file represent the default values.
+# Re-commenting a setting is NOT sufficient to revert it to the default value;
+# you need to reload the server.
+#
+# This file is read on server startup and when the server receives a SIGHUP
+# signal. If you edit the file on a running system, you have to SIGHUP the
+# server for the changes to take effect, or use "pg_ctl reload". Some
+# parameters, which are marked below, require a server shutdown and restart to
+# take effect.
+#
+# Any parameter can also be given as a command-line option to the server, e.g.,
+# "postgres -c log_connections=on". Some parameters can be changed at run time
+# with the "SET" SQL command.
+#
+# Memory units: kB = kilobytes Time units: ms = milliseconds
+# MB = megabytes s = seconds
+# GB = gigabytes min = minutes
+# TB = terabytes h = hours
+# d = days
+
+
+#------------------------------------------------------------------------------
+# FILE LOCATIONS
+#------------------------------------------------------------------------------
+
+# The default values of these variables are driven from the -D command-line
+# option or PGDATA environment variable, represented here as ConfigDir.
+
+#data_directory = 'ConfigDir' # use data in another directory
+ # (change requires restart)
+#hba_file = 'ConfigDir/pg_hba.conf' # host-based authentication file
+ # (change requires restart)
+#ident_file = 'ConfigDir/pg_ident.conf' # ident configuration file
+ # (change requires restart)
+
+# If external_pid_file is not explicitly set, no extra PID file is written.
+#external_pid_file = '' # write an extra PID file
+ # (change requires restart)
+
+
+#------------------------------------------------------------------------------
+# CONNECTIONS AND AUTHENTICATION
+#------------------------------------------------------------------------------
+
+# - Connection Settings -
+
+#listen_addresses = 'localhost' # what IP address(es) to listen on;
+ # comma-separated list of addresses;
+ # defaults to 'localhost'; use '*' for all
+ # (change requires restart)
+#port = 5432 # (change requires restart)
+max_connections = 100 # (change requires restart)
+# Note: Increasing max_connections costs ~400 bytes of shared memory per
+# connection slot, plus lock space (see max_locks_per_transaction).
+#superuser_reserved_connections = 3 # (change requires restart)
+#unix_socket_directories = '/tmp' # comma-separated list of directories
+ # (change requires restart)
+#unix_socket_group = '' # (change requires restart)
+#unix_socket_permissions = 0777 # begin with 0 to use octal notation
+ # (change requires restart)
+#bonjour = off # advertise server via Bonjour
+ # (change requires restart)
+#bonjour_name = '' # defaults to the computer name
+ # (change requires restart)
+
+# - Security and Authentication -
+
+#authentication_timeout = 1min # 1s-600s
+#ssl = off # (change requires restart)
+#ssl_ciphers = 'HIGH:MEDIUM:+3DES:!aNULL' # allowed SSL ciphers
+ # (change requires restart)
+#ssl_prefer_server_ciphers = on # (change requires restart)
+#ssl_ecdh_curve = 'prime256v1' # (change requires restart)
+#ssl_cert_file = 'server.crt' # (change requires restart)
+#ssl_key_file = 'server.key' # (change requires restart)
+#ssl_ca_file = '' # (change requires restart)
+#ssl_crl_file = '' # (change requires restart)
+#password_encryption = on
+#db_user_namespace = off
+#row_security = on
+
+# GSSAPI using Kerberos
+#krb_server_keyfile = ''
+#krb_caseins_users = off
+
+# - TCP Keepalives -
+# see "man 7 tcp" for details
+
+#tcp_keepalives_idle = 0 # TCP_KEEPIDLE, in seconds;
+ # 0 selects the system default
+#tcp_keepalives_interval = 0 # TCP_KEEPINTVL, in seconds;
+ # 0 selects the system default
+#tcp_keepalives_count = 0 # TCP_KEEPCNT;
+ # 0 selects the system default
+
+
+#------------------------------------------------------------------------------
+# RESOURCE USAGE (except WAL)
+#------------------------------------------------------------------------------
+
+# - Memory -
+
+shared_buffers = 128MB # min 128kB
+ # (change requires restart)
+#huge_pages = try # on, off, or try
+ # (change requires restart)
+#temp_buffers = 8MB # min 800kB
+#max_prepared_transactions = 0 # zero disables the feature
+ # (change requires restart)
+# Note: Increasing max_prepared_transactions costs ~600 bytes of shared memory
+# per transaction slot, plus lock space (see max_locks_per_transaction).
+# It is not advisable to set max_prepared_transactions nonzero unless you
+# actively intend to use prepared transactions.
+#work_mem = 4MB # min 64kB
+#maintenance_work_mem = 64MB # min 1MB
+#autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem
+#max_stack_depth = 2MB # min 100kB
+dynamic_shared_memory_type = posix # the default is the first option
+ # supported by the operating system:
+ # posix
+ # sysv
+ # windows
+ # mmap
+ # use none to disable dynamic shared memory
+
+# - Disk -
+
+#temp_file_limit = -1 # limits per-session temp file space
+ # in kB, or -1 for no limit
+
+# - Kernel Resource Usage -
+
+#max_files_per_process = 1000 # min 25
+ # (change requires restart)
+#shared_preload_libraries = '' # (change requires restart)
+
+# - Cost-Based Vacuum Delay -
+
+#vacuum_cost_delay = 0 # 0-100 milliseconds
+#vacuum_cost_page_hit = 1 # 0-10000 credits
+#vacuum_cost_page_miss = 10 # 0-10000 credits
+#vacuum_cost_page_dirty = 20 # 0-10000 credits
+#vacuum_cost_limit = 200 # 1-10000 credits
+
+# - Background Writer -
+
+#bgwriter_delay = 200ms # 10-10000ms between rounds
+#bgwriter_lru_maxpages = 100 # 0-1000 max buffers written/round
+#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round
+
+# - Asynchronous Behavior -
+
+#effective_io_concurrency = 0 # 1-1000; 0 disables prefetching
+#max_worker_processes = 8
+#max_parallel_degree = 0 # max number of worker processes per node
+
+
+#------------------------------------------------------------------------------
+# WRITE AHEAD LOG
+#------------------------------------------------------------------------------
+
+# - Settings -
+
+#wal_level = minimal # minimal, archive, hot_standby, or logical
+ # (change requires restart)
+#fsync = on # turns forced synchronization on or off
+#synchronous_commit = on # synchronization level;
+ # off, local, remote_write, or on
+#wal_sync_method = fsync # the default is the first option
+ # supported by the operating system:
+ # open_datasync
+ # fdatasync (default on Linux)
+ # fsync
+ # fsync_writethrough
+ # open_sync
+#full_page_writes = on # recover from partial page writes
+#wal_compression = off # enable compression of full-page writes
+#wal_log_hints = off # also do full page writes of non-critical updates
+ # (change requires restart)
+#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers
+ # (change requires restart)
+#wal_writer_delay = 200ms # 1-10000 milliseconds
+
+#commit_delay = 0 # range 0-100000, in microseconds
+#commit_siblings = 5 # range 1-1000
+
+# - Checkpoints -
+
+#checkpoint_timeout = 5min # range 30s-1h
+#max_wal_size = 1GB
+#min_wal_size = 80MB
+#checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0
+#checkpoint_warning = 30s # 0 disables
+
+# - Archiving -
+
+#archive_mode = off # enables archiving; off, on, or always
+ # (change requires restart)
+#archive_command = '' # command to use to archive a logfile segment
+ # placeholders: %p = path of file to archive
+ # %f = file name only
+ # e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'
+#archive_timeout = 0 # force a logfile segment switch after this
+ # number of seconds; 0 disables
+
+
+#------------------------------------------------------------------------------
+# REPLICATION
+#------------------------------------------------------------------------------
+
+# - Sending Server(s) -
+
+# Set these on the master and on any standby that will send replication data.
+
+#max_wal_senders = 0 # max number of walsender processes
+ # (change requires restart)
+#wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables
+#wal_sender_timeout = 60s # in milliseconds; 0 disables
+
+#max_replication_slots = 0 # max number of replication slots
+ # (change requires restart)
+#track_commit_timestamp = off # collect timestamp of transaction commit
+ # (change requires restart)
+
+# - Master Server -
+
+# These settings are ignored on a standby server.
+
+#synchronous_standby_names = '' # standby servers that provide sync rep
+ # comma-separated list of application_name
+ # from standby(s); '*' = all
+#vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed
+
+# - Standby Servers -
+
+# These settings are ignored on a master server.
+
+#hot_standby = off # "on" allows queries during recovery
+ # (change requires restart)
+#max_standby_archive_delay = 30s # max delay before canceling queries
+ # when reading WAL from archive;
+ # -1 allows indefinite delay
+#max_standby_streaming_delay = 30s # max delay before canceling queries
+ # when reading streaming WAL;
+ # -1 allows indefinite delay
+#wal_receiver_status_interval = 10s # send replies at least this often
+ # 0 disables
+#hot_standby_feedback = off # send info from standby to prevent
+ # query conflicts
+#wal_receiver_timeout = 60s # time that receiver waits for
+ # communication from master
+ # in milliseconds; 0 disables
+#wal_retrieve_retry_interval = 5s # time to wait before retrying to
+ # retrieve WAL after a failed attempt
+
+
+#------------------------------------------------------------------------------
+# QUERY TUNING
+#------------------------------------------------------------------------------
+
+# - Planner Method Configuration -
+
+enable_bitmapscan = off
+enable_hashagg = off
+enable_hashjoin = off
+enable_indexscan = off
+enable_indexonlyscan = off
+enable_material = off
+enable_mergejoin = off
+#enable_nestloop = on
+#enable_seqscan = on
+#enable_sort = on
+enable_tidscan = off
+
+# - Planner Cost Constants -
+
+#seq_page_cost = 1.0 # measured on an arbitrary scale
+#random_page_cost = 4.0 # same scale as above
+#cpu_tuple_cost = 0.01 # same scale as above
+#cpu_index_tuple_cost = 0.005 # same scale as above
+#cpu_operator_cost = 0.0025 # same scale as above
+#parallel_tuple_cost = 0.1 # same scale as above
+#parallel_setup_cost = 1000.0 # same scale as above
+#effective_cache_size = 4GB
+
+# - Genetic Query Optimizer -
+
+#geqo = on
+#geqo_threshold = 12
+#geqo_effort = 5 # range 1-10
+#geqo_pool_size = 0 # selects default based on effort
+#geqo_generations = 0 # selects default based on effort
+#geqo_selection_bias = 2.0 # range 1.5-2.0
+#geqo_seed = 0.0 # range 0.0-1.0
+
+# - Other Planner Options -
+
+#default_statistics_target = 100 # range 1-10000
+#constraint_exclusion = partition # on, off, or partition
+#cursor_tuple_fraction = 0.1 # range 0.0-1.0
+#from_collapse_limit = 8
+#join_collapse_limit = 8 # 1 disables collapsing of explicit
+ # JOIN clauses
+
+
+#------------------------------------------------------------------------------
+# ERROR REPORTING AND LOGGING
+#------------------------------------------------------------------------------
+
+# - Where to Log -
+
+#log_destination = 'stderr' # Valid values are combinations of
+ # stderr, csvlog, syslog, and eventlog,
+ # depending on platform. csvlog
+ # requires logging_collector to be on.
+
+# This is used when logging to stderr:
+#logging_collector = off # Enable capturing of stderr and csvlog
+ # into log files. Required to be on for
+ # csvlogs.
+ # (change requires restart)
+
+# These are only used if logging_collector is on:
+#log_directory = 'pg_log' # directory where log files are written,
+ # can be absolute or relative to PGDATA
+#log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern,
+ # can include strftime() escapes
+#log_file_mode = 0600 # creation mode for log files,
+ # begin with 0 to use octal notation
+#log_truncate_on_rotation = off # If on, an existing log file with the
+ # same name as the new log file will be
+ # truncated rather than appended to.
+ # But such truncation only occurs on
+ # time-driven rotation, not on restarts
+ # or size-driven rotation. Default is
+ # off, meaning append to existing files
+ # in all cases.
+#log_rotation_age = 1d # Automatic rotation of logfiles will
+ # happen after that time. 0 disables.
+#log_rotation_size = 10MB # Automatic rotation of logfiles will
+ # happen after that much log output.
+ # 0 disables.
+
+# These are relevant when logging to syslog:
+#syslog_facility = 'LOCAL0'
+#syslog_ident = 'postgres'
+
+# This is only relevant when logging to eventlog (win32):
+#event_source = 'PostgreSQL'
+
+# - When to Log -
+
+#client_min_messages = notice # values in order of decreasing detail:
+ # debug5
+ # debug4
+ # debug3
+ # debug2
+ # debug1
+ # log
+ # notice
+ # warning
+ # error
+
+#log_min_messages = warning # values in order of decreasing detail:
+ # debug5
+ # debug4
+ # debug3
+ # debug2
+ # debug1
+ # info
+ # notice
+ # warning
+ # error
+ # log
+ # fatal
+ # panic
+
+#log_min_error_statement = error # values in order of decreasing detail:
+ # debug5
+ # debug4
+ # debug3
+ # debug2
+ # debug1
+ # info
+ # notice
+ # warning
+ # error
+ # log
+ # fatal
+ # panic (effectively off)
+
+#log_min_duration_statement = -1 # -1 is disabled, 0 logs all statements
+ # and their durations, > 0 logs only
+ # statements running at least this number
+ # of milliseconds
+
+
+# - What to Log -
+
+#debug_print_parse = off
+#debug_print_rewritten = off
+#debug_print_plan = off
+#debug_pretty_print = on
+#log_checkpoints = off
+#log_connections = off
+#log_disconnections = off
+#log_duration = off
+#log_error_verbosity = default # terse, default, or verbose messages
+#log_hostname = off
+#log_line_prefix = '' # special values:
+ # %a = application name
+ # %u = user name
+ # %d = database name
+ # %r = remote host and port
+ # %h = remote host
+ # %p = process ID
+ # %t = timestamp without milliseconds
+ # %m = timestamp with milliseconds
+ # %n = timestamp with milliseconds (as a Unix epoch)
+ # %i = command tag
+ # %e = SQL state
+ # %c = session ID
+ # %l = session line number
+ # %s = session start timestamp
+ # %v = virtual transaction ID
+ # %x = transaction ID (0 if none)
+ # %q = stop here in non-session
+ # processes
+ # %% = '%'
+ # e.g. '<%u%%%d> '
+#log_lock_waits = off # log lock waits >= deadlock_timeout
+#log_statement = 'none' # none, ddl, mod, all
+#log_replication_commands = off
+#log_temp_files = -1 # log temporary files equal or larger
+ # than the specified size in kilobytes;
+ # -1 disables, 0 logs all temp files
+log_timezone = 'US/Michigan'
+
+
+# - Process Title -
+
+#cluster_name = '' # added to process titles if nonempty
+ # (change requires restart)
+#update_process_title = on
+
+
+#------------------------------------------------------------------------------
+# RUNTIME STATISTICS
+#------------------------------------------------------------------------------
+
+# - Query/Index Statistics Collector -
+
+#track_activities = on
+#track_counts = on
+#track_io_timing = off
+#track_functions = none # none, pl, all
+#track_activity_query_size = 1024 # (change requires restart)
+#stats_temp_directory = 'pg_stat_tmp'
+
+
+# - Statistics Monitoring -
+
+#log_parser_stats = off
+#log_planner_stats = off
+#log_executor_stats = off
+#log_statement_stats = off
+
+
+#------------------------------------------------------------------------------
+# AUTOVACUUM PARAMETERS
+#------------------------------------------------------------------------------
+
+#autovacuum = on # Enable autovacuum subprocess? 'on'
+ # requires track_counts to also be on.
+#log_autovacuum_min_duration = -1 # -1 disables, 0 logs all actions and
+ # their durations, > 0 logs only
+ # actions running at least this number
+ # of milliseconds.
+#autovacuum_max_workers = 3 # max number of autovacuum subprocesses
+ # (change requires restart)
+#autovacuum_naptime = 1min # time between autovacuum runs
+#autovacuum_vacuum_threshold = 50 # min number of row updates before
+ # vacuum
+#autovacuum_analyze_threshold = 50 # min number of row updates before
+ # analyze
+#autovacuum_vacuum_scale_factor = 0.2 # fraction of table size before vacuum
+#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze
+#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum
+ # (change requires restart)
+#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age
+ # before forced vacuum
+ # (change requires restart)
+#autovacuum_vacuum_cost_delay = 20ms # default vacuum cost delay for
+ # autovacuum, in milliseconds;
+ # -1 means use vacuum_cost_delay
+#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for
+ # autovacuum, -1 means use
+ # vacuum_cost_limit
+
+
+#------------------------------------------------------------------------------
+# CLIENT CONNECTION DEFAULTS
+#------------------------------------------------------------------------------
+
+# - Statement Behavior -
+
+#search_path = '"$user", public' # schema names
+#default_tablespace = '' # a tablespace name, '' uses the default
+#temp_tablespaces = '' # a list of tablespace names, '' uses
+ # only default tablespace
+#check_function_bodies = on
+#default_transaction_isolation = 'read committed'
+#default_transaction_read_only = off
+#default_transaction_deferrable = off
+#session_replication_role = 'origin'
+#statement_timeout = 0 # in milliseconds, 0 is disabled
+#lock_timeout = 0 # in milliseconds, 0 is disabled
+#vacuum_freeze_min_age = 50000000
+#vacuum_freeze_table_age = 150000000
+#vacuum_multixact_freeze_min_age = 5000000
+#vacuum_multixact_freeze_table_age = 150000000
+#bytea_output = 'hex' # hex, escape
+#xmlbinary = 'base64'
+#xmloption = 'content'
+#gin_fuzzy_search_limit = 0
+#gin_pending_list_limit = 4MB
+
+# - Locale and Formatting -
+
+datestyle = 'iso, mdy'
+#intervalstyle = 'postgres'
+timezone = 'US/Michigan'
+#timezone_abbreviations = 'Default' # Select the set of available time zone
+ # abbreviations. Currently, there are
+ # Default
+ # Australia (historical usage)
+ # India
+ # You can create your own file in
+ # share/timezonesets/.
+#extra_float_digits = 0 # min -15, max 3
+#client_encoding = sql_ascii # actually, defaults to database
+ # encoding
+
+# These settings are initialized by initdb, but they can be changed.
+lc_messages = 'en_US.UTF-8' # locale for system error message
+ # strings
+lc_monetary = 'en_US.UTF-8' # locale for monetary formatting
+lc_numeric = 'en_US.UTF-8' # locale for number formatting
+lc_time = 'en_US.UTF-8' # locale for time formatting
+
+# default configuration for text search
+default_text_search_config = 'pg_catalog.english'
+
+# - Other Defaults -
+
+#dynamic_library_path = '$libdir'
+#local_preload_libraries = ''
+#session_preload_libraries = ''
+
+
+#------------------------------------------------------------------------------
+# LOCK MANAGEMENT
+#------------------------------------------------------------------------------
+
+#deadlock_timeout = 1s
+#max_locks_per_transaction = 64 # min 10
+ # (change requires restart)
+# Note: Each lock table slot uses ~270 bytes of shared memory, and there are
+# max_locks_per_transaction * (max_connections + max_prepared_transactions)
+# lock table slots.
+#max_pred_locks_per_transaction = 64 # min 10
+ # (change requires restart)
+
+
+#------------------------------------------------------------------------------
+# VERSION/PLATFORM COMPATIBILITY
+#------------------------------------------------------------------------------
+
+# - Previous PostgreSQL Versions -
+
+#array_nulls = on
+#backslash_quote = safe_encoding # on, off, or safe_encoding
+#default_with_oids = off
+#escape_string_warning = on
+#lo_compat_privileges = off
+#operator_precedence_warning = off
+#quote_all_identifiers = off
+#sql_inheritance = on
+#standard_conforming_strings = on
+#synchronize_seqscans = on
+
+# - Other Platforms and Clients -
+
+#transform_null_equals = off
+
+
+#------------------------------------------------------------------------------
+# ERROR HANDLING
+#------------------------------------------------------------------------------
+
+#exit_on_error = off # terminate session on any error?
+#restart_after_crash = on # reinitialize after backend crash?
+
+
+#------------------------------------------------------------------------------
+# CONFIG FILE INCLUDES
+#------------------------------------------------------------------------------
+
+# These options allow settings to be loaded from files other than the
+# default postgresql.conf.
+
+#include_dir = 'conf.d' # include files ending in '.conf' from
+ # directory 'conf.d'
+#include_if_exists = 'exists.conf' # include file only if it exists
+#include = 'special.conf' # include file
+
+
+#------------------------------------------------------------------------------
+# CUSTOMIZED OPTIONS
+#------------------------------------------------------------------------------
+
+# Add settings for extensions here
diff --git a/contrib/ctidscan/Makefile b/contrib/ctidscan/Makefile
new file mode 100644
index 0000000000000..6811a92f827bb
--- /dev/null
+++ b/contrib/ctidscan/Makefile
@@ -0,0 +1,11 @@
+# contrib/ctidscan/Makefile
+# Builds the ctidscan module through PGXS, using the installation that pg_config reports.
+MODULES = ctidscan
+# extension name (presumably matched by a ctidscan.control file -- TODO confirm it is part of this patch)
+EXTENSION = ctidscan
+# regression test case(s) handed to the PGXS regression-test machinery
+REGRESS = ctidscan
+# ask pg_config where the PGXS makefile lives, then include it
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
diff --git a/contrib/ctidscan/README.md b/contrib/ctidscan/README.md
new file mode 100644
index 0000000000000..15a901c5eceb9
--- /dev/null
+++ b/contrib/ctidscan/README.md
@@ -0,0 +1,4 @@
+ctidscan
+========
+
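+An example module demonstrating the custom-scan interface: it uses inequality conditions on the ctid system column to skip blocks that need not be read.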
diff --git a/contrib/ctidscan/ctidscan.c b/contrib/ctidscan/ctidscan.c
new file mode 100644
index 0000000000000..cbe64ed55adeb
--- /dev/null
+++ b/contrib/ctidscan/ctidscan.c
@@ -0,0 +1,814 @@
+/*
+ * ctidscan.c
+ *
+ * A custom-scan provider that uses inequality operators on the ctid system
+ * column to skip reading blocks that can never be referenced.
+ *
+ * It is designed to demonstrate the Custom Scan APIs, which allow an
+ * extension to override part of the executor. This extension focuses on
+ * workloads that fetch records whose tid is larger or smaller than a
+ * particular value. When such inequality operators are given, this module
+ * constructs a custom scan path that can skip records that need not be
+ * read. If that path is the cheapest one, it is used to run the query.
+ * The Custom Scan APIs call back into this extension when the executor
+ * fetches the underlying records; it then uses the existing heap_getnext()
+ * but seeks to the first record to be read before fetching anything.
+ *
+ * Portions Copyright (c) 2014, PostgreSQL Global Development Group
+ */
+#include "postgres.h"
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "executor/executor.h"
+#include "executor/nodeCustom.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/paths.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planmain.h"
+#include "optimizer/placeholder.h"
+#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
+#include "parser/parsetree.h"
+#include "storage/bufmgr.h"
+#include "storage/itemptr.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/spccache.h"
+
+/* TID inequality operator OIDs; defined here only when the catalog headers (presumably pg_operator.h rather than pg_proc.h) lack them */
+#ifndef TIDGreaterOperator
+#define TIDGreaterOperator 2800
+#endif
+#ifndef TIDLessEqualOperator
+#define TIDLessEqualOperator 2801
+#endif
+#ifndef TIDGreaterEqualOperator
+#define TIDGreaterEqualOperator 2802
+#endif
+
+PG_MODULE_MAGIC;
+
+/*
+ * NOTE: We don't use any special data type to save the private data.
+ * All we need to keep in the private fields is an expression list that
+ * must be adjusted by setrefs.c/subselect.c, so we put it in the
+ * custom_exprs field of the CustomScan structure, not in custom_private.
+ * Per the interface contract, only expression nodes may be placed in
+ * custom_exprs, and we must keep in mind that the core backend may
+ * adjust those expression items.
+ */
+
+/*
+ * CtidScanState - executor state object of ctidscan.
+ * It extends CustomScanState with one extra field: 'ctid_quals', a list of
+ * ExprState for the inequality operators that involve the ctid system column.
+ */
+typedef struct {
+ CustomScanState css; /* embedded as first member (presumably so the node can be cast to CustomScanState -- confirm) */
+ List *ctid_quals; /* list of ExprState for inequality ops */
+} CtidScanState;
+
+/* static variables (file-local module state) */
+static bool enable_ctidscan; /* presumably backs a GUC of the same name -- TODO confirm in _PG_init */
+static set_rel_pathlist_hook_type set_rel_pathlist_next = NULL; /* presumably the previously installed hook, kept for chaining -- confirm */
+
+/* forward declarations; the callback tables below reference these functions */
+void _PG_init(void); /* the only non-static function declared here */
+
+static void SetCtidScanPath(PlannerInfo *root,
+ RelOptInfo *rel,
+ Index rti,
+ RangeTblEntry *rte); /* presumably installed as set_rel_pathlist_hook -- confirm in _PG_init */
+/* CustomPathMethods callback */
+static Plan *PlanCtidScanPath(PlannerInfo *root,
+ RelOptInfo *rel,
+ CustomPath *best_path,
+ List *tlist,
+ List *clauses);
+
+/* CustomScanMethods callback */
+static Node *CreateCtidScanState(CustomScan *custom_plan);
+
+/* CustomExecMethods callbacks */
+static void BeginCtidScan(CustomScanState *node, EState *estate, int eflags);
+static void ReScanCtidScan(CustomScanState *node);
+static TupleTableSlot *ExecCtidScan(CustomScanState *node);
+static void EndCtidScan(CustomScanState *node);
+static void ExplainCtidScan(CustomScanState *node, List *ancestors,
+ ExplainState *es);
+
+/* static tables of custom-scan callbacks, one per stage (path, plan, executor); NULL marks a callback this module does not provide */
+static CustomPathMethods ctidscan_path_methods = {
+ "ctidscan", /* CustomName */
+ PlanCtidScanPath, /* PlanCustomPath */
+ NULL, /* TextOutCustomPath */
+};
+
+static CustomScanMethods ctidscan_scan_methods = {
+ "ctidscan", /* CustomName */
+ CreateCtidScanState, /* CreateCustomScanState */
+ NULL, /* TextOutCustomScan */
+};
+
+static CustomExecMethods ctidscan_exec_methods = {
+ "ctidscan", /* CustomName */
+ BeginCtidScan, /* BeginCustomScan */
+ ExecCtidScan, /* ExecCustomScan */
+ EndCtidScan, /* EndCustomScan */
+ ReScanCtidScan, /* ReScanCustomScan */
+ NULL, /* MarkPosCustomScan */
+ NULL, /* RestrPosCustomScan */
+ ExplainCtidScan, /* ExplainCustomScan */
+};
+/* IsCTIDVar - true if 'node' is a non-NULL Var with varno 'rtindex', varlevelsup 0 and varattno SelfItemPointerAttributeNumber (the ctid system column); 'node' is evaluated more than once */
+#define IsCTIDVar(node,rtindex) \
+ ((node) != NULL && \
+ IsA((node), Var) && \
+ ((Var *) (node))->varno == (rtindex) && \
+ ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
+ ((Var *) (node))->varlevelsup == 0)
+
+/*
+ * CTidQualFromExpr
+ *
+ * Extracts from 'expr' the inequality clauses on the ctid system column of
+ * relation 'varno' that can bound the zone to be scanned. Returns a list of
+ * copies of the matching OpExpr nodes, or NIL if there are none.
+ * The search descends through AND-clauses only, so the caller may treat all
+ * returned clauses as implicitly AND-ed; each one narrows the scope of the
+ * custom tid scan.
+ */
+static List *
+CTidQualFromExpr(Node *expr, int varno)
+{
+	if (is_opclause(expr))
+	{
+		OpExpr	   *op = (OpExpr *) expr;
+		Node	   *arg1;
+		Node	   *arg2;
+		Node	   *other = NULL;
+
+		/* only inequality operators are candidates */
+		if (op->opno != TIDLessOperator &&
+			op->opno != TIDLessEqualOperator &&
+			op->opno != TIDGreaterOperator &&
+			op->opno != TIDGreaterEqualOperator)
+			return NIL;
+
+		if (list_length(op->args) != 2)
+			return NIL;			/* should not happen */
+
+		arg1 = linitial(op->args);
+		arg2 = lsecond(op->args);
+
+		if (IsCTIDVar(arg1, varno))
+			other = arg2;
+		else if (IsCTIDVar(arg2, varno))
+			other = arg1;
+		else
+			return NIL;			/* neither side is our ctid column */
+		if (exprType(other) != TIDOID)
+			return NIL;			/* should not happen */
+		/* The other argument must be a pseudoconstant */
+		if (!is_pseudo_constant_clause(other))
+			return NIL;
+
+		return list_make1(copyObject(op));
+	}
+	else if (and_clause(expr))
+	{
+		List	   *rlst = NIL;
+		ListCell   *lc;
+
+		foreach(lc, ((BoolExpr *) expr)->args)
+		{
+			List	   *temp = CTidQualFromExpr((Node *) lfirst(lc), varno);
+
+			rlst = list_concat(rlst, temp);
+		}
+		return rlst;
+	}
+	return NIL;
+}
+
+/*
+ * CTidEstimateCosts
+ *
+ * Estimates the cost of scanning the target relation under the ctid
+ * inequality clauses in cpath->custom_private, filling in cpath->path.
+ * The scan works almost like SeqScan because it uses regular heap_getnext();
+ * what differs is the number of blocks (and so tuples) the clauses let us skip.
+ */
+static void
+CTidEstimateCosts(PlannerInfo *root,
+				  RelOptInfo *baserel,
+				  CustomPath *cpath)
+{
+	Path	   *path = &cpath->path;
+	List	   *ctid_quals = cpath->custom_private;
+	ListCell   *lc;
+	double		ntuples;
+	ItemPointerData ip_min;
+	ItemPointerData ip_max;
+	bool		has_min_val = false;
+	bool		has_max_val = false;
+	BlockNumber num_pages;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	QualCost	qpqual_cost;
+	QualCost	ctid_qual_cost;
+	double		spc_seq_page_cost;
+
+	/* Should only be applied to base relations */
+	Assert(baserel->relid > 0);
+	Assert(baserel->rtekind == RTE_RELATION);
+
+	/* Mark the path with the correct row estimate */
+	if (path->param_info)
+		path->rows = path->param_info->ppi_rows;
+	else
+		path->rows = baserel->rows;
+
+	/* Find the tightest ctid range implied by bounds that are plan-time constants */
+	ItemPointerSet(&ip_min, 0, 0);
+	ItemPointerSet(&ip_max, MaxBlockNumber, MaxOffsetNumber);
+	foreach (lc, ctid_quals)
+	{
+		OpExpr	   *op = lfirst(lc);
+		Oid			opno = InvalidOid;
+		Node	   *other = NULL;
+
+		Assert(is_opclause(op));
+		if (IsCTIDVar(linitial(op->args), baserel->relid))
+		{
+			opno = op->opno;
+			other = lsecond(op->args);
+		}
+		else if (IsCTIDVar(lsecond(op->args), baserel->relid))
+		{
+			/* To simplify, treat the clause as if the Var were the 1st argument */
+			opno = get_commutator(op->opno);
+			other = linitial(op->args);
+		}
+		else
+			elog(ERROR, "could not identify CTID variable");
+
+		/* a NULL constant carries no item pointer to compare against */
+		if (IsA(other, Const) && !((Const *) other)->constisnull)
+		{
+			ItemPointer ip = (ItemPointer) DatumGetPointer(((Const *) other)->constvalue);
+
+			/*
+			 * Just a rough estimation; from the scan-size perspective we
+			 * do not distinguish strict inequality operators from the
+			 * inequality-or-equal ones.
+			 */
+			switch (opno)
+			{
+				case TIDLessOperator:
+				case TIDLessEqualOperator:
+					if (ItemPointerCompare(ip, &ip_max) < 0)
+						ItemPointerCopy(ip, &ip_max);
+					has_max_val = true;
+					break;
+				case TIDGreaterOperator:
+				case TIDGreaterEqualOperator:
+					if (ItemPointerCompare(ip, &ip_min) > 0)
+						ItemPointerCopy(ip, &ip_min);
+					has_min_val = true;
+					break;
+				default:
+					elog(ERROR, "unexpected operator code: %u", op->opno);
+					break;
+			}
+		}
+	}
+
+	/* estimated number of tuples in the whole relation */
+	ntuples = baserel->tuples;
+
+	if (has_min_val || has_max_val)
+	{
+		/* valid blocks are 0 .. pages - 1; an unbounded side runs to that end */
+		BlockNumber last_block = (baserel->pages > 0) ? baserel->pages - 1 : 0;
+		BlockNumber bnum_min = 0;
+		BlockNumber bnum_max = last_block;
+
+		if (has_min_val)
+			bnum_min = BlockIdGetBlockNumber(&ip_min.ip_blkid);
+		if (has_max_val)
+			bnum_max = BlockIdGetBlockNumber(&ip_max.ip_blkid);
+		bnum_max = Min(bnum_max, last_block);
+
+		/* BlockNumber is unsigned: catch an empty range before subtracting */
+		if (bnum_min > bnum_max)
+			num_pages = 1;
+		else
+			num_pages = bnum_max - bnum_min + 1;
+	}
+	else
+	{
+		/*
+		 * Just a rough estimation.  We assume half of the records will be
+		 * read under this restriction clause; the real amount is unknown
+		 * until the executor actually runs it.
+		 */
+		num_pages = Max((baserel->pages + 1) / 2, 1);
+	}
+
+	/* scale by the fraction of pages visited; nothing to scale if no pages */
+	if (baserel->pages > 0)
+		ntuples *= ((double) num_pages) / ((double) baserel->pages);
+
+	/*
+	 * The TID qual expressions will be computed once, any other baserestrict
+	 * quals once per retrieved tuple.
+	 */
+	cost_qual_eval(&ctid_qual_cost, ctid_quals, root);
+
+	/* fetch estimated page cost for tablespace containing table */
+	get_tablespace_page_costs(baserel->reltablespace,
+							  NULL,
+							  &spc_seq_page_cost);
+
+	/*
+	 * disk costs --- the selected block range is read in physical order by
+	 * heap_getnext(), so charge one sequential page fetch per page visited
+	 */
+	run_cost += spc_seq_page_cost * num_pages;
+
+	/*
+	 * Add scanning CPU costs
+	 * (logic copied from get_restriction_qual_cost)
+	 */
+	if (path->param_info)
+	{
+		/* Include costs of pushed-down clauses */
+		cost_qual_eval(&qpqual_cost, path->param_info->ppi_clauses, root);
+
+		qpqual_cost.startup += baserel->baserestrictcost.startup;
+		qpqual_cost.per_tuple += baserel->baserestrictcost.per_tuple;
+	}
+	else
+		qpqual_cost = baserel->baserestrictcost;
+
+	/*
+	 * The ctid quals are charged once at startup and taken out of the
+	 * per-tuple qpqual cost, on the assumption that they are a subset of
+	 * the qpquals.  NOTE(review): if the executor re-checks them for every
+	 * tuple, the subtraction below should be dropped -- confirm.
+	 */
+	startup_cost += qpqual_cost.startup + ctid_qual_cost.per_tuple;
+	cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+		ctid_qual_cost.per_tuple;
+	run_cost += cpu_per_tuple * ntuples;	/* add to the disk cost, do not replace it */
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * SetCtidScanPath - entrypoint of the series of custom-scan execution,
+ * installed as set_rel_pathlist_hook by _PG_init().
+ *
+ * It adds a CustomPath if the referenced relation has inequality
+ * expressions on the ctid system column.  The hook that _PG_init() saved
+ * in set_rel_pathlist_next, if any, is invoked first so that chained
+ * providers keep working; rtindex is only forwarded to that hook.
+ */
+static void
+SetCtidScanPath(PlannerInfo *root, RelOptInfo *baserel,
+				Index rtindex, RangeTblEntry *rte)
+{
+	char		relkind;
+	ListCell   *lc;
+	List	   *ctid_quals = NIL;
+	CustomPath *cpath;
+
+	/* chain to the hook saved by _PG_init(), if any */
+	if (set_rel_pathlist_next)
+		set_rel_pathlist_next(root, baserel, rtindex, rte);
+
+	/* only plain relations are supported */
+	if (rte->rtekind != RTE_RELATION)
+		return;
+	relkind = get_rel_relkind(rte->relid);
+	if (relkind != RELKIND_RELATION &&
+		relkind != RELKIND_MATVIEW &&
+		relkind != RELKIND_TOASTVALUE)
+		return;
+
+	/*
+	 * NOTE: Unlike a built-in execution path, a core path is always
+	 * available even though ctid scan is not.  So, simply, we don't add
+	 * any path here, instead of adding one penalized with disable_cost.
+	 */
+	if (!enable_ctidscan)
+		return;
+
+	/* walk on the restrict info and collect the ctid inequality quals */
+	foreach (lc, baserel->baserestrictinfo)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+		if (!IsA(rinfo, RestrictInfo))
+			continue;		/* probably should never happen */
+		ctid_quals = list_concat(ctid_quals,
+								 CTidQualFromExpr((Node *) rinfo->clause,
+												  baserel->relid));
+	}
+
+	/* no clause that could reduce the number of tuples: nothing to add */
+	if (ctid_quals == NIL)
+		return;
+
+	/*
+	 * We don't support pushing join clauses into the quals of a ctidscan,
+	 * but it could still have required parameterization due to LATERAL
+	 * refs in its tlist; hence lateral_relids is the required outer set.
+	 */
+	cpath = palloc0(sizeof(CustomPath));
+	cpath->path.type = T_CustomPath;
+	cpath->path.pathtype = T_CustomScan;
+	cpath->path.parent = baserel;
+	cpath->path.param_info
+		= get_baserel_parampathinfo(root, baserel, baserel->lateral_relids);
+	cpath->flags = CUSTOMPATH_SUPPORT_BACKWARD_SCAN;
+	cpath->custom_private = ctid_quals;
+	cpath->methods = &ctidscan_path_methods;
+
+	CTidEstimateCosts(root, baserel, cpath);
+
+	add_path(baserel, &cpath->path);
+}
+
+/*
+ * PlanCtidScanPath - A method of CustomPath; builds the CustomScan plan
+ * node that corresponds to the supplied CustomPath.
+ *
+ * The path's flags are carried over and the ctid quals kept in its
+ * custom_private list become the custom_exprs of the plan node.
+ */
+static Plan *
+PlanCtidScanPath(PlannerInfo *root,
+				 RelOptInfo *rel,
+				 CustomPath *best_path,
+				 List *tlist,
+				 List *clauses)
+{
+	CustomScan *plan_node = makeNode(CustomScan);
+	Scan	   *scan = &plan_node->scan;
+
+	/* generic Scan fields: scanrelid, tlist as is, bare qual expressions */
+	scan->scanrelid = rel->relid;
+	scan->plan.targetlist = tlist;
+	scan->plan.qual = extract_actual_clauses(clauses, false);
+
+	/* CustomScan fields: flags, provider methods and ctid related quals */
+	plan_node->flags = best_path->flags;
+	plan_node->methods = &ctidscan_scan_methods;
+	plan_node->custom_exprs = best_path->custom_private;
+
+	return &scan->plan;
+}
+
+/*
+ * CreateCtidScanState - A method of CustomScan; allocates a zero-filled
+ * CtidScanState, tags it as CustomScanState and sets flags and methods.
+ */
+static Node *
+CreateCtidScanState(CustomScan *custom_plan)
+{
+	CtidScanState *state = palloc0(sizeof(CtidScanState));
+	CustomScanState *css = &state->css;
+
+	NodeSetTag(state, T_CustomScanState);
+	css->methods = &ctidscan_exec_methods;
+	css->flags = custom_plan->flags;
+
+	return (Node *) css;
+}
+
+/*
+ * BeginCtidScan - A method of CustomScanState; sets up the part of the
+ * supplied CtidScanState that is private to this provider.
+ */
+static void
+BeginCtidScan(CustomScanState *node, EState *estate, int eflags)
+{
+	CustomScan *plan = (CustomScan *) node->ss.ps.plan;
+	CtidScanState *state = (CtidScanState *) node;
+	Expr	   *quals = (Expr *) plan->custom_exprs;
+
+	/*
+	 * A custom-scan provider that offers an alternative way to scan a
+	 * particular relation gets most of the needed initialization (relation
+	 * open, assignment of scan tuple-slot or projection info) done by the
+	 * core implementation.  Only our own local property, the initialized
+	 * form of the ctid quals, is left to set up here.
+	 */
+	state->ctid_quals = (List *) ExecInitExpr(quals, &node->ss.ps);
+}
+
+/*
+ * ReScanCtidScan - A method of CustomScanState; that (re)starts the heap
+ * scan, using scan keys rebuilt from the current values of the ctid quals.
+ */
+static void
+ReScanCtidScan(CustomScanState *node)
+{
+ CtidScanState *ctss = (CtidScanState *)node;
+ HeapScanDesc scan = ctss->css.ss.ss_currentScanDesc;
+ EState *estate = node->ss.ps.state;
+ ScanDirection direction = estate->es_direction;
+ Relation relation = ctss->css.ss.ss_currentRelation;
+ ExprContext *econtext = ctss->css.ss.ps.ps_ExprContext;
+ ScanKeyData keys[2]; /* [0]: upper bound key, [1]: lower bound key */
+ bool has_ubound = false;
+ bool has_lbound = false;
+ ItemPointerData ip_max; /* valid only while has_ubound is true */
+ ItemPointerData ip_min; /* valid only while has_lbound is true */
+ ListCell *lc;
+
+ /* close the existing scandesc first, if any */
+ if (scan)
+ {
+ heap_endscan(scan);
+ scan = ctss->css.ss.ss_currentScanDesc = NULL;
+ }
+
+ /* walk the inequality operators, keeping the tightest bound per side */
+ foreach (lc, ctss->ctid_quals)
+ {
+ FuncExprState *fexstate = (FuncExprState *) lfirst(lc);
+ OpExpr *op = (OpExpr *)fexstate->xprstate.expr;
+ Node *arg1 = linitial(op->args);
+ Node *arg2 = lsecond(op->args);
+ Index scanrelid;
+ Oid opno;
+ ExprState *exstate;
+ ItemPointer itemptr;
+ bool isnull;
+
+ scanrelid = ((Scan *)ctss->css.ss.ps.plan)->scanrelid;
+ if (IsCTIDVar(arg1, scanrelid))
+ {
+ exstate = (ExprState *) lsecond(fexstate->args);
+ opno = op->opno;
+ }
+ else if (IsCTIDVar(arg2, scanrelid))
+ {
+ exstate = (ExprState *) linitial(fexstate->args);
+ opno = get_commutator(op->opno); /* treat as if ctid were 1st arg */
+ }
+ else
+ elog(ERROR, "could not identify CTID variable");
+
+ itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(exstate,
+ econtext,
+ &isnull,
+ NULL));
+ if (isnull)
+ {
+ /*
+ * The restriction clauses are chained with the AND- boolean
+ * operator, so the whole becomes false if one of the clauses
+ * has a NULL result. So, we can immediately break the evaluation
+ * to inform the caller it does not make sense to scan any more.
+ * In this case, scandesc is kept to NULL.
+ */
+ return;
+ }
+
+ switch (opno)
+ {
+ case TIDLessOperator:
+ if (!has_ubound ||
+ ItemPointerCompare(itemptr, &ip_max) <= 0)
+ {
+ ScanKeyInit(&keys[0],
+ SelfItemPointerAttributeNumber,
+ BTLessStrategyNumber,
+ F_TIDLT,
+ PointerGetDatum(itemptr));
+ ItemPointerCopy(itemptr, &ip_max);
+ has_ubound = true;
+ }
+ break;
+
+ case TIDLessEqualOperator:
+ if (!has_ubound ||
+ ItemPointerCompare(itemptr, &ip_max) < 0)
+ {
+ ScanKeyInit(&keys[0],
+ SelfItemPointerAttributeNumber,
+ BTLessEqualStrategyNumber,
+ F_TIDLE,
+ PointerGetDatum(itemptr));
+ ItemPointerCopy(itemptr, &ip_max);
+ has_ubound = true;
+ }
+ break;
+
+ case TIDGreaterOperator:
+ if (!has_lbound ||
+ ItemPointerCompare(itemptr, &ip_min) >= 0)
+ {
+ ScanKeyInit(&keys[1],
+ SelfItemPointerAttributeNumber,
+ BTGreaterStrategyNumber,
+ F_TIDGT,
+ PointerGetDatum(itemptr));
+ ItemPointerCopy(itemptr, &ip_min);
+ has_lbound = true;
+ }
+ break;
+
+ case TIDGreaterEqualOperator:
+ if (!has_lbound ||
+ ItemPointerCompare(itemptr, &ip_min) > 0)
+ {
+ ScanKeyInit(&keys[1],
+ SelfItemPointerAttributeNumber,
+ BTGreaterEqualStrategyNumber,
+ F_TIDGE,
+ PointerGetDatum(itemptr));
+ ItemPointerCopy(itemptr, &ip_min);
+ has_lbound = true;
+ }
+ break;
+
+ default:
+ elog(ERROR, "unsupported operator");
+ break;
+ }
+ }
+
+ /* begin heapscan with whichever of the keys above were initialized */
+ if (has_ubound && has_lbound)
+ scan = heap_beginscan(relation, estate->es_snapshot, 2, &keys[0]);
+ else if (has_ubound)
+ scan = heap_beginscan(relation, estate->es_snapshot, 1, &keys[0]);
+ else if (has_lbound)
+ scan = heap_beginscan(relation, estate->es_snapshot, 1, &keys[1]);
+ else
+ scan = heap_beginscan(relation, estate->es_snapshot, 0, NULL);
+
+ /* Seek the starting block for the scan direction, if that side is bounded */
+ if (direction == ForwardScanDirection && has_lbound)
+ {
+ BlockNumber blknum = Min(BlockIdGetBlockNumber(&ip_min.ip_blkid),
+ scan->rs_nblocks - 1);
+ scan->rs_startblock = blknum;
+ }
+ else if (direction == BackwardScanDirection && has_ubound)
+ {
+ BlockNumber blknum = Min(BlockIdGetBlockNumber(&ip_max.ip_blkid),
+ scan->rs_nblocks - 1);
+ scan->rs_startblock = blknum;
+ }
+ ctss->css.ss.ss_currentScanDesc = scan;
+}
+
+/*
+ * CTidAccessCustomScan
+ *
+ * Access method of ExecCtidScan below. It fetches a tuple from the underlying
+ * heap scan that was started from the point according to the tid clauses.
+ * Returns NULL once the scan is exhausted, or when no scan could be started
+ * because one of the ctid quals evaluated to NULL.
+ */
+static TupleTableSlot *
+CTidAccessCustomScan(CustomScanState *node)
+{
+	CtidScanState  *ctss = (CtidScanState *) node;
+	ScanDirection	direction = node->ss.ps.state->es_direction;
+	HeapScanDesc	scan;
+	TupleTableSlot *slot;
+	HeapTuple		tuple;
+
+	if (!ctss->css.ss.ss_currentScanDesc)
+		ReScanCtidScan(node);
+	/* ReScanCtidScan leaves the descriptor NULL if a ctid qual is NULL */
+	scan = ctss->css.ss.ss_currentScanDesc;
+	if (scan == NULL)
+		return NULL;
+
+	/* get the next tuple from the table */
+	tuple = heap_getnext(scan, direction);
+	if (!HeapTupleIsValid(tuple))
+		return NULL;
+
+	slot = ctss->css.ss.ss_ScanTupleSlot;
+	ExecStoreTuple(tuple, slot, scan->rs_cbuf, false);
+	return slot;
+}
+
+static bool
+CTidRecheckCustomScan(CustomScanState *node, TupleTableSlot *slot)
+{
+ return true; /* recheck method handed to ExecScan: accepts every tuple */
+}
+
+/*
+ * ExecCtidScan - A method of CustomScanState; fetches the next tuple of
+ * the relation, if any is left, by way of ExecScan.
+ */
+static TupleTableSlot *
+ExecCtidScan(CustomScanState *node)
+{
+	ExecScanAccessMtd	fetch_next = (ExecScanAccessMtd) CTidAccessCustomScan;
+	ExecScanRecheckMtd	recheck = (ExecScanRecheckMtd) CTidRecheckCustomScan;
+	return ExecScan(&node->ss, fetch_next, recheck);
+}
+
+/*
+ * EndCtidScan - A method of CustomScanState; ends the heap scan if one is
+ * still open.
+ */
+static void
+EndCtidScan(CustomScanState *node)
+{
+	HeapScanDesc	scan = ((CtidScanState *) node)->css.ss.ss_currentScanDesc;
+
+	if (scan != NULL)
+		heap_endscan(scan);
+}
+
+/*
+ * ExplainCtidScan - A method of CustomScanState; that shows extra info
+ * on EXPLAIN command.
+ *
+ * The ctid quals of the plan node are converted to an explicit AND
+ * expression, deparsed, and emitted as an extra "ctid quals" property.
+ * Nothing is emitted when the plan node carries no such quals.
+ *
+ *	node		state node of the custom scan being explained
+ *	ancestors	ancestor list handed over to the deparse context
+ *	es			EXPLAIN state; supplies the deparse context and the
+ *				verbose flag, and receives the output property
+ */
+static void
+ExplainCtidScan(CustomScanState *node, List *ancestors, ExplainState *es)
+{
+	CtidScanState  *ctss = (CtidScanState *) node;
+	CustomScan	   *cscan = (CustomScan *) ctss->css.ss.ps.plan;
+	Node		   *qual;
+	List		   *context;
+	char		   *exprstr;
+
+	if (cscan->custom_exprs == NIL)
+		return;
+
+	/* logic copied from show_qual and show_expression */
+	/* Convert AND list to explicit AND */
+	qual = (Node *) make_ands_explicit(cscan->custom_exprs);
+
+	/* Set up deparsing context */
+	context = set_deparse_context_planstate(es->deparse_cxt,
+											(Node *) node,
+											ancestors);
+
+	/* Deparse the expression; the useprefix argument follows es->verbose */
+	exprstr = deparse_expression(qual, context, es->verbose, false);
+
+	/* And add to es->str */
+	ExplainPropertyText("ctid quals", exprstr, es);
+}
+
+/*
+ * Entrypoint: defines enable_ctidscan and installs the set_rel_pathlist hook
+ */
+void
+_PG_init(void)
+{
+ DefineCustomBoolVariable("enable_ctidscan",
+ "Enables the planner's use of ctid-scan plans.",
+ NULL,
+ &enable_ctidscan,
+ true,
+ PGC_USERSET,
+ GUC_NOT_IN_SAMPLE,
+ NULL, NULL, NULL);
+
+ /* save the previous hook, then register ours to add the alternative path */
+ set_rel_pathlist_next = set_rel_pathlist_hook;
+ set_rel_pathlist_hook = SetCtidScanPath;
+}
diff --git a/contrib/ctidscan/ctidscan.control b/contrib/ctidscan/ctidscan.control
new file mode 100644
index 0000000000000..ad63432d4c41f
--- /dev/null
+++ b/contrib/ctidscan/ctidscan.control
@@ -0,0 +1,5 @@
+# ctidscan extension
+comment = 'example implementation for custom-plan interface'
+default_version = '1.0'
+module_pathname = '$libdir/ctidscan'
+relocatable = true
diff --git a/contrib/ctidscan/ctidscan.sgml b/contrib/ctidscan/ctidscan.sgml
new file mode 100644
index 0000000000000..ce49e77e1a322
--- /dev/null
+++ b/contrib/ctidscan/ctidscan.sgml
@@ -0,0 +1,50 @@
+
+ ctidscan
+
+
+ ctidscan
+
+
+
+ This module implements a custom-scan provider that utilizes inequality
+ operators involving the ctid system column.
+
+
+
+ This module provides no SQL accessible interface. For installation,
+ all you need to do is just load the module to the server.
+
+ You can load it into an individual session using:
+
+LOAD 'ctidscan';
+
+
+ or, more typically, you can preload the extension
+ using session_preload_libraries or shared_preload_libraries
+ in
+ postgresql.conf.
+
+ Then, the planner may consider a cheaper execution path if the supplied
+ query involves the above operators.
+
+
+
+
+ enable_ctidscan (bool)
+
+ enable_ctidscan configuration parameter
+
+
+
+
+ enable_ctidscan turns the functionality of the
+ ctidscan custom-scan provider on or off.
+ If turned off, it does not offer an alternative scan path even if
+ the supplied query is suitable for a ctidscan plan.
+ Its default is true.
+ Anybody can change it using the SET command.
+
+
+
+
+
diff --git a/contrib/ctidscan/expected/ctidscan.out b/contrib/ctidscan/expected/ctidscan.out
new file mode 100644
index 0000000000000..5b289240acce6
--- /dev/null
+++ b/contrib/ctidscan/expected/ctidscan.out
@@ -0,0 +1,332 @@
+--
+-- Regression Tests for Custom Plan APIs
+--
+-- construction of test data
+SET client_min_messages TO 'warning';
+CREATE SCHEMA regtest_custom_scan;
+SET search_path TO regtest_custom_scan, public;
+CREATE TABLE t1 (
+ a int primary key,
+ b text
+);
+INSERT INTO t1 (SELECT s, md5(s::text) FROM generate_series(1,400) s);
+VACUUM ANALYZE t1;
+CREATE TABLE t2 (
+ x int primary key,
+ y text
+);
+INSERT INTO t2 (SELECT s, md5(s::text)||md5(s::text) FROM generate_series(1,400) s);
+VACUUM ANALYZE t2;
+RESET client_min_messages;
+--
+-- Check Plans if no special extension is loaded.
+--
+EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40;
+ QUERY PLAN
+--------------------------------
+ Index Scan using t1_pkey on t1
+ Index Cond: (a = 40)
+(2 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%';
+ QUERY PLAN
+--------------------------------
+ Seq Scan on t1
+ Filter: (b ~~ '%789%'::text)
+(2 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid;
+ QUERY PLAN
+------------------------------------
+ Tid Scan on t1
+ TID Cond: (ctid = '(2,10)'::tid)
+(2 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid;
+ QUERY PLAN
+------------------------------------------------------------------
+ Seq Scan on t1
+ Filter: ((ctid >= '(2,115)'::tid) AND (ctid <= '(3,10)'::tid))
+(2 rows)
+
+--
+-- Plan for same query but ctidscan was loaded
+--
+LOAD '$libdir/ctidscan';
+EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40;
+ QUERY PLAN
+--------------------------------
+ Index Scan using t1_pkey on t1
+ Index Cond: (a = 40)
+(2 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%';
+ QUERY PLAN
+--------------------------------
+ Seq Scan on t1
+ Filter: (b ~~ '%789%'::text)
+(2 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid;
+ QUERY PLAN
+------------------------------------
+ Tid Scan on t1
+ TID Cond: (ctid = '(2,10)'::tid)
+(2 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid;
+ QUERY PLAN
+----------------------------------------------------------------------
+ Custom Scan (ctidscan) on t1
+ Filter: ((ctid >= '(2,115)'::tid) AND (ctid <= '(3,10)'::tid))
+ ctid quals: ((ctid >= '(2,115)'::tid) AND (ctid <= '(3,10)'::tid))
+(3 rows)
+
+EXPLAIN (costs off) SELECT * FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid;
+ QUERY PLAN
+--------------------------------------------------
+ Merge Join
+ Merge Cond: (t1.ctid = t2.ctid)
+ -> Sort
+ Sort Key: t1.ctid
+ -> Custom Scan (ctidscan) on t1
+ Filter: (ctid < '(2,10)'::tid)
+ ctid quals: (ctid < '(2,10)'::tid)
+ -> Sort
+ Sort Key: t2.ctid
+ -> Custom Scan (ctidscan) on t2
+ Filter: (ctid > '(1,75)'::tid)
+ ctid quals: (ctid > '(1,75)'::tid)
+(12 rows)
+
+SELECT ctid,* FROM t1 WHERE ctid < '(1,20)'::tid;
+ ctid | a | b
+---------+-----+----------------------------------
+ (0,1) | 1 | c4ca4238a0b923820dcc509a6f75849b
+ (0,2) | 2 | c81e728d9d4c2f636f067f89cc14862c
+ (0,3) | 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3
+ (0,4) | 4 | a87ff679a2f3e71d9181a67b7542122c
+ (0,5) | 5 | e4da3b7fbbce2345d7772b0674a318d5
+ (0,6) | 6 | 1679091c5a880faf6fb5e6087eb1b2dc
+ (0,7) | 7 | 8f14e45fceea167a5a36dedd4bea2543
+ (0,8) | 8 | c9f0f895fb98ab9159f51fd0297e236d
+ (0,9) | 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26
+ (0,10) | 10 | d3d9446802a44259755d38e6d163e820
+ (0,11) | 11 | 6512bd43d9caa6e02c990b0a82652dca
+ (0,12) | 12 | c20ad4d76fe97759aa27a0c99bff6710
+ (0,13) | 13 | c51ce410c124a10e0db5e4b97fc2af39
+ (0,14) | 14 | aab3238922bcc25a6f606eb525ffdc56
+ (0,15) | 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3
+ (0,16) | 16 | c74d97b01eae257e44aa9d5bade97baf
+ (0,17) | 17 | 70efdf2ec9b086079795c442636b55fb
+ (0,18) | 18 | 6f4922f45568161a8cdf4ad2299f6d23
+ (0,19) | 19 | 1f0e3dad99908345f7439f8ffabdffc4
+ (0,20) | 20 | 98f13708210194c475687be6106a3b84
+ (0,21) | 21 | 3c59dc048e8850243be8079a5c74d079
+ (0,22) | 22 | b6d767d2f8ed5d21a44b0e5886680cb9
+ (0,23) | 23 | 37693cfc748049e45d87b8c7d8b9aacd
+ (0,24) | 24 | 1ff1de774005f8da13f42943881c655f
+ (0,25) | 25 | 8e296a067a37563370ded05f5a3bf3ec
+ (0,26) | 26 | 4e732ced3463d06de0ca9a15b6153677
+ (0,27) | 27 | 02e74f10e0327ad868d138f2b4fdd6f0
+ (0,28) | 28 | 33e75ff09dd601bbe69f351039152189
+ (0,29) | 29 | 6ea9ab1baa0efb9e19094440c317e21b
+ (0,30) | 30 | 34173cb38f07f89ddbebc2ac9128303f
+ (0,31) | 31 | c16a5320fa475530d9583c34fd356ef5
+ (0,32) | 32 | 6364d3f0f495b6ab9dcf8d3b5c6e0b01
+ (0,33) | 33 | 182be0c5cdcd5072bb1864cdee4d3d6e
+ (0,34) | 34 | e369853df766fa44e1ed0ff613f563bd
+ (0,35) | 35 | 1c383cd30b7c298ab50293adfecb7b18
+ (0,36) | 36 | 19ca14e7ea6328a42e0eb13d585e4c22
+ (0,37) | 37 | a5bfc9e07964f8dddeb95fc584cd965d
+ (0,38) | 38 | a5771bce93e200c36f7cd9dfd0e5deaa
+ (0,39) | 39 | d67d8ab4f4c10bf22aa353e27879133c
+ (0,40) | 40 | d645920e395fedad7bbbed0eca3fe2e0
+ (0,41) | 41 | 3416a75f4cea9109507cacd8e2f2aefc
+ (0,42) | 42 | a1d0c6e83f027327d8461063f4ac58a6
+ (0,43) | 43 | 17e62166fc8586dfa4d1bc0e1742c08b
+ (0,44) | 44 | f7177163c833dff4b38fc8d2872f1ec6
+ (0,45) | 45 | 6c8349cc7260ae62e3b1396831a8398f
+ (0,46) | 46 | d9d4f495e875a2e075a1a4a6e1b9770f
+ (0,47) | 47 | 67c6a1e7ce56d3d6fa748ab6d9af3fd7
+ (0,48) | 48 | 642e92efb79421734881b53e1e1b18b6
+ (0,49) | 49 | f457c545a9ded88f18ecee47145a72c0
+ (0,50) | 50 | c0c7c76d30bd3dcaefc96f40275bdc0a
+ (0,51) | 51 | 2838023a778dfaecdc212708f721b788
+ (0,52) | 52 | 9a1158154dfa42caddbd0694a4e9bdc8
+ (0,53) | 53 | d82c8d1619ad8176d665453cfb2e55f0
+ (0,54) | 54 | a684eceee76fc522773286a895bc8436
+ (0,55) | 55 | b53b3a3d6ab90ce0268229151c9bde11
+ (0,56) | 56 | 9f61408e3afb633e50cdf1b20de6f466
+ (0,57) | 57 | 72b32a1f754ba1c09b3695e0cb6cde7f
+ (0,58) | 58 | 66f041e16a60928b05a7e228a89c3799
+ (0,59) | 59 | 093f65e080a295f8076b1c5722a46aa2
+ (0,60) | 60 | 072b030ba126b2f4b2374f342be9ed44
+ (0,61) | 61 | 7f39f8317fbdb1988ef4c628eba02591
+ (0,62) | 62 | 44f683a84163b3523afe57c2e008bc8c
+ (0,63) | 63 | 03afdbd66e7929b125f8597834fa83a4
+ (0,64) | 64 | ea5d2f1c4608232e07d3aa3d998e5135
+ (0,65) | 65 | fc490ca45c00b1249bbe3554a4fdf6fb
+ (0,66) | 66 | 3295c76acbf4caaed33c36b1b5fc2cb1
+ (0,67) | 67 | 735b90b4568125ed6c3f678819b6e058
+ (0,68) | 68 | a3f390d88e4c41f2747bfa2f1b5f87db
+ (0,69) | 69 | 14bfa6bb14875e45bba028a21ed38046
+ (0,70) | 70 | 7cbbc409ec990f19c78c75bd1e06f215
+ (0,71) | 71 | e2c420d928d4bf8ce0ff2ec19b371514
+ (0,72) | 72 | 32bb90e8976aab5298d5da10fe66f21d
+ (0,73) | 73 | d2ddea18f00665ce8623e36bd4e3c7c5
+ (0,74) | 74 | ad61ab143223efbc24c7d2583be69251
+ (0,75) | 75 | d09bf41544a3365a46c9077ebb5e35c3
+ (0,76) | 76 | fbd7939d674997cdb4692d34de8633c4
+ (0,77) | 77 | 28dd2c7955ce926456240b2ff0100bde
+ (0,78) | 78 | 35f4a8d465e6e1edc05f3d8ab658c551
+ (0,79) | 79 | d1fe173d08e959397adf34b1d77e88d7
+ (0,80) | 80 | f033ab37c30201f73f142449d037028d
+ (0,81) | 81 | 43ec517d68b6edd3015b3edc9a11367b
+ (0,82) | 82 | 9778d5d219c5080b9a6a17bef029331c
+ (0,83) | 83 | fe9fc289c3ff0af142b6d3bead98a923
+ (0,84) | 84 | 68d30a9594728bc39aa24be94b319d21
+ (0,85) | 85 | 3ef815416f775098fe977004015c6193
+ (0,86) | 86 | 93db85ed909c13838ff95ccfa94cebd9
+ (0,87) | 87 | c7e1249ffc03eb9ded908c236bd1996d
+ (0,88) | 88 | 2a38a4a9316c49e5a833517c45d31070
+ (0,89) | 89 | 7647966b7343c29048673252e490f736
+ (0,90) | 90 | 8613985ec49eb8f757ae6439e879bb2a
+ (0,91) | 91 | 54229abfcfa5649e7003b83dd4755294
+ (0,92) | 92 | 92cc227532d17e56e07902b254dfad10
+ (0,93) | 93 | 98dce83da57b0395e163467c9dae521b
+ (0,94) | 94 | f4b9ec30ad9f68f89b29639786cb62ef
+ (0,95) | 95 | 812b4ba287f5ee0bc9d43bbf5bbe87fb
+ (0,96) | 96 | 26657d5ff9020d2abefe558796b99584
+ (0,97) | 97 | e2ef524fbf3d9fe611d5a8e90fefdc9c
+ (0,98) | 98 | ed3d2c21991e3bef5e069713af9fa6ca
+ (0,99) | 99 | ac627ab1ccbdb62ec96e702f07f6425b
+ (0,100) | 100 | f899139df5e1059396431415e770c6dd
+ (0,101) | 101 | 38b3eff8baf56627478ec76a704e9b52
+ (0,102) | 102 | ec8956637a99787bd197eacd77acce5e
+ (0,103) | 103 | 6974ce5ac660610b44d9b9fed0ff9548
+ (0,104) | 104 | c9e1074f5b3f9fc8ea15d152add07294
+ (0,105) | 105 | 65b9eea6e1cc6bb9f0cd2a47751a186f
+ (0,106) | 106 | f0935e4cd5920aa6c7c996a5ee53a70f
+ (0,107) | 107 | a97da629b098b75c294dffdc3e463904
+ (0,108) | 108 | a3c65c2974270fd093ee8a9bf8ae7d0b
+ (0,109) | 109 | 2723d092b63885e0d7c260cc007e8b9d
+ (0,110) | 110 | 5f93f983524def3dca464469d2cf9f3e
+ (0,111) | 111 | 698d51a19d8a121ce581499d7b701668
+ (0,112) | 112 | 7f6ffaa6bb0b408017b62254211691b5
+ (0,113) | 113 | 73278a4a86960eeb576a8fd4c9ec6997
+ (0,114) | 114 | 5fd0b37cd7dbbb00f97ba6ce92bf5add
+ (0,115) | 115 | 2b44928ae11fb9384c4cf38708677c48
+ (0,116) | 116 | c45147dee729311ef5b5c3003946c48f
+ (0,117) | 117 | eb160de1de89d9058fcb0b968dbbbd68
+ (0,118) | 118 | 5ef059938ba799aaa845e1c2e8a762bd
+ (0,119) | 119 | 07e1cd7dca89a1678042477183b7ac3f
+ (0,120) | 120 | da4fb5c6e93e74d3df8527599fa62642
+ (1,1) | 121 | 4c56ff4ce4aaf9573aa5dff913df997a
+ (1,2) | 122 | a0a080f42e6f13b3a2df133f073095dd
+ (1,3) | 123 | 202cb962ac59075b964b07152d234b70
+ (1,4) | 124 | c8ffe9a587b126f152ed3d89a146b445
+ (1,5) | 125 | 3def184ad8f4755ff269862ea77393dd
+ (1,6) | 126 | 069059b7ef840f0c74a814ec9237b6ec
+ (1,7) | 127 | ec5decca5ed3d6b8079e2e7e7bacc9f2
+ (1,8) | 128 | 76dc611d6ebaafc66cc0879c71b5db5c
+ (1,9) | 129 | d1f491a404d6854880943e5c3cd9ca25
+ (1,10) | 130 | 9b8619251a19057cff70779273e95aa6
+ (1,11) | 131 | 1afa34a7f984eeabdbb0a7d494132ee5
+ (1,12) | 132 | 65ded5353c5ee48d0b7d48c591b8f430
+ (1,13) | 133 | 9fc3d7152ba9336a670e36d0ed79bc43
+ (1,14) | 134 | 02522a2b2726fb0a03bb19f2d8d9524d
+ (1,15) | 135 | 7f1de29e6da19d22b51c68001e7e0e54
+ (1,16) | 136 | 42a0e188f5033bc65bf8d78622277c4e
+ (1,17) | 137 | 3988c7f88ebcb58c6ce932b957b6f332
+ (1,18) | 138 | 013d407166ec4fa56eb1e1f8cbe183b9
+ (1,19) | 139 | e00da03b685a0dd18fb6a08af0923de0
+(139 rows)
+
+SELECT ctid,* FROM t1 WHERE ctid > '(4,0)'::tid;
+ ctid | a | b
+------+---+---
+(0 rows)
+
+SELECT ctid,* FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid;
+ ctid | a | b
+---------+-----+----------------------------------
+ (2,115) | 355 | 82cec96096d4281b7c95cd7e74623496
+ (2,116) | 356 | 6c524f9d5d7027454a783c841250ba71
+ (2,117) | 357 | fb7b9ffa5462084c5f4e7e85a093e6d7
+ (2,118) | 358 | aa942ab2bfa6ebda4840e7360ce6e7ef
+ (2,119) | 359 | c058f544c737782deacefa532d9add4c
+ (2,120) | 360 | e7b24b112a44fdd9ee93bdf998c6ca0e
+ (3,1) | 361 | 52720e003547c70561bf5e03b95aa99f
+ (3,2) | 362 | c3e878e27f52e2a57ace4d9a76fd9acf
+ (3,3) | 363 | 00411460f7c92d2124a67ea0f4cb5f85
+ (3,4) | 364 | bac9162b47c56fc8a4d2a519803d51b3
+ (3,5) | 365 | 9be40cee5b0eee1462c82c6964087ff9
+ (3,6) | 366 | 5ef698cd9fe650923ea331c15af3b160
+ (3,7) | 367 | 05049e90fa4f5039a8cadc6acbb4b2cc
+ (3,8) | 368 | cf004fdc76fa1a4f25f62e0eb5261ca3
+ (3,9) | 369 | 0c74b7f78409a4022a2c4c5a5ca3ee19
+ (3,10) | 370 | d709f38ef758b5066ef31b18039b8ce5
+(16 rows)
+
+SELECT t1.ctid,* FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid;
+ ctid | a | b | x | y
+--------+-----+----------------------------------+-----+------------------------------------------------------------------
+ (1,76) | 196 | 084b6fbb10729ed4da8c3d3f5a3ae7c9 | 157 | 6c4b761a28b734fe93831e3fb400ce876c4b761a28b734fe93831e3fb400ce87
+ (1,77) | 197 | 85d8ce590ad8981ca2c8286f79f59954 | 158 | 06409663226af2f3114485aa4e0a23b406409663226af2f3114485aa4e0a23b4
+ (1,78) | 198 | 0e65972dce68dad4d52d063967f0a705 | 159 | 140f6969d5213fd0ece03148e62e461e140f6969d5213fd0ece03148e62e461e
+ (1,79) | 199 | 84d9ee44e457ddef7f2c4f25dc8fa865 | 160 | b73ce398c39f506af761d2277d853a92b73ce398c39f506af761d2277d853a92
+ (1,80) | 200 | 3644a684f98ea8fe223c713b77189a77 | 161 | bd4c9ab730f5513206b999ec0d90d1fbbd4c9ab730f5513206b999ec0d90d1fb
+ (1,81) | 201 | 757b505cfd34c64c85ca5b5690ee5293 | 162 | 82aa4b0af34c2313a562076992e50aa382aa4b0af34c2313a562076992e50aa3
+ (2,1) | 241 | f340f1b1f65b6df5b5e3f94d95b11daf | 163 | 0777d5c17d4066b82ab86dff8a46af6f0777d5c17d4066b82ab86dff8a46af6f
+ (2,2) | 242 | e4a6222cdb5b34375400904f03d8e6a5 | 164 | fa7cdfad1a5aaf8370ebeda47a1ff1c3fa7cdfad1a5aaf8370ebeda47a1ff1c3
+ (2,3) | 243 | cb70ab375662576bd1ac5aaf16b3fca4 | 165 | 9766527f2b5d3e95d4a733fcfb77bd7e9766527f2b5d3e95d4a733fcfb77bd7e
+ (2,4) | 244 | 9188905e74c28e489b44e954ec0b9bca | 166 | 7e7757b1e12abcb736ab9a754ffb617a7e7757b1e12abcb736ab9a754ffb617a
+ (2,5) | 245 | 0266e33d3f546cb5436a10798e657d97 | 167 | 5878a7ab84fb43402106c575658472fa5878a7ab84fb43402106c575658472fa
+ (2,6) | 246 | 38db3aed920cf82ab059bfccbd02be6a | 168 | 006f52e9102a8d3be2fe5614f42ba989006f52e9102a8d3be2fe5614f42ba989
+ (2,7) | 247 | 3cec07e9ba5f5bb252d13f5f431e4bbb | 169 | 3636638817772e42b59d74cff571fbb33636638817772e42b59d74cff571fbb3
+ (2,8) | 248 | 621bf66ddb7c962aa0d22ac97d69b793 | 170 | 149e9677a5989fd342ae44213df68868149e9677a5989fd342ae44213df68868
+ (2,9) | 249 | 077e29b11be80ab57e1a2ecabb7da330 | 171 | a4a042cf4fd6bfb47701cbc8a1653adaa4a042cf4fd6bfb47701cbc8a1653ada
+(15 rows)
+
+PREPARE p1(tid, tid) AS SELECT ctid,* FROM t1
+ WHERE b like '%abc%' AND ctid BETWEEN $1 AND $2;
+EXPLAIN (costs off) EXECUTE p1('(5,0)'::tid, '(10,0)'::tid);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------
+ Custom Scan (ctidscan) on t1
+ Filter: ((b ~~ '%abc%'::text) AND (ctid >= '(5,0)'::tid) AND (ctid <= '(10,0)'::tid))
+ ctid quals: ((ctid >= '(5,0)'::tid) AND (ctid <= '(10,0)'::tid))
+(3 rows)
+
+EXPLAIN (costs off) EXECUTE p1('(10,0)'::tid, '(5,0)'::tid);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------
+ Custom Scan (ctidscan) on t1
+ Filter: ((b ~~ '%abc%'::text) AND (ctid >= '(10,0)'::tid) AND (ctid <= '(5,0)'::tid))
+ ctid quals: ((ctid >= '(10,0)'::tid) AND (ctid <= '(5,0)'::tid))
+(3 rows)
+
+-- Also, EXPLAIN with none-text format
+EXPLAIN (costs off, format xml) EXECUTE p1('(0,0)'::tid, '(5,0)'::tid);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------
+ +
+ +
+ +
+ Custom Scan +
+ ctidscan +
+ t1 +
+ t1 +
+ ((b ~~ '%abc%'::text) AND (ctid >= '(0,0)'::tid) AND (ctid <= '(5,0)'::tid))+
+ ((ctid >= '(0,0)'::tid) AND (ctid <= '(5,0)'::tid)) +
+ +
+ +
+
+(1 row)
+
+-- Test cleanup
+DROP SCHEMA regtest_custom_scan CASCADE;
+NOTICE: drop cascades to 2 other objects
+DETAIL: drop cascades to table t1
+drop cascades to table t2
diff --git a/contrib/ctidscan/sql/ctidscan.sql b/contrib/ctidscan/sql/ctidscan.sql
new file mode 100644
index 0000000000000..26c22c2bf40ea
--- /dev/null
+++ b/contrib/ctidscan/sql/ctidscan.sql
@@ -0,0 +1,59 @@
+--
+-- Regression Tests for Custom Plan APIs
+--
+
+-- construction of test data
+SET client_min_messages TO 'warning';
+
+CREATE SCHEMA regtest_custom_scan;
+
+SET search_path TO regtest_custom_scan, public;
+
+CREATE TABLE t1 (
+ a int primary key,
+ b text
+);
+INSERT INTO t1 (SELECT s, md5(s::text) FROM generate_series(1,400) s);
+VACUUM ANALYZE t1;
+
+CREATE TABLE t2 (
+ x int primary key,
+ y text
+);
+INSERT INTO t2 (SELECT s, md5(s::text)||md5(s::text) FROM generate_series(1,400) s);
+VACUUM ANALYZE t2;
+
+RESET client_min_messages;
+--
+-- Check Plans if no special extension is loaded.
+--
+EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40;
+EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%';
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid;
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid;
+
+--
+-- Plan for same query but ctidscan was loaded
+--
+LOAD '$libdir/ctidscan';
+EXPLAIN (costs off) SELECT * FROM t1 WHERE a = 40;
+EXPLAIN (costs off) SELECT * FROM t1 WHERE b like '%789%';
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid = '(2,10)'::tid;
+EXPLAIN (costs off) SELECT * FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid;
+EXPLAIN (costs off) SELECT * FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid;
+
+SELECT ctid,* FROM t1 WHERE ctid < '(1,20)'::tid;
+SELECT ctid,* FROM t1 WHERE ctid > '(4,0)'::tid;
+SELECT ctid,* FROM t1 WHERE ctid BETWEEN '(2,115)'::tid AND '(3,10)'::tid;
+SELECT t1.ctid,* FROM t1 JOIN t2 ON t1.ctid = t2.ctid WHERE t1.ctid < '(2,10)'::tid AND t2.ctid > '(1,75)'::tid;
+
+PREPARE p1(tid, tid) AS SELECT ctid,* FROM t1
+ WHERE b like '%abc%' AND ctid BETWEEN $1 AND $2;
+EXPLAIN (costs off) EXECUTE p1('(5,0)'::tid, '(10,0)'::tid);
+EXPLAIN (costs off) EXECUTE p1('(10,0)'::tid, '(5,0)'::tid);
+
+-- Also, EXPLAIN with none-text format
+EXPLAIN (costs off, format xml) EXECUTE p1('(0,0)'::tid, '(5,0)'::tid);
+
+-- Test cleanup
+DROP SCHEMA regtest_custom_scan CASCADE;
diff --git a/script/graph.sql b/script/graph.sql
new file mode 100644
index 0000000000000..86dd558c654ed
--- /dev/null
+++ b/script/graph.sql
@@ -0,0 +1,23 @@
+CREATE TABLE GRAPH(
+ V1 INTEGER NOT NULL,
+ V2 INTEGER NOT NULL,
+ PRIMARY KEY(V1, V2)
+);
+
+/*
+
+    1     4
+   / \   / \
+  /   \ /   \
+ 2-----3-----5
+
+*/
+
+
+INSERT INTO GRAPH (V1, V2) VALUES (1, 2), (2, 1), (2, 3), (3, 2), (1, 3), (3, 1), (3, 4), (4, 3), (3, 5), (5, 3), (4, 5), (5, 4);
+
+/*
+ Expected count: 2 triangles * 6 ordered closed walks each = 12
+*/
+
+select COUNT(*) from GRAPH as T1, GRAPH as T2, GRAPH as T3 where T1.V2 = T2.V1 and T2.V2 = T3.V1 and T3.V2 = T1.V1;
\ No newline at end of file
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index e66bcdade716a..030849317b969 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -26,6 +26,76 @@
#include "utils/memutils.h"
+bool rootNestLoopNode = true;	/* one-shot flag: ExecNestLoop() runs its relation dump while this is true and clears it first; NOTE(review): nothing visible sets it back to true -- confirm that is intended */
+/*
+ * findAllRelations
+ *
+ * Walk the plan-state tree rooted at "node" depth-first, outer child before
+ * inner child, and append every SeqScan or IndexScan state node found to
+ * relations[], advancing *counter past each stored entry.  Only NestLoop
+ * nodes are descended into; any other node type is reported and skipped.
+ * A NULL "node" is ignored.
+ *
+ * NOTE(review): nothing here limits how many entries are written, so the
+ * caller's array must be large enough for every scan node below "node".
+ */
+void
+findAllRelations(PlanState ** relations, PlanState * node, int * counter)
+{
+	if (node == NULL)
+		return;
+
+	switch (nodeTag(node))
+	{
+		case T_SeqScanState:
+			/* leaf: remember the scan node itself */
+			elog(INFO, "T_SeqScan");
+			relations[*counter] = node;
+			*counter += 1;
+			break;
+
+		case T_IndexScanState:
+			/* leaf: remember the scan node itself */
+			elog(INFO, "T_IndexScan");
+			relations[*counter] = node;
+			*counter += 1;
+			break;
+
+		case T_NestLoopState:
+			/* join: recurse into both inputs, outer side first */
+			elog(INFO, "T_NestLoopScan");
+			findAllRelations(relations, outerPlanState(node), counter);
+			findAllRelations(relations, innerPlanState(node), counter);
+			break;
+
+		default:
+			elog(INFO, "Only NestLoop, SeqScan, and IndexScan are supported! NodeValue: %d", nodeTag(node));
+			break;
+	}
+}
+
+/*
+ * printAllRelations
+ *
+ * Debugging aid.  For each of the first "size" entries of relations[] that
+ * is a SeqScan state node, pull every tuple from it, log the tuple's first
+ * two attributes (formatted as integers) at INFO level, and finally log the
+ * number of tuples read at DEBUG1.  Entries of any other node type only
+ * produce a DEBUG1 notice.
+ *
+ * Every scan that was read is rewound with ExecReScan() afterwards, so the
+ * join that owns it starts again from the first tuple instead of depending
+ * on what the scan node does when called after it reported end-of-scan.
+ */
+void
+printAllRelations(PlanState ** relations, int size)
+{
+	int			i;
+
+	for (i = 0; i < size; i++)
+	{
+		PlanState  *planNode = relations[i];
+		TupleTableSlot *slot;
+		int			count = 0;
+
+		switch (nodeTag(planNode))
+		{
+			case T_SeqScanState:
+				for (;;)
+				{
+					Datum		attr1;
+					Datum		attr2;
+					bool		isnull;
+
+					slot = ExecProcNode(planNode);
+					if (TupIsNull(slot))
+						break;
+					count++;
+
+					/* attribute numbers are 1-based: V1 is 1, V2 is 2 */
+					attr1 = slot_getattr(slot, 1, &isnull);
+					attr2 = slot_getattr(slot, 2, &isnull);
+					elog(INFO, "V1= %d, V2= %d",
+						 DatumGetInt32(attr1), DatumGetInt32(attr2));
+				}
+				elog(DEBUG1, "table %s, size = %d", "unknown", count);
+
+				/* put the scan back at its start for the real consumer */
+				ExecReScan(planNode);
+				break;
+
+			default:
+				elog(DEBUG1, "Only SeqScan is supported");
+				break;
+		}
+	}
+}
+
+
/* ----------------------------------------------------------------
* ExecNestLoop(node)
*
@@ -56,335 +126,345 @@
* are prepared to return the first tuple.
* ----------------------------------------------------------------
*/
-TupleTableSlot *
+ TupleTableSlot *
ExecNestLoop(NestLoopState *node)
{
- NestLoop *nl;
- PlanState *innerPlan;
- PlanState *outerPlan;
- TupleTableSlot *outerTupleSlot;
- TupleTableSlot *innerTupleSlot;
- List *joinqual;
- List *otherqual;
- ExprContext *econtext;
- ListCell *lc;
-
- /*
- * get information from the node
- */
- ENL1_printf("getting info from node");
-
- nl = (NestLoop *) node->js.ps.plan;
- joinqual = node->js.joinqual;
- otherqual = node->js.ps.qual;
- outerPlan = outerPlanState(node);
- innerPlan = innerPlanState(node);
- econtext = node->js.ps.ps_ExprContext;
-
- /*
- * Check to see if we're still projecting out tuples from a previous join
- * tuple (because there is a function-returning-set in the projection
- * expressions). If so, try to project another one.
- */
- if (node->js.ps.ps_TupFromTlist)
- {
- TupleTableSlot *result;
- ExprDoneCond isDone;
-
- result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
- if (isDone == ExprMultipleResult)
- return result;
- /* Done with that source tuple... */
- node->js.ps.ps_TupFromTlist = false;
- }
-
- /*
- * Reset per-tuple memory context to free any expression evaluation
- * storage allocated in the previous tuple cycle. Note this can't happen
- * until we're done projecting out tuples from a join tuple.
- */
- ResetExprContext(econtext);
-
- /*
- * Ok, everything is setup for the join so now loop until we return a
- * qualifying join tuple.
- */
- ENL1_printf("entering main loop");
-
- for (;;)
- {
- /*
- * If we don't have an outer tuple, get the next one and reset the
- * inner scan.
- */
- if (node->nl_NeedNewOuter)
- {
- ENL1_printf("getting new outer tuple");
- outerTupleSlot = ExecProcNode(outerPlan);
-
- /*
- * if there are no more outer tuples, then the join is complete..
- */
- if (TupIsNull(outerTupleSlot))
- {
- ENL1_printf("no outer tuple, ending join");
- return NULL;
- }
-
- ENL1_printf("saving new outer tuple information");
- econtext->ecxt_outertuple = outerTupleSlot;
- node->nl_NeedNewOuter = false;
- node->nl_MatchedOuter = false;
-
- /*
- * fetch the values of any outer Vars that must be passed to the
- * inner scan, and store them in the appropriate PARAM_EXEC slots.
- */
- foreach(lc, nl->nestParams)
- {
- NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
- int paramno = nlp->paramno;
- ParamExecData *prm;
-
- prm = &(econtext->ecxt_param_exec_vals[paramno]);
- /* Param value should be an OUTER_VAR var */
- Assert(IsA(nlp->paramval, Var));
- Assert(nlp->paramval->varno == OUTER_VAR);
- Assert(nlp->paramval->varattno > 0);
- prm->value = slot_getattr(outerTupleSlot,
- nlp->paramval->varattno,
- &(prm->isnull));
- /* Flag parameter value as changed */
- innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
- paramno);
- }
-
- /*
- * now rescan the inner plan
- */
- ENL1_printf("rescanning inner plan");
- ExecReScan(innerPlan);
- }
-
- /*
- * we have an outerTuple, try to get the next inner tuple.
- */
- ENL1_printf("getting new inner tuple");
-
- innerTupleSlot = ExecProcNode(innerPlan);
- econtext->ecxt_innertuple = innerTupleSlot;
-
- if (TupIsNull(innerTupleSlot))
- {
- ENL1_printf("no inner tuple, need new outer tuple");
-
- node->nl_NeedNewOuter = true;
-
- if (!node->nl_MatchedOuter &&
- (node->js.jointype == JOIN_LEFT ||
- node->js.jointype == JOIN_ANTI))
- {
- /*
- * We are doing an outer join and there were no join matches
- * for this outer tuple. Generate a fake join tuple with
- * nulls for the inner tuple, and return it if it passes the
- * non-join quals.
- */
- econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot;
-
- ENL1_printf("testing qualification for outer-join tuple");
-
- if (otherqual == NIL || ExecQual(otherqual, econtext, false))
- {
- /*
- * qualification was satisfied so we project and return
- * the slot containing the result tuple using
- * ExecProject().
- */
- TupleTableSlot *result;
- ExprDoneCond isDone;
-
- ENL1_printf("qualification succeeded, projecting tuple");
-
- result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
-
- if (isDone != ExprEndResult)
- {
- node->js.ps.ps_TupFromTlist =
- (isDone == ExprMultipleResult);
- return result;
- }
- }
- else
- InstrCountFiltered2(node, 1);
- }
-
- /*
- * Otherwise just return to top of loop for a new outer tuple.
- */
- continue;
- }
-
- /*
- * at this point we have a new pair of inner and outer tuples so we
- * test the inner and outer tuples to see if they satisfy the node's
- * qualification.
- *
- * Only the joinquals determine MatchedOuter status, but all quals
- * must pass to actually return the tuple.
- */
- ENL1_printf("testing qualification");
-
- if (ExecQual(joinqual, econtext, false))
- {
- node->nl_MatchedOuter = true;
-
- /* In an antijoin, we never return a matched tuple */
- if (node->js.jointype == JOIN_ANTI)
- {
- node->nl_NeedNewOuter = true;
- continue; /* return to top of loop */
- }
-
- /*
- * In a semijoin, we'll consider returning the first match, but
- * after that we're done with this outer tuple.
- */
- if (node->js.jointype == JOIN_SEMI)
- node->nl_NeedNewOuter = true;
-
- if (otherqual == NIL || ExecQual(otherqual, econtext, false))
- {
- /*
- * qualification was satisfied so we project and return the
- * slot containing the result tuple using ExecProject().
- */
- TupleTableSlot *result;
- ExprDoneCond isDone;
-
- ENL1_printf("qualification succeeded, projecting tuple");
-
- result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
-
- if (isDone != ExprEndResult)
- {
- node->js.ps.ps_TupFromTlist =
- (isDone == ExprMultipleResult);
- return result;
- }
- }
- else
- InstrCountFiltered2(node, 1);
- }
- else
- InstrCountFiltered1(node, 1);
-
- /*
- * Tuple fails qual, so free per-tuple memory and try again.
- */
- ResetExprContext(econtext);
-
- ENL1_printf("qualification failed, looping");
- }
+ NestLoop *nl;
+ PlanState *innerPlan;
+ PlanState *outerPlan;
+ TupleTableSlot *outerTupleSlot;
+ TupleTableSlot *innerTupleSlot;
+ List *joinqual;
+ List *otherqual;
+ ExprContext *econtext;
+ ListCell *lc;
+
+ /*
+ * get information from the node
+ */
+ ENL1_printf("getting info from node");
+
+ nl = (NestLoop *) node->js.ps.plan;
+ joinqual = node->js.joinqual;
+ otherqual = node->js.ps.qual;
+ outerPlan = outerPlanState(node);
+ innerPlan = innerPlanState(node);
+	if (rootNestLoopNode)
+	{
+		/*
+		 * One-shot debugging hook: the first ExecNestLoop() call that finds
+		 * the flag set clears it, collects the scan nodes below this join
+		 * and dumps them.
+		 *
+		 * The array bound is an enum constant so that relations[] is an
+		 * ordinary fixed-size array rather than a variable-length one, and
+		 * all declarations precede the first statement.
+		 *
+		 * NOTE(review): findAllRelations() is not told this bound, so a plan
+		 * with more scan nodes than this would write past the array.
+		 */
+		enum
+		{
+			numRelations = 10
+		};
+		PlanState  *relations[numRelations];
+		int			counter = 0;
+
+		rootNestLoopNode = false;
+		findAllRelations(relations, (PlanState *) node, &counter);
+		elog(INFO, "There are %d relations", counter);
+		printAllRelations(relations, counter);
+	}
+
+ econtext = node->js.ps.ps_ExprContext;
+
+ /*
+ * Check to see if we're still projecting out tuples from a previous join
+ * tuple (because there is a function-returning-set in the projection
+ * expressions). If so, try to project another one.
+ */
+ if (node->js.ps.ps_TupFromTlist)
+ {
+ TupleTableSlot *result;
+ ExprDoneCond isDone;
+
+ result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+ if (isDone == ExprMultipleResult)
+ return result;
+ /* Done with that source tuple... */
+ node->js.ps.ps_TupFromTlist = false;
+ }
+
+ /*
+ * Reset per-tuple memory context to free any expression evaluation
+ * storage allocated in the previous tuple cycle. Note this can't happen
+ * until we're done projecting out tuples from a join tuple.
+ */
+ ResetExprContext(econtext);
+
+ /*
+ * Ok, everything is setup for the join so now loop until we return a
+ * qualifying join tuple.
+ */
+ ENL1_printf("entering main loop");
+
+ for (;;)
+ {
+ /*
+ * If we don't have an outer tuple, get the next one and reset the
+ * inner scan.
+ */
+ if (node->nl_NeedNewOuter)
+ {
+ ENL1_printf("getting new outer tuple");
+ outerTupleSlot = ExecProcNode(outerPlan);
+
+ /*
+ * if there are no more outer tuples, then the join is complete..
+ */
+ if (TupIsNull(outerTupleSlot))
+ {
+ ENL1_printf("no outer tuple, ending join");
+ return NULL;
+ }
+
+ ENL1_printf("saving new outer tuple information");
+ econtext->ecxt_outertuple = outerTupleSlot;
+ node->nl_NeedNewOuter = false;
+ node->nl_MatchedOuter = false;
+
+ /*
+ * fetch the values of any outer Vars that must be passed to the
+ * inner scan, and store them in the appropriate PARAM_EXEC slots.
+ */
+ foreach(lc, nl->nestParams)
+ {
+ NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
+ int paramno = nlp->paramno;
+ ParamExecData *prm;
+
+ prm = &(econtext->ecxt_param_exec_vals[paramno]);
+ /* Param value should be an OUTER_VAR var */
+ Assert(IsA(nlp->paramval, Var));
+ Assert(nlp->paramval->varno == OUTER_VAR);
+ Assert(nlp->paramval->varattno > 0);
+ prm->value = slot_getattr(outerTupleSlot,
+ nlp->paramval->varattno,
+ &(prm->isnull));
+ /* Flag parameter value as changed */
+ innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
+ paramno);
+ }
+
+ /*
+ * now rescan the inner plan
+ */
+ ENL1_printf("rescanning inner plan");
+ ExecReScan(innerPlan);
+ }
+
+ /*
+ * we have an outerTuple, try to get the next inner tuple.
+ */
+ ENL1_printf("getting new inner tuple");
+
+ innerTupleSlot = ExecProcNode(innerPlan);
+ econtext->ecxt_innertuple = innerTupleSlot;
+
+ if (TupIsNull(innerTupleSlot))
+ {
+ ENL1_printf("no inner tuple, need new outer tuple");
+
+ node->nl_NeedNewOuter = true;
+
+ if (!node->nl_MatchedOuter &&
+ (node->js.jointype == JOIN_LEFT ||
+ node->js.jointype == JOIN_ANTI))
+ {
+ /*
+ * We are doing an outer join and there were no join matches
+ * for this outer tuple. Generate a fake join tuple with
+ * nulls for the inner tuple, and return it if it passes the
+ * non-join quals.
+ */
+ econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot;
+
+ ENL1_printf("testing qualification for outer-join tuple");
+
+ if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+ {
+ /*
+ * qualification was satisfied so we project and return
+ * the slot containing the result tuple using
+ * ExecProject().
+ */
+ TupleTableSlot *result;
+ ExprDoneCond isDone;
+
+ ENL1_printf("qualification succeeded, projecting tuple");
+
+ result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+ if (isDone != ExprEndResult)
+ {
+ node->js.ps.ps_TupFromTlist =
+ (isDone == ExprMultipleResult);
+ return result;
+ }
+ }
+ else
+ InstrCountFiltered2(node, 1);
+ }
+
+ /*
+ * Otherwise just return to top of loop for a new outer tuple.
+ */
+ continue;
+ }
+
+ /*
+ * at this point we have a new pair of inner and outer tuples so we
+ * test the inner and outer tuples to see if they satisfy the node's
+ * qualification.
+ *
+ * Only the joinquals determine MatchedOuter status, but all quals
+ * must pass to actually return the tuple.
+ */
+ ENL1_printf("testing qualification");
+
+ if (ExecQual(joinqual, econtext, false))
+ {
+ node->nl_MatchedOuter = true;
+
+ /* In an antijoin, we never return a matched tuple */
+ if (node->js.jointype == JOIN_ANTI)
+ {
+ node->nl_NeedNewOuter = true;
+ continue; /* return to top of loop */
+ }
+
+ /*
+ * In a semijoin, we'll consider returning the first match, but
+ * after that we're done with this outer tuple.
+ */
+ if (node->js.jointype == JOIN_SEMI)
+ node->nl_NeedNewOuter = true;
+
+ if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+ {
+ /*
+ * qualification was satisfied so we project and return the
+ * slot containing the result tuple using ExecProject().
+ */
+ TupleTableSlot *result;
+ ExprDoneCond isDone;
+
+ ENL1_printf("qualification succeeded, projecting tuple");
+
+ result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+ if (isDone != ExprEndResult)
+ {
+ node->js.ps.ps_TupFromTlist =
+ (isDone == ExprMultipleResult);
+ return result;
+ }
+ }
+ else
+ InstrCountFiltered2(node, 1);
+ }
+ else
+ InstrCountFiltered1(node, 1);
+
+ /*
+ * Tuple fails qual, so free per-tuple memory and try again.
+ */
+ ResetExprContext(econtext);
+
+ ENL1_printf("qualification failed, looping");
+ }
}
/* ----------------------------------------------------------------
* ExecInitNestLoop
* ----------------------------------------------------------------
*/
-NestLoopState *
+ NestLoopState *
ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
{
- NestLoopState *nlstate;
-
- /* check for unsupported flags */
- Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
-
- NL1_printf("ExecInitNestLoop: %s\n",
- "initializing node");
-
- /*
- * create state structure
- */
- nlstate = makeNode(NestLoopState);
- nlstate->js.ps.plan = (Plan *) node;
- nlstate->js.ps.state = estate;
-
- /*
- * Miscellaneous initialization
- *
- * create expression context for node
- */
- ExecAssignExprContext(estate, &nlstate->js.ps);
-
- /*
- * initialize child expressions
- */
- nlstate->js.ps.targetlist = (List *)
- ExecInitExpr((Expr *) node->join.plan.targetlist,
- (PlanState *) nlstate);
- nlstate->js.ps.qual = (List *)
- ExecInitExpr((Expr *) node->join.plan.qual,
- (PlanState *) nlstate);
- nlstate->js.jointype = node->join.jointype;
- nlstate->js.joinqual = (List *)
- ExecInitExpr((Expr *) node->join.joinqual,
- (PlanState *) nlstate);
-
- /*
- * initialize child nodes
- *
- * If we have no parameters to pass into the inner rel from the outer,
- * tell the inner child that cheap rescans would be good. If we do have
- * such parameters, then there is no point in REWIND support at all in the
- * inner child, because it will always be rescanned with fresh parameter
- * values.
- */
- outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags);
- if (node->nestParams == NIL)
- eflags |= EXEC_FLAG_REWIND;
- else
- eflags &= ~EXEC_FLAG_REWIND;
- innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags);
-
- /*
- * tuple table initialization
- */
- ExecInitResultTupleSlot(estate, &nlstate->js.ps);
-
- switch (node->join.jointype)
- {
- case JOIN_INNER:
- case JOIN_SEMI:
- break;
- case JOIN_LEFT:
- case JOIN_ANTI:
- nlstate->nl_NullInnerTupleSlot =
- ExecInitNullTupleSlot(estate,
- ExecGetResultType(innerPlanState(nlstate)));
- break;
- default:
- elog(ERROR, "unrecognized join type: %d",
- (int) node->join.jointype);
- }
-
- /*
- * initialize tuple type and projection info
- */
- ExecAssignResultTypeFromTL(&nlstate->js.ps);
- ExecAssignProjectionInfo(&nlstate->js.ps, NULL);
-
- /*
- * finally, wipe the current outer tuple clean.
- */
- nlstate->js.ps.ps_TupFromTlist = false;
- nlstate->nl_NeedNewOuter = true;
- nlstate->nl_MatchedOuter = false;
-
- NL1_printf("ExecInitNestLoop: %s\n",
- "node initialized");
-
- return nlstate;
+ NestLoopState *nlstate;
+
+ /* check for unsupported flags */
+ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
+
+ NL1_printf("ExecInitNestLoop: %s\n",
+ "initializing node");
+
+ /*
+ * create state structure
+ */
+ nlstate = makeNode(NestLoopState);
+ nlstate->js.ps.plan = (Plan *) node;
+ nlstate->js.ps.state = estate;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &nlstate->js.ps);
+
+ /*
+ * initialize child expressions
+ */
+ nlstate->js.ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->join.plan.targetlist,
+ (PlanState *) nlstate);
+ nlstate->js.ps.qual = (List *)
+ ExecInitExpr((Expr *) node->join.plan.qual,
+ (PlanState *) nlstate);
+ nlstate->js.jointype = node->join.jointype;
+ nlstate->js.joinqual = (List *)
+ ExecInitExpr((Expr *) node->join.joinqual,
+ (PlanState *) nlstate);
+
+ /*
+ * initialize child nodes
+ *
+ * If we have no parameters to pass into the inner rel from the outer,
+ * tell the inner child that cheap rescans would be good. If we do have
+ * such parameters, then there is no point in REWIND support at all in the
+ * inner child, because it will always be rescanned with fresh parameter
+ * values.
+ */
+ outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags);
+ if (node->nestParams == NIL)
+ eflags |= EXEC_FLAG_REWIND;
+ else
+ eflags &= ~EXEC_FLAG_REWIND;
+ innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags);
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &nlstate->js.ps);
+
+ switch (node->join.jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_SEMI:
+ break;
+ case JOIN_LEFT:
+ case JOIN_ANTI:
+ nlstate->nl_NullInnerTupleSlot =
+ ExecInitNullTupleSlot(estate,
+ ExecGetResultType(innerPlanState(nlstate)));
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) node->join.jointype);
+ }
+
+ /*
+ * initialize tuple type and projection info
+ */
+ ExecAssignResultTypeFromTL(&nlstate->js.ps);
+ ExecAssignProjectionInfo(&nlstate->js.ps, NULL);
+
+ /*
+ * finally, wipe the current outer tuple clean.
+ */
+ nlstate->js.ps.ps_TupFromTlist = false;
+ nlstate->nl_NeedNewOuter = true;
+ nlstate->nl_MatchedOuter = false;
+
+ NL1_printf("ExecInitNestLoop: %s\n",
+ "node initialized");
+
+ return nlstate;
}
/* ----------------------------------------------------------------
@@ -393,55 +473,55 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
* closes down scans and frees allocated storage
* ----------------------------------------------------------------
*/
-void
+ void
ExecEndNestLoop(NestLoopState *node)
{
- NL1_printf("ExecEndNestLoop: %s\n",
- "ending node processing");
-
- /*
- * Free the exprcontext
- */
- ExecFreeExprContext(&node->js.ps);
-
- /*
- * clean out the tuple table
- */
- ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
-
- /*
- * close down subplans
- */
- ExecEndNode(outerPlanState(node));
- ExecEndNode(innerPlanState(node));
-
- NL1_printf("ExecEndNestLoop: %s\n",
- "node processing ended");
+ NL1_printf("ExecEndNestLoop: %s\n",
+ "ending node processing");
+
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->js.ps);
+
+ /*
+ * clean out the tuple table
+ */
+ ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
+
+ /*
+ * close down subplans
+ */
+ ExecEndNode(outerPlanState(node));
+ ExecEndNode(innerPlanState(node));
+
+ NL1_printf("ExecEndNestLoop: %s\n",
+ "node processing ended");
}
/* ----------------------------------------------------------------
* ExecReScanNestLoop
* ----------------------------------------------------------------
*/
-void
+ void
ExecReScanNestLoop(NestLoopState *node)
{
- PlanState *outerPlan = outerPlanState(node);
-
- /*
- * If outerPlan->chgParam is not null then plan will be automatically
- * re-scanned by first ExecProcNode.
- */
- if (outerPlan->chgParam == NULL)
- ExecReScan(outerPlan);
-
- /*
- * innerPlan is re-scanned for each new outer tuple and MUST NOT be
- * re-scanned from here or you'll get troubles from inner index scans when
- * outer Vars are used as run-time keys...
- */
-
- node->js.ps.ps_TupFromTlist = false;
- node->nl_NeedNewOuter = true;
- node->nl_MatchedOuter = false;
+ PlanState *outerPlan = outerPlanState(node);
+
+ /*
+ * If outerPlan->chgParam is not null then plan will be automatically
+ * re-scanned by first ExecProcNode.
+ */
+ if (outerPlan->chgParam == NULL)
+ ExecReScan(outerPlan);
+
+ /*
+ * innerPlan is re-scanned for each new outer tuple and MUST NOT be
+ * re-scanned from here or you'll get troubles from inner index scans when
+ * outer Vars are used as run-time keys...
+ */
+
+ node->js.ps.ps_TupFromTlist = false;
+ node->nl_NeedNewOuter = true;
+ node->nl_MatchedOuter = false;
}