From 9f44d54c07180b826a6276d3acf5e1458b507c3f Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 23 Sep 2022 20:42:55 +0200
Subject: [PATCH 01/88] Switch back to development mode.

---
 CHANGELOG            | 6 ++++++
 sqlparse/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 229d9a4d..2b00a890 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,9 @@
+Development Version
+-------------------
+
+Nothing yet.
+
+
 Release 0.4.3 (Sep 23, 2022)
 ----------------------------
 
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index 0dd3475e..f901185a 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -16,7 +16,7 @@
 from sqlparse import formatter
 
 
-__version__ = '0.4.3'
+__version__ = '0.4.4.dev0'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
 

From e9241945801808d1db7f76bdccbbe9a200042c37 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 10:57:31 +0100
Subject: [PATCH 02/88] Revert "add regex pattern to identify IN as a Compasion
 token"

This reverts commit 28c4d4026e1d9389a99d8cd627c96fa360c17fc4.

See #694. The expectation is that IN is primarily recognized as a keyword,
although it acts as a comparison operator. This also matches the definition of
IN in most SQL syntax references where it is listed as a reserved
keyword (PostgreSQL:
https://www.postgresql.org/docs/current/sql-keywords-appendix.html, MySQL:
https://dev.mysql.com/doc/refman/8.0/en/keywords.html, for example).
---
 sqlparse/keywords.py   |  2 +-
 tests/test_grouping.py | 12 +-----------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index d73e1143..dff5e1cb 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -50,7 +50,7 @@ def is_keyword(value):
         (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder),
 
         (r'\\\w+', tokens.Command),
-        (r'(NOT\s+)?(IN)\b', tokens.Operator.Comparison),
+
         # FIXME(andi): VALUES shouldn't be listed here
         # see https://github.com/andialbrecht/sqlparse/pull/64
         # AS and IN are special, it may be followed by a parenthesis, but
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index 546ad4b2..03d16c5d 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -376,20 +376,10 @@ def test_grouping_function_not_in():
     # issue183
     p = sqlparse.parse('in(1, 2)')[0]
     assert len(p.tokens) == 2
-    assert p.tokens[0].ttype == T.Comparison
+    assert p.tokens[0].ttype == T.Keyword
     assert isinstance(p.tokens[1], sql.Parenthesis)
 
 
-def test_in_comparison():
-    # issue566
-    p = sqlparse.parse('a in (1, 2)')[0]
-    assert len(p.tokens) == 1
-    assert isinstance(p.tokens[0], sql.Comparison)
-    assert len(p.tokens[0].tokens) == 5
-    assert p.tokens[0].left.value == 'a'
-    assert p.tokens[0].right.value == '(1, 2)'
-
-
 def test_grouping_varchar():
     p = sqlparse.parse('"text" Varchar(50) NOT NULL')[0]
     assert isinstance(p.tokens[2], sql.Function)

From 24f29906a422e00f0181a4c801677fbde70f0b94 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 11:08:08 +0100
Subject: [PATCH 03/88] Update changelog.

---
 CHANGELOG | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 2b00a890..123ed173 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,12 @@
 Development Version
 -------------------
 
-Nothing yet.
+Bug Fixes
+
+* Revert a change from 0.4.0 that changed IN to be a comparison (issue694).
+  The primary expectation is that IN is treated as a keyword and not as a
+  comparison operator. That also follows the definition of reserved keywords
+  for the major SQL syntax definitions.
 
 
 Release 0.4.3 (Sep 23, 2022)

From bacbeff74bc3d1866246bb1f397f18e64a62c27a Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 11:23:29 +0100
Subject: [PATCH 04/88] Update workflow runner.

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 4f165859..e87b5e42 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -12,7 +12,7 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.5 and 3.6 available
     strategy:
       matrix:
         python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"]

From bf5aff484146ffda3944088c48323ad9272b91fb Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 11:26:49 +0100
Subject: [PATCH 05/88] Update tested Python versions in workflow.

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index e87b5e42..2e07ee10 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.5 and 3.6 available
     strategy:
       matrix:
-        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"]
+        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
 
     steps:
     - uses: actions/checkout@v3

From 243da5137c6d21b7b246f884fb07e1f0625f2673 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 11:32:09 +0100
Subject: [PATCH 06/88] Setup a nightly build, even without changes in the
 module itself.

---
 .github/workflows/python-app.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 2e07ee10..1a173f81 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -8,6 +8,8 @@ on:
     branches: [ master ]
   pull_request:
     branches: [ master ]
+  schedule:
+    cron: '0 12 * * *'
 
 jobs:
   build:

From cda0e499a1c762662d2b06b18e7b4aed2da75bc7 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 11:32:54 +0100
Subject: [PATCH 07/88] Fix schedule trigger syntax.

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 1a173f81..fef18a5d 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -9,7 +9,7 @@ on:
   pull_request:
     branches: [ master ]
   schedule:
-    cron: '0 12 * * *'
+    - cron: '0 12 * * *'
 
 jobs:
   build:

From 8b789f286e1b6cbf05c15020ea7544cb7f02f8f7 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 30 Dec 2022 15:44:37 +0100
Subject: [PATCH 08/88] Switch to pyproject.toml (fixes #685).

---
 .github/workflows/python-app.yml |  9 ++--
 CHANGELOG                        |  4 ++
 MANIFEST.in                      | 11 -----
 Makefile                         |  2 +-
 pyproject.toml                   | 70 ++++++++++++++++++++++++++++++++
 setup.cfg                        | 55 -------------------------
 setup.py                         | 12 ------
 7 files changed, 80 insertions(+), 83 deletions(-)
 delete mode 100644 MANIFEST.in
 create mode 100644 pyproject.toml
 delete mode 100644 setup.cfg
 delete mode 100644 setup.py

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index fef18a5d..906ca7e8 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -5,7 +5,8 @@ name: Python application
 
 on:
   push:
-    branches: [ master ]
+    branches:
+    - master
   pull_request:
     branches: [ master ]
   schedule:
@@ -27,9 +28,9 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install codecov flake8 pytest pytest-cov
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        python -m pip install --upgrade pip flit
+        flit install --deps=develop
+        pip install codecov
     - name: Lint with flake8
       run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics
     - name: Test with pytest
diff --git a/CHANGELOG b/CHANGELOG
index 123ed173..94864138 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -8,6 +8,10 @@ Bug Fixes
   comparison operator. That also follows the definition of reserved keywords
   for the major SQL syntax definitions.
 
+Other
+
+* sqlparse now uses pyproject.toml instead of setup.cfg (issue685).
+
 
 Release 0.4.3 (Sep 23, 2022)
 ----------------------------
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 8043b359..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,11 +0,0 @@
-recursive-include docs source/*
-include docs/sqlformat.1
-include docs/Makefile
-recursive-include tests *.py *.sql
-include LICENSE
-include TODO
-include AUTHORS
-include CHANGELOG
-include Makefile
-include setup.cfg
-include tox.ini
diff --git a/Makefile b/Makefile
index ee35e546..1657822e 100644
--- a/Makefile
+++ b/Makefile
@@ -22,5 +22,5 @@ clean:
 
 release:
 	@rm -rf dist/
-	python setup.py sdist bdist_wheel
+	python -m build
 	twine upload --sign --identity E0B84F81 dist/*
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..338a53ce
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,70 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "sqlparse"
+description = "A non-validating SQL parser."
+authors = [{name = "Andi Albrecht", email = "albrecht.andi@gmail.com"}]
+readme = "README.rst"
+dynamic = ["version"]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.5",
+    "Programming Language :: Python :: 3.6",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+    "Topic :: Database",
+    "Topic :: Software Development",
+]
+requires-python = ">=3.5"
+
+[project.urls]
+Home = "https://github.com/andialbrecht/sqlparse"
+Documentation = "https://sqlparse.readthedocs.io/"
+"Release Notes" = "https://sqlparse.readthedocs.io/en/latest/changes/"
+Source = "https://github.com/andialbrecht/sqlparse"
+Tracker = "https://github.com/andialbrecht/sqlparse/issues"
+
+[project.scripts]
+sqlformat = "sqlparse.__main__:main"
+
+[project.optional-dependencies]
+dev = [
+    "flake8",
+    "build",
+]
+test = [
+    "pytest",
+    "pytest-cov",
+]
+doc = [
+    "sphinx",
+]
+
+[tool.flit.sdist]
+include = [
+    "docs/source/",
+    "docs/sqlformat.1",
+    "docs/Makefile",
+    "tests/*.py", "tests/files/*.sql",
+    "LICENSE",
+    "TODO",
+    "AUTHORS",
+    "CHANGELOG",
+    "Makefile",
+    "tox.ini",
+]
+
+[tool.coverage.run]
+omit = ["sqlparse/__main__.py"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 0843b704..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,55 +0,0 @@
-[metadata]
-name = sqlparse
-version = attr: sqlparse.__version__
-url = https://github.com/andialbrecht/sqlparse
-author = Andi Albrecht
-author_email = albrecht.andi@gmail.com
-description = A non-validating SQL parser.
-long_description = file: README.rst
-license = BSD-3-Clause
-classifiers =
-    Development Status :: 5 - Production/Stable
-    Intended Audience :: Developers
-    License :: OSI Approved :: BSD License
-    Operating System :: OS Independent
-    Programming Language :: Python
-    Programming Language :: Python :: 3
-    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.5
-    Programming Language :: Python :: 3.6
-    Programming Language :: Python :: 3.7
-    Programming Language :: Python :: 3.8
-    Programming Language :: Python :: 3.9
-    Programming Language :: Python :: 3.10
-    Programming Language :: Python :: Implementation :: CPython
-    Programming Language :: Python :: Implementation :: PyPy
-    Topic :: Database
-    Topic :: Software Development
-project_urls =
-    Documentation = https://sqlparse.readthedocs.io/
-    Release Notes = https://sqlparse.readthedocs.io/en/latest/changes/
-    Source = https://github.com/andialbrecht/sqlparse
-    Tracker = https://github.com/andialbrecht/sqlparse/issues
-
-[options]
-python_requires = >=3.5
-packages = find:
-
-[options.packages.find]
-exclude = tests
-
-[options.entry_points]
-console_scripts =
-    sqlformat = sqlparse.__main__:main
-
-[tool:pytest]
-xfail_strict = True
-
-[flake8]
-extend-ignore =
-    E731
-
-[coverage:run]
-branch = False
-omit =
-    sqlparse/__main__.py
diff --git a/setup.py b/setup.py
deleted file mode 100644
index ede0aff8..00000000
--- a/setup.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (C) 2009-2020 the sqlparse authors and contributors
-# <see AUTHORS file>
-#
-# This setup script is part of python-sqlparse and is released under
-# the BSD License: https://opensource.org/licenses/BSD-3-Clause
-
-from setuptools import setup
-
-
-setup()

From 9a1cb5dddd1545c30b1e3a2c6f5d3514d079d93e Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Wed, 30 Nov 2022 14:51:58 +0000
Subject: [PATCH 09/88] configurable syntax

---
 sqlparse/keywords.py   | 22 ++++-------
 sqlparse/lexer.py      | 87 ++++++++++++++++++++++++++++++++++--------
 tests/test_keywords.py |  3 +-
 3 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index dff5e1cb..ce537812 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -6,23 +6,17 @@
 # the BSD License: https://opensource.org/licenses/BSD-3-Clause
 
 import re
+from typing import Dict, List, Tuple, Callable, Union
 
 from sqlparse import tokens
 
+# object() only supports "is" and is useful as a marker
+PROCESS_AS_KEYWORD = object()
 
-def is_keyword(value):
-    """Checks for a keyword.
-
-    If the given value is in one of the KEYWORDS_* dictionary
-    it's considered a keyword. Otherwise tokens.Name is returned.
-    """
-    val = value.upper()
-    return (KEYWORDS_COMMON.get(val)
-            or KEYWORDS_ORACLE.get(val)
-            or KEYWORDS_PLPGSQL.get(val)
-            or KEYWORDS_HQL.get(val)
-            or KEYWORDS_MSACCESS.get(val)
-            or KEYWORDS.get(val, tokens.Name)), value
+SQL_REGEX_TYPE = List[
+    Tuple[Callable, Union[type(PROCESS_AS_KEYWORD), tokens._TokenType]]
+]
+KEYWORDS_TYPE = Dict[str, tokens._TokenType]
 
 
 SQL_REGEX = {
@@ -99,7 +93,7 @@ def is_keyword(value):
         (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
         # Check for keywords, also returns tokens.Name if regex matches
         # but the match isn't a keyword.
-        (r'[0-9_\w][_$#\w]*', is_keyword),
+        (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD),
         (r'[;:()\[\],\.]', tokens.Punctuation),
         (r'[<>=~!]+', tokens.Operator.Comparison),
         (r'[+/@#%^&|^-]+', tokens.Operator),
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 4397f185..61c52a97 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -13,19 +13,74 @@
 # and to allow some customizations.
 
 from io import TextIOBase
+from typing import List
 
-from sqlparse import tokens
-from sqlparse.keywords import SQL_REGEX
+from sqlparse import tokens, keywords
 from sqlparse.utils import consume
 
 
-class Lexer:
-    """Lexer
-    Empty class. Leaving for backwards-compatibility
-    """
+class _LexerSingletonMetaclass(type):
+    _lexer_instance = None
+
+    def __call__(cls, *args, **kwargs):
+        if _LexerSingletonMetaclass._lexer_instance is None:
+            _LexerSingletonMetaclass._lexer_instance = super(
+                _LexerSingletonMetaclass, cls
+            ).__call__(*args, **kwargs)
+        return _LexerSingletonMetaclass._lexer_instance
+
+
+class Lexer(metaclass=_LexerSingletonMetaclass):
+    """The Lexer supports configurable syntax.
+    To add support for additional keywords, use the `add_keywords` method."""
+
+    _SQL_REGEX: keywords.SQL_REGEX_TYPE
+    _keywords: List[keywords.KEYWORDS_TYPE]
+
+    def default_initialization(self):
+        """Initialize the lexer with default dictionaries.
+        Useful if you need to revert custom syntax settings."""
+        self.clear()
+        self.set_SQL_REGEX(keywords.SQL_REGEX)
+        self.add_keywords(keywords.KEYWORDS_COMMON)
+        self.add_keywords(keywords.KEYWORDS_ORACLE)
+        self.add_keywords(keywords.KEYWORDS_PLPGSQL)
+        self.add_keywords(keywords.KEYWORDS_HQL)
+        self.add_keywords(keywords.KEYWORDS_MSACCESS)
+        self.add_keywords(keywords.KEYWORDS)
+
+    def __init__(self):
+        self.default_initialization()
+
+    def clear(self):
+        """Clear all syntax configurations.
+        Useful if you want to load a reduced set of syntax configurations."""
+        self._SQL_REGEX = []
+        self._keywords = []
+
+    def set_SQL_REGEX(self, SQL_REGEX: keywords.SQL_REGEX_TYPE):
+        """Set the list of regex that will parse the SQL."""
+        self._SQL_REGEX = SQL_REGEX
+
+    def add_keywords(self, keywords: keywords.KEYWORDS_TYPE):
+        """Add keyword dictionaries. Keywords are looked up in the same order
+        that dictionaries were added."""
+        self._keywords.append(keywords)
+
+    def is_keyword(self, value):
+        """Checks for a keyword.
+
+        If the given value is in one of the KEYWORDS_* dictionary
+        it's considered a keyword. Otherwise tokens.Name is returned.
+        """
+        val = value.upper()
+        for kwdict in self._keywords:
+            if val in kwdict:
+                return kwdict[val], value
+        else:
+            return tokens.Name, value
 
-    @staticmethod
-    def get_tokens(text, encoding=None):
+    def get_tokens(self, text, encoding=None):
         """
         Return an iterable of (tokentype, value) pairs generated from
         `text`. If `unfiltered` is set to `True`, the filtering mechanism
@@ -48,24 +103,26 @@ def get_tokens(text, encoding=None):
                 text = text.decode(encoding)
             else:
                 try:
-                    text = text.decode('utf-8')
+                    text = text.decode("utf-8")
                 except UnicodeDecodeError:
-                    text = text.decode('unicode-escape')
+                    text = text.decode("unicode-escape")
         else:
-            raise TypeError("Expected text or file-like object, got {!r}".
-                            format(type(text)))
+            raise TypeError(
+                "Expected text or file-like object, got {!r}"
+                .format(type(text))
+            )
 
         iterable = enumerate(text)
         for pos, char in iterable:
-            for rexmatch, action in SQL_REGEX:
+            for rexmatch, action in self._SQL_REGEX:
                 m = rexmatch(text, pos)
 
                 if not m:
                     continue
                 elif isinstance(action, tokens._TokenType):
                     yield action, m.group()
-                elif callable(action):
-                    yield action(m.group())
+                elif action is keywords.PROCESS_AS_KEYWORD:
+                    yield self.is_keyword(m.group())
 
                 consume(iterable, m.end() - pos - 1)
                 break
diff --git a/tests/test_keywords.py b/tests/test_keywords.py
index d4ded4b6..a3b1b385 100644
--- a/tests/test_keywords.py
+++ b/tests/test_keywords.py
@@ -2,6 +2,7 @@
 
 from sqlparse import tokens
 from sqlparse.keywords import SQL_REGEX
+from sqlparse.lexer import Lexer
 
 
 class TestSQLREGEX:
@@ -9,5 +10,5 @@ class TestSQLREGEX:
                                         '1.', '-1.',
                                         '.1', '-.1'])
     def test_float_numbers(self, number):
-        ttype = next(tt for action, tt in SQL_REGEX if action(number))
+        ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number))
         assert tokens.Number.Float == ttype

From e37eaea4a78cbb335070ffec018bfc28425aa1a4 Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Wed, 30 Nov 2022 14:52:13 +0000
Subject: [PATCH 10/88] test configurable syntax

---
 tests/test_parse.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/test_parse.py b/tests/test_parse.py
index ec327ac8..c5dfd369 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -5,6 +5,7 @@
 
 import sqlparse
 from sqlparse import sql, tokens as T
+from sqlparse.lexer import Lexer
 
 
 def test_parse_tokenize():
@@ -489,3 +490,45 @@ def test_parenthesis():
                                                     T.Newline,
                                                     T.Newline,
                                                     T.Punctuation]
+
+
+def test_configurable_syntax():
+    sql = """select * from foo BACON SPAM EGGS;"""
+    # sql="""select * from mydb.mytable BACON SPAM EGGS;"""
+    tokens = sqlparse.parse(sql)[0]
+
+    assert list(
+        (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
+    ) == [
+        (sqlparse.tokens.Keyword.DML, "select"),
+        (sqlparse.tokens.Wildcard, "*"),
+        (sqlparse.tokens.Keyword, "from"),
+        (None, "foo BACON"),
+        (None, "SPAM EGGS"),
+        (sqlparse.tokens.Punctuation, ";"),
+    ]
+
+    Lexer().add_keywords(
+        {
+            "BACON": sqlparse.tokens.Name.Builtin,
+            "SPAM": sqlparse.tokens.Keyword,
+            "EGGS": sqlparse.tokens.Keyword,
+        }
+    )
+
+    tokens = sqlparse.parse(sql)[0]
+
+    assert list(
+        (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
+    ) == [
+        (sqlparse.tokens.Keyword.DML, "select"),
+        (sqlparse.tokens.Wildcard, "*"),
+        (sqlparse.tokens.Keyword, "from"),
+        (None, "foo"),
+        (sqlparse.tokens.Name.Builtin, "BACON"),
+        (sqlparse.tokens.Keyword, "SPAM"),
+        (sqlparse.tokens.Keyword, "EGGS"),
+        (sqlparse.tokens.Punctuation, ";"),
+    ]
+    # reset the syntax for later tests.
+    Lexer().default_initialization()

From 8515d2edd70fc16d69aa7b1094f9b3534dfa74d9 Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Wed, 30 Nov 2022 15:00:03 +0000
Subject: [PATCH 11/88] remove type annotations for python 3.5 compatibility

---
 sqlparse/keywords.py | 6 ------
 sqlparse/lexer.py    | 8 ++------
 tests/test_parse.py  | 1 -
 3 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index ce537812..6bc7937a 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -6,18 +6,12 @@
 # the BSD License: https://opensource.org/licenses/BSD-3-Clause
 
 import re
-from typing import Dict, List, Tuple, Callable, Union
 
 from sqlparse import tokens
 
 # object() only supports "is" and is useful as a marker
 PROCESS_AS_KEYWORD = object()
 
-SQL_REGEX_TYPE = List[
-    Tuple[Callable, Union[type(PROCESS_AS_KEYWORD), tokens._TokenType]]
-]
-KEYWORDS_TYPE = Dict[str, tokens._TokenType]
-
 
 SQL_REGEX = {
     'root': [
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 61c52a97..7408e01a 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -13,7 +13,6 @@
 # and to allow some customizations.
 
 from io import TextIOBase
-from typing import List
 
 from sqlparse import tokens, keywords
 from sqlparse.utils import consume
@@ -34,9 +33,6 @@ class Lexer(metaclass=_LexerSingletonMetaclass):
     """The Lexer supports configurable syntax.
     To add support for additional keywords, use the `add_keywords` method."""
 
-    _SQL_REGEX: keywords.SQL_REGEX_TYPE
-    _keywords: List[keywords.KEYWORDS_TYPE]
-
     def default_initialization(self):
         """Initialize the lexer with default dictionaries.
         Useful if you need to revert custom syntax settings."""
@@ -58,11 +54,11 @@ def clear(self):
         self._SQL_REGEX = []
         self._keywords = []
 
-    def set_SQL_REGEX(self, SQL_REGEX: keywords.SQL_REGEX_TYPE):
+    def set_SQL_REGEX(self, SQL_REGEX):
         """Set the list of regex that will parse the SQL."""
         self._SQL_REGEX = SQL_REGEX
 
-    def add_keywords(self, keywords: keywords.KEYWORDS_TYPE):
+    def add_keywords(self, keywords):
         """Add keyword dictionaries. Keywords are looked up in the same order
         that dictionaries were added."""
         self._keywords.append(keywords)
diff --git a/tests/test_parse.py b/tests/test_parse.py
index c5dfd369..3018d9ad 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -494,7 +494,6 @@ def test_parenthesis():
 
 def test_configurable_syntax():
     sql = """select * from foo BACON SPAM EGGS;"""
-    # sql="""select * from mydb.mytable BACON SPAM EGGS;"""
     tokens = sqlparse.parse(sql)[0]
 
     assert list(

From f9a73a62cfc23b10c38f22a10bd1d4c3edbb286f Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Wed, 30 Nov 2022 22:34:52 +0000
Subject: [PATCH 12/88] test for changing the regex

---
 sqlparse/lexer.py   | 10 ++++------
 tests/test_parse.py | 34 +++++++++++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 7408e01a..aafb55f2 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -99,14 +99,12 @@ def get_tokens(self, text, encoding=None):
                 text = text.decode(encoding)
             else:
                 try:
-                    text = text.decode("utf-8")
+                    text = text.decode('utf-8')
                 except UnicodeDecodeError:
-                    text = text.decode("unicode-escape")
+                    text = text.decode('unicode-escape')
         else:
-            raise TypeError(
-                "Expected text or file-like object, got {!r}"
-                .format(type(text))
-            )
+            raise TypeError("Expected text or file-like object, got {!r}".
+                            format(type(text)))
 
         iterable = enumerate(text)
         for pos, char in iterable:
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 3018d9ad..3ac65001 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -1,10 +1,11 @@
 """Tests sqlparse.parse()."""
+import re
 from io import StringIO
 
 import pytest
 
 import sqlparse
-from sqlparse import sql, tokens as T
+from sqlparse import sql, tokens as T, keywords
 from sqlparse.lexer import Lexer
 
 
@@ -491,8 +492,7 @@ def test_parenthesis():
                                                     T.Newline,
                                                     T.Punctuation]
 
-
-def test_configurable_syntax():
+def test_configurable_keywords():
     sql = """select * from foo BACON SPAM EGGS;"""
     tokens = sqlparse.parse(sql)[0]
 
@@ -517,6 +517,9 @@ def test_configurable_syntax():
 
     tokens = sqlparse.parse(sql)[0]
 
+    # reset the syntax for later tests.
+    Lexer().default_initialization()
+
     assert list(
         (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
     ) == [
@@ -529,5 +532,30 @@ def test_configurable_syntax():
         (sqlparse.tokens.Keyword, "EGGS"),
         (sqlparse.tokens.Punctuation, ";"),
     ]
+
+
+def test_configurable_regex():
+    lex = Lexer()
+    lex.clear()
+
+    my_regex = (
+        re.compile(r"ZORDER\s+BY\b", keywords.FLAGS).match,
+        sqlparse.tokens.Keyword,
+    )
+
+    lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:])
+    lex.add_keywords(keywords.KEYWORDS_COMMON)
+    lex.add_keywords(keywords.KEYWORDS_ORACLE)
+    lex.add_keywords(keywords.KEYWORDS_PLPGSQL)
+    lex.add_keywords(keywords.KEYWORDS_HQL)
+    lex.add_keywords(keywords.KEYWORDS_MSACCESS)
+    lex.add_keywords(keywords.KEYWORDS)
+
+    tokens = sqlparse.parse("select * from foo zorder by bar;")[0]
+
     # reset the syntax for later tests.
     Lexer().default_initialization()
+
+    assert list(
+        (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
+    )[4] == (sqlparse.tokens.Keyword, "zorder by")

From e0d3928ba69d73ba874ca03ec4395e94cf1ab293 Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Thu, 1 Dec 2022 10:35:42 +0000
Subject: [PATCH 13/88] lexer documentation

---
 docs/source/extending.rst |  66 +++++++++++++++
 docs/source/index.rst     |   1 +
 sqlparse/keywords.py      | 164 +++++++++++++++++++-------------------
 sqlparse/lexer.py         |   5 +-
 tests/test_parse.py       |   6 +-
 5 files changed, 151 insertions(+), 91 deletions(-)
 create mode 100644 docs/source/extending.rst

diff --git a/docs/source/extending.rst b/docs/source/extending.rst
new file mode 100644
index 00000000..f1bd5512
--- /dev/null
+++ b/docs/source/extending.rst
@@ -0,0 +1,66 @@
+Extending :mod:`sqlparse`
+=========================
+
+.. module:: sqlparse
+   :synopsis: Extending parsing capability of sqlparse.
+
+The :mod:`sqlparse` module uses a sql grammar that was tuned through usage and numerous
+PRs to fit a broad range of SQL syntaxes, but it cannot cater to every given case since
+some SQL dialects have adopted conflicting meanings of certain keywords. Sqlparse
+therefore exposes a mechanism to configure the fundamental keywords and regular
+expressions that parse the language as described below.
+
+If you find an adaptation that works for your specific use-case, please consider
+contributing it back to the community by opening a PR on
+`GitHub <https://github.com/andialbrecht/sqlparse>`_.
+
+Configuring the Lexer
+---------------------
+
+The lexer is a singleton class that breaks down the stream of characters into language
+tokens. It does this by using a sequence of regular expressions and keywords that are
+listed in the file ``sqlparse.keywords``. Instead of applying these fixed grammar
+definitions directly, the lexer is default initialized in its method called
+``default_initialization()``. As an api user, you can adapt the Lexer configuration by
+applying your own configuration logic. To do so, start out by clearing previous
+configurations with ``.clear()``, then apply the SQL list with
+``.set_SQL_REGEX(SQL_REGEX)``, and apply keyword lists with ``.add_keywords(KEYWORDS)``.
+
+You can do so by re-using the expressions in ``sqlparse.keywords`` (see example below),
+leaving parts out, or by making up your own master list.
+
+See the expected types of the arguments by inspecting their structure in
+``sqlparse.keywords``.
+(For compatibility with Python 3.5, this library does not use type-hints.)
+
+The following example adds support for the expression ``ZORDER BY``, and adds ``BAR`` as
+a keyword to the lexer:
+
+..  code-block:: python
+
+    import re
+
+    import sqlparse
+    from sqlparse import keywords
+    from sqlparse.lexer import Lexer
+
+    lex = Lexer()
+    lex.clear()
+
+    my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
+
+    # slice the default SQL_REGEX to inject the custom object
+    lex.set_SQL_REGEX(
+        keywords.SQL_REGEX[:38]
+        + [my_regex]
+        + keywords.SQL_REGEX[38:]
+    )
+    lex.add_keywords(keywords.KEYWORDS_COMMON)
+    lex.add_keywords(keywords.KEYWORDS_ORACLE)
+    lex.add_keywords(keywords.KEYWORDS_PLPGSQL)
+    lex.add_keywords(keywords.KEYWORDS_HQL)
+    lex.add_keywords(keywords.KEYWORDS_MSACCESS)
+    lex.add_keywords(keywords.KEYWORDS)
+    lex.add_keywords({'BAR': sqlparse.tokens.Keyword})
+
+    sqlparse.parse("select * from foo zorder by bar;")
diff --git a/docs/source/index.rst b/docs/source/index.rst
index cba33141..e18d2b3c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -20,6 +20,7 @@ Contents
    api
    analyzing
    ui
+   extending
    changes
    license
    indices
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 6bc7937a..f04f928e 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -5,96 +5,92 @@
 # This module is part of python-sqlparse and is released under
 # the BSD License: https://opensource.org/licenses/BSD-3-Clause
 
-import re
-
 from sqlparse import tokens
 
 # object() only supports "is" and is useful as a marker
+# use this marker to specify that the given regex in SQL_REGEX
+# shall be processed further through a lookup in the KEYWORDS dictionaries
 PROCESS_AS_KEYWORD = object()
 
 
-SQL_REGEX = {
-    'root': [
-        (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
-        (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint),
-
-        (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single),
-        (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline),
-
-        (r'(\r\n|\r|\n)', tokens.Newline),
-        (r'\s+?', tokens.Whitespace),
-
-        (r':=', tokens.Assignment),
-        (r'::', tokens.Punctuation),
-
-        (r'\*', tokens.Wildcard),
-
-        (r"`(``|[^`])*`", tokens.Name),
-        (r"´(´´|[^´])*´", tokens.Name),
-        (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
-
-        (r'\?', tokens.Name.Placeholder),
-        (r'%(\(\w+\))?s', tokens.Name.Placeholder),
-        (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder),
-
-        (r'\\\w+', tokens.Command),
-
-        # FIXME(andi): VALUES shouldn't be listed here
-        # see https://github.com/andialbrecht/sqlparse/pull/64
-        # AS and IN are special, it may be followed by a parenthesis, but
-        # are never functions, see issue183 and issue507
-        (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword),
-
-        (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name),
-
-        # see issue #39
-        # Spaces around period `schema . name` are valid identifier
-        # TODO: Spaces before period not implemented
-        (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name),  # 'Name'.
-        # FIXME(atronah): never match,
-        # because `re.match` doesn't work with look-behind regexp feature
-        (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name),  # .'Name'
-        (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name),  # side effect: change kw to func
-        (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
-        (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float),
-        (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
-         tokens.Number.Float),
-        (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
-        (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
-        # not a real string literal in ANSI SQL:
-        (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
-        (r'(""|".*?[^\\]")', tokens.String.Symbol),
-        # sqlite names can be escaped with [square brackets]. left bracket
-        # cannot be preceded by word character or a right bracket --
-        # otherwise it's probably an array index
-        (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name),
-        (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
-         r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
-        (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
-        (r'NOT\s+NULL\b', tokens.Keyword),
-        (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword),
-        (r'UNION\s+ALL\b', tokens.Keyword),
-        (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
-        (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
-        (r'GROUP\s+BY\b', tokens.Keyword),
-        (r'ORDER\s+BY\b', tokens.Keyword),
-        (r'HANDLER\s+FOR\b', tokens.Keyword),
-        (r'(LATERAL\s+VIEW\s+)'
-         r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b',
-         tokens.Keyword),
-        (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast),
-        (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison),
-        (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
-        # Check for keywords, also returns tokens.Name if regex matches
-        # but the match isn't a keyword.
-        (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD),
-        (r'[;:()\[\],\.]', tokens.Punctuation),
-        (r'[<>=~!]+', tokens.Operator.Comparison),
-        (r'[+/@#%^&|^-]+', tokens.Operator),
-    ]}
-
-FLAGS = re.IGNORECASE | re.UNICODE
-SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']]
+SQL_REGEX = [
+    (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
+    (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint),
+
+    (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single),
+    (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline),
+
+    (r'(\r\n|\r|\n)', tokens.Newline),
+    (r'\s+?', tokens.Whitespace),
+
+    (r':=', tokens.Assignment),
+    (r'::', tokens.Punctuation),
+
+    (r'\*', tokens.Wildcard),
+
+    (r"`(``|[^`])*`", tokens.Name),
+    (r"´(´´|[^´])*´", tokens.Name),
+    (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
+
+    (r'\?', tokens.Name.Placeholder),
+    (r'%(\(\w+\))?s', tokens.Name.Placeholder),
+    (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder),
+
+    (r'\\\w+', tokens.Command),
+
+    # FIXME(andi): VALUES shouldn't be listed here
+    # see https://github.com/andialbrecht/sqlparse/pull/64
+    # AS and IN are special, it may be followed by a parenthesis, but
+    # are never functions, see issue183 and issue507
+    (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword),
+
+    (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name),
+
+    # see issue #39
+    # Spaces around period `schema . name` are valid identifier
+    # TODO: Spaces before period not implemented
+    (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name),  # 'Name'.
+    # FIXME(atronah): never match,
+    # because `re.match` doesn't work with look-behind regexp feature
+    (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name),  # .'Name'
+    (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name),  # side effect: change kw to func
+    (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
+    (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float),
+    (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
+     tokens.Number.Float),
+    (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
+    (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
+    # not a real string literal in ANSI SQL:
+    (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
+    (r'(""|".*?[^\\]")', tokens.String.Symbol),
+    # sqlite names can be escaped with [square brackets]. left bracket
+    # cannot be preceded by word character or a right bracket --
+    # otherwise it's probably an array index
+    (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name),
+    (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
+     r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
+    (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
+    (r'NOT\s+NULL\b', tokens.Keyword),
+    (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword),
+    (r'UNION\s+ALL\b', tokens.Keyword),
+    (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
+    (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
+    (r'GROUP\s+BY\b', tokens.Keyword),
+    (r'ORDER\s+BY\b', tokens.Keyword),
+    (r'HANDLER\s+FOR\b', tokens.Keyword),
+    (r'(LATERAL\s+VIEW\s+)'
+     r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b',
+     tokens.Keyword),
+    (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast),
+    (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison),
+    (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
+    # Check for keywords, also returns tokens.Name if regex matches
+    # but the match isn't a keyword.
+    (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD),
+    (r'[;:()\[\],\.]', tokens.Punctuation),
+    (r'[<>=~!]+', tokens.Operator.Comparison),
+    (r'[+/@#%^&|^-]+', tokens.Operator),
+]
 
 KEYWORDS = {
     'ABORT': tokens.Keyword,
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index aafb55f2..50799df6 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -6,7 +6,7 @@
 # the BSD License: https://opensource.org/licenses/BSD-3-Clause
 
 """SQL Lexer"""
-
+import re
 # This code is based on the SqlLexer in pygments.
 # http://pygments.org/
 # It's separated from the rest of pygments to increase performance
@@ -56,7 +56,8 @@ def clear(self):
 
     def set_SQL_REGEX(self, SQL_REGEX):
         """Set the list of regex that will parse the SQL."""
-        self._SQL_REGEX = SQL_REGEX
+        FLAGS = re.IGNORECASE | re.UNICODE
+        self._SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX]
 
     def add_keywords(self, keywords):
         """Add keyword dictionaries. Keywords are looked up in the same order
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 3ac65001..017f93ae 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -1,5 +1,4 @@
 """Tests sqlparse.parse()."""
-import re
 from io import StringIO
 
 import pytest
@@ -538,10 +537,7 @@ def test_configurable_regex():
     lex = Lexer()
     lex.clear()
 
-    my_regex = (
-        re.compile(r"ZORDER\s+BY\b", keywords.FLAGS).match,
-        sqlparse.tokens.Keyword,
-    )
+    my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
 
     lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:])
     lex.add_keywords(keywords.KEYWORDS_COMMON)

From 4efdc036623e1586206d7132abf95696953deb9a Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Thu, 1 Dec 2022 10:42:44 +0000
Subject: [PATCH 14/88] flake8

---
 sqlparse/lexer.py      |  5 ++++-
 tests/test_keywords.py |  1 -
 tests/test_parse.py    | 19 +++++++++++++++----
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 50799df6..657177cb 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -57,7 +57,10 @@ def clear(self):
     def set_SQL_REGEX(self, SQL_REGEX):
         """Set the list of regex that will parse the SQL."""
         FLAGS = re.IGNORECASE | re.UNICODE
-        self._SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX]
+        self._SQL_REGEX = [
+            (re.compile(rx, FLAGS).match, tt)
+            for rx, tt in SQL_REGEX
+        ]
 
     def add_keywords(self, keywords):
         """Add keyword dictionaries. Keywords are looked up in the same order
diff --git a/tests/test_keywords.py b/tests/test_keywords.py
index a3b1b385..2eddccce 100644
--- a/tests/test_keywords.py
+++ b/tests/test_keywords.py
@@ -1,7 +1,6 @@
 import pytest
 
 from sqlparse import tokens
-from sqlparse.keywords import SQL_REGEX
 from sqlparse.lexer import Lexer
 
 
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 017f93ae..33e8541f 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -491,12 +491,15 @@ def test_parenthesis():
                                                     T.Newline,
                                                     T.Punctuation]
 
+
 def test_configurable_keywords():
     sql = """select * from foo BACON SPAM EGGS;"""
     tokens = sqlparse.parse(sql)[0]
 
     assert list(
-        (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
+        (t.ttype, t.value)
+        for t in tokens
+        if t.ttype not in sqlparse.tokens.Whitespace
     ) == [
         (sqlparse.tokens.Keyword.DML, "select"),
         (sqlparse.tokens.Wildcard, "*"),
@@ -520,7 +523,9 @@ def test_configurable_keywords():
     Lexer().default_initialization()
 
     assert list(
-        (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
+        (t.ttype, t.value)
+        for t in tokens
+        if t.ttype not in sqlparse.tokens.Whitespace
     ) == [
         (sqlparse.tokens.Keyword.DML, "select"),
         (sqlparse.tokens.Wildcard, "*"),
@@ -539,7 +544,11 @@ def test_configurable_regex():
 
     my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
 
-    lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:])
+    lex.set_SQL_REGEX(
+        keywords.SQL_REGEX[:38]
+        + [my_regex]
+        + keywords.SQL_REGEX[38:]
+    )
     lex.add_keywords(keywords.KEYWORDS_COMMON)
     lex.add_keywords(keywords.KEYWORDS_ORACLE)
     lex.add_keywords(keywords.KEYWORDS_PLPGSQL)
@@ -553,5 +562,7 @@ def test_configurable_regex():
     Lexer().default_initialization()
 
     assert list(
-        (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace
+        (t.ttype, t.value)
+        for t in tokens
+        if t.ttype not in sqlparse.tokens.Whitespace
     )[4] == (sqlparse.tokens.Keyword, "zorder by")

From fbf9a576fe40ad8e4d51bb922bb454c317f73403 Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Sun, 1 Jan 2023 14:20:52 +0000
Subject: [PATCH 15/88] additional documentation

---
 docs/source/extending.rst | 10 ++++++++++
 sqlparse/lexer.py         |  4 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/source/extending.rst b/docs/source/extending.rst
index f1bd5512..97b7d389 100644
--- a/docs/source/extending.rst
+++ b/docs/source/extending.rst
@@ -44,7 +44,12 @@ a keyword to the lexer:
     from sqlparse import keywords
     from sqlparse.lexer import Lexer
 
+    # get the lexer singleton object to configure it
     lex = Lexer()
+
+    # Clear the default configurations.
+    # After this call, reg-exps and keyword dictionaries need to be loaded
+    # to make the lexer functional again.
     lex.clear()
 
     my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
@@ -55,12 +60,17 @@ a keyword to the lexer:
         + [my_regex]
         + keywords.SQL_REGEX[38:]
     )
+
+    # add the default keyword dictionaries
     lex.add_keywords(keywords.KEYWORDS_COMMON)
     lex.add_keywords(keywords.KEYWORDS_ORACLE)
     lex.add_keywords(keywords.KEYWORDS_PLPGSQL)
     lex.add_keywords(keywords.KEYWORDS_HQL)
     lex.add_keywords(keywords.KEYWORDS_MSACCESS)
     lex.add_keywords(keywords.KEYWORDS)
+
+    # add a custom keyword dictionary
     lex.add_keywords({'BAR', sqlparse.tokens.Keyword})
 
+    # no configuration is passed here. The lexer is used as a singleton.
     sqlparse.parse("select * from foo zorder by bar;")
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 657177cb..6e17fca2 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -50,7 +50,9 @@ def __init__(self):
 
     def clear(self):
         """Clear all syntax configurations.
-        Useful if you want to load a reduced set of syntax configurations."""
+        Useful if you want to load a reduced set of syntax configurations.
+        After this call, reg-exps and keyword dictionaries need to be loaded
+        to make the lexer functional again."""
         self._SQL_REGEX = []
         self._keywords = []
 

From 907fb496f90f2719095a1f01fe24db1e5c0e15a8 Mon Sep 17 00:00:00 2001
From: Simon Heisterkamp <simon@heisterkamp.dk>
Date: Sun, 1 Jan 2023 20:59:40 +0000
Subject: [PATCH 16/88] change singleton behavior

---
 docs/source/extending.rst |  2 +-
 sqlparse/lexer.py         | 52 +++++++++++++++++++++++++--------------
 tests/test_keywords.py    |  2 +-
 tests/test_parse.py       |  8 +++---
 4 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/docs/source/extending.rst b/docs/source/extending.rst
index 97b7d389..0c10924b 100644
--- a/docs/source/extending.rst
+++ b/docs/source/extending.rst
@@ -45,7 +45,7 @@ a keyword to the lexer:
     from sqlparse.lexer import Lexer
 
     # get the lexer singleton object to configure it
-    lex = Lexer()
+    lex = Lexer.get_default_instance()
 
     # Clear the default configurations.
     # After this call, reg-exps and keyword dictionaries need to be loaded
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 6e17fca2..9d25c9e6 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -7,6 +7,7 @@
 
 """SQL Lexer"""
 import re
+
 # This code is based on the SqlLexer in pygments.
 # http://pygments.org/
 # It's separated from the rest of pygments to increase performance
@@ -18,21 +19,39 @@
 from sqlparse.utils import consume
 
 
-class _LexerSingletonMetaclass(type):
-    _lexer_instance = None
-
-    def __call__(cls, *args, **kwargs):
-        if _LexerSingletonMetaclass._lexer_instance is None:
-            _LexerSingletonMetaclass._lexer_instance = super(
-                _LexerSingletonMetaclass, cls
-            ).__call__(*args, **kwargs)
-        return _LexerSingletonMetaclass._lexer_instance
-
-
-class Lexer(metaclass=_LexerSingletonMetaclass):
+class Lexer:
     """The Lexer supports configurable syntax.
     To add support for additional keywords, use the `add_keywords` method."""
 
+    _default_intance = None
+
+    # Development notes:
+    # - This class is prepared to be able to support additional SQL dialects
+    #   in the future by adding additional functions that take the place of
+    #   the function default_initialization()
+    # - The lexer class uses an explicit singleton behavior with the
+    #   instance-getter method get_default_instance(). This mechanism has
+    #   the advantage that the call signature of the entry-points to the
+    #   sqlparse library are not affected. Also, usage of sqlparse in third
+    #   party code does not need to be adapted. On the other hand, singleton
+    #   behavior is not thread safe, and the current implementation does not
+    #   easily allow for multiple SQL dialects to be parsed in the same
+    #   process. Such behavior can be supported in the future by passing a
+    #   suitably initialized lexer object as an additional parameter to the
+    #   entry-point functions (such as `parse`). Code will need to be written
+    #   to pass down and utilize such an object. The current implementation
+    #   is prepared to support this thread safe approach without the
+    #   default_instance part needing to change interface.
+
+    @classmethod
+    def get_default_instance(cls):
+        """Returns the lexer instance used internally
+        by the sqlparse core functions."""
+        if cls._default_intance is None:
+            cls._default_intance = cls()
+            cls._default_intance.default_initialization()
+        return cls._default_intance
+
     def default_initialization(self):
         """Initialize the lexer with default dictionaries.
         Useful if you need to revert custom syntax settings."""
@@ -45,13 +64,10 @@ def default_initialization(self):
         self.add_keywords(keywords.KEYWORDS_MSACCESS)
         self.add_keywords(keywords.KEYWORDS)
 
-    def __init__(self):
-        self.default_initialization()
-
     def clear(self):
         """Clear all syntax configurations.
         Useful if you want to load a reduced set of syntax configurations.
-        After this call, reg-exps and keyword dictionaries need to be loaded
+        After this call, regexps and keyword dictionaries need to be loaded
         to make the lexer functional again."""
         self._SQL_REGEX = []
         self._keywords = []
@@ -73,7 +89,7 @@ def is_keyword(self, value):
         """Checks for a keyword.
 
         If the given value is in one of the KEYWORDS_* dictionary
-        it's considered a keyword. Otherwise tokens.Name is returned.
+        it's considered a keyword. Otherwise, tokens.Name is returned.
         """
         val = value.upper()
         for kwdict in self._keywords:
@@ -136,4 +152,4 @@ def tokenize(sql, encoding=None):
     Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
     of ``(token type, value)`` items.
     """
-    return Lexer().get_tokens(sql, encoding)
+    return Lexer.get_default_instance().get_tokens(sql, encoding)
diff --git a/tests/test_keywords.py b/tests/test_keywords.py
index 2eddccce..b26e9b45 100644
--- a/tests/test_keywords.py
+++ b/tests/test_keywords.py
@@ -9,5 +9,5 @@ class TestSQLREGEX:
                                         '1.', '-1.',
                                         '.1', '-.1'])
     def test_float_numbers(self, number):
-        ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number))
+        ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number))
         assert tokens.Number.Float == ttype
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 33e8541f..5feef5a7 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -509,7 +509,7 @@ def test_configurable_keywords():
         (sqlparse.tokens.Punctuation, ";"),
     ]
 
-    Lexer().add_keywords(
+    Lexer.get_default_instance().add_keywords(
         {
             "BACON": sqlparse.tokens.Name.Builtin,
             "SPAM": sqlparse.tokens.Keyword,
@@ -520,7 +520,7 @@ def test_configurable_keywords():
     tokens = sqlparse.parse(sql)[0]
 
     # reset the syntax for later tests.
-    Lexer().default_initialization()
+    Lexer.get_default_instance().default_initialization()
 
     assert list(
         (t.ttype, t.value)
@@ -539,7 +539,7 @@ def test_configurable_keywords():
 
 
 def test_configurable_regex():
-    lex = Lexer()
+    lex = Lexer.get_default_instance()
     lex.clear()
 
     my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
@@ -559,7 +559,7 @@ def test_configurable_regex():
     tokens = sqlparse.parse("select * from foo zorder by bar;")[0]
 
     # reset the syntax for later tests.
-    Lexer().default_initialization()
+    Lexer.get_default_instance().default_initialization()
 
     assert list(
         (t.ttype, t.value)

From dd9d5b91d7aa30e4a000d5370f09dc99378891dc Mon Sep 17 00:00:00 2001
From: Shikanime Deva <deva.shikanime@protonmail.com>
Date: Mon, 19 Jul 2021 13:56:30 +0200
Subject: [PATCH 17/88] Fix get_type with comments between WITH keyword

---
 sqlparse/sql.py           | 27 ++++++++++++++-------------
 tests/test_regressions.py |  9 +++++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 586cd216..1ccfbdbe 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -413,27 +413,28 @@ def get_type(self):
         Whitespaces and comments at the beginning of the statement
         are ignored.
         """
-        first_token = self.token_first(skip_cm=True)
-        if first_token is None:
+        token = self.token_first(skip_cm=True)
+        if token is None:
             # An "empty" statement that either has not tokens at all
             # or only whitespace tokens.
             return 'UNKNOWN'
 
-        elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
-            return first_token.normalized
+        elif token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+            return token.normalized
 
-        elif first_token.ttype == T.Keyword.CTE:
+        elif token.ttype == T.Keyword.CTE:
             # The WITH keyword should be followed by either an Identifier or
             # an IdentifierList containing the CTE definitions;  the actual
             # DML keyword (e.g. SELECT, INSERT) will follow next.
-            fidx = self.token_index(first_token)
-            tidx, token = self.token_next(fidx, skip_ws=True)
-            if isinstance(token, (Identifier, IdentifierList)):
-                _, dml_keyword = self.token_next(tidx, skip_ws=True)
-
-                if dml_keyword is not None \
-                        and dml_keyword.ttype == T.Keyword.DML:
-                    return dml_keyword.normalized
+            tidx = self.token_index(token)
+            while tidx is not None:
+                tidx, token = self.token_next(tidx, skip_ws=True)
+                if isinstance(token, (Identifier, IdentifierList)):
+                    tidx, token = self.token_next(tidx, skip_ws=True)
+
+                    if token is not None \
+                            and token.ttype == T.Keyword.DML:
+                        return token.normalized
 
         # Hmm, probably invalid syntax, so return unknown.
         return 'UNKNOWN'
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index 4ffc69f3..bc8b7dd3 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -427,3 +427,12 @@ def test_splitting_at_and_backticks_issue588():
         'grant foo to user1@`myhost`; grant bar to user1@`myhost`;')
     assert len(splitted) == 2
     assert splitted[-1] == 'grant bar to user1@`myhost`;'
+
+
+def test_comment_between_cte_clauses_issue632():
+    p, = sqlparse.parse("""
+        WITH foo AS (),
+             -- A comment before baz subquery
+             baz AS ()
+        SELECT * FROM baz;""")
+    assert p.get_type() == "SELECT"

From fc76056fb8f0ec713a3f2a2b6206a3336932c382 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 20 Mar 2023 08:46:10 +0100
Subject: [PATCH 18/88] Cleanup regex for detecting keywords (fixes #709).

---
 sqlparse/keywords.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index f04f928e..f85d4688 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -86,7 +86,7 @@
     (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
     # Check for keywords, also returns tokens.Name if regex matches
     # but the match isn't a keyword.
-    (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD),
+    (r'\w[$#\w]*', PROCESS_AS_KEYWORD),
     (r'[;:()\[\],\.]', tokens.Punctuation),
     (r'[<>=~!]+', tokens.Operator.Comparison),
     (r'[+/@#%^&|^-]+', tokens.Operator),

From b949fdf9a1538f98b57612bef6306fc38f32aaf7 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 14 Apr 2023 14:51:58 +0200
Subject: [PATCH 19/88] CI: Use codecov action.

codecov module is deprecated and was removed from PyPI in
favor of the github action.
---
 .github/workflows/python-app.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 906ca7e8..3033af97 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -30,10 +30,9 @@ jobs:
       run: |
         python -m pip install --upgrade pip flit
         flit install --deps=develop
-        pip install codecov
     - name: Lint with flake8
       run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics
     - name: Test with pytest
       run: pytest --cov=sqlparse
     - name: Publish to codecov
-      run: codecov
+      uses: codecov/codecov-action@v3

From c457abd5f097dd13fb21543381e7cfafe7d31cfb Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 20 Mar 2023 08:33:46 +0100
Subject: [PATCH 20/88] Remove unnecessary parts in regex for bad escaping.

The regex tried to deal with situations where escaping in the
SQL to be parsed was suspicious.
---
 CHANGELOG            | 10 ++++++++++
 sqlparse/keywords.py |  4 ++--
 tests/test_split.py  |  4 ++--
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 94864138..880a9ca9 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,12 +1,22 @@
 Development Version
 -------------------
 
+Notable Changes
+
+* IMPORTANT: This release fixes a security vulnerability in the
+  parser where a regular expression vulnerable to ReDoS (Regular
+  Expression Denial of Service) was used. See the security advisory
+  for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2
+  The vulnerability was discovered by @erik-krogh from GitHub
+  Security Lab (GHSL). Thanks for reporting!
+
 Bug Fixes
 
 * Revert a change from 0.4.0 that changed IN to be a comparison (issue694).
   The primary expectation is that IN is treated as a keyword and not as a
   comparison operator. That also follows the definition of reserved keywords
   for the major SQL syntax definitions.
+* Fix regular expressions for string parsing.
 
 Other
 
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index f85d4688..b45f3e0f 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -59,9 +59,9 @@
     (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
      tokens.Number.Float),
     (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
-    (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
+    (r"'(''|\\'|[^'])*'", tokens.String.Single),
     # not a real string literal in ANSI SQL:
-    (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
+    (r'"(""|\\"|[^"])*"', tokens.String.Symbol),
     (r'(""|".*?[^\\]")', tokens.String.Symbol),
     # sqlite names can be escaped with [square brackets]. left bracket
     # cannot be preceded by word character or a right bracket --
diff --git a/tests/test_split.py b/tests/test_split.py
index a9d75765..e79750e8 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -18,8 +18,8 @@ def test_split_semicolon():
 
 
 def test_split_backslash():
-    stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';")
-    assert len(stmts) == 3
+    stmts = sqlparse.parse("select '\'; select '\'';")
+    assert len(stmts) == 2
 
 
 @pytest.mark.parametrize('fn', ['function.sql',

From 64bb91f4880b46f73b4cc9207ae9ccc180d56d1b Mon Sep 17 00:00:00 2001
From: Kevin Stubbings <kwstubbs@github.com>
Date: Wed, 22 Mar 2023 16:31:15 -0700
Subject: [PATCH 21/88] Testing branch

---
 test | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 test

diff --git a/test b/test
new file mode 100644
index 00000000..e69de29b

From d9d69f47ed13a583c81473211f44ae320470a58b Mon Sep 17 00:00:00 2001
From: Kevin Stubbings <kwstubbs@github.com>
Date: Wed, 22 Mar 2023 16:36:19 -0700
Subject: [PATCH 22/88] Removed test file

---
 test | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test

diff --git a/test b/test
deleted file mode 100644
index e69de29b..00000000

From 58dae6fcd2a51209aeccd4fff3b923bf37714e19 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 18 Apr 2023 10:25:38 +0200
Subject: [PATCH 23/88] Bump version.

---
 sqlparse/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index f901185a..122595b3 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -16,7 +16,7 @@
 from sqlparse import formatter
 
 
-__version__ = '0.4.4.dev0'
+__version__ = '0.4.4'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
 

From 647d1457acf7d88614215841eb15d423df2a1895 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 18 Apr 2023 10:29:29 +0200
Subject: [PATCH 24/88] Update Changelog.

---
 CHANGELOG | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 880a9ca9..a42577e1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,5 @@
-Development Version
--------------------
+Release 0.4.4 (Apr 18, 2023)
+----------------------------
 
 Notable Changes
 

From bd417b8c7d6b79d0b6af1b42c78b17d13b724411 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 18 Apr 2023 10:31:23 +0200
Subject: [PATCH 25/88] Switch back to development mode.

---
 CHANGELOG            | 6 ++++++
 sqlparse/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index a42577e1..4f393b9c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,9 @@
+Development Version
+-------------------
+
+Nothing yet.
+
+
 Release 0.4.4 (Apr 18, 2023)
 ----------------------------
 
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index 122595b3..db0d2fc9 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -16,7 +16,7 @@
 from sqlparse import formatter
 
 
-__version__ = '0.4.4'
+__version__ = '0.4.5.dev0'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
 

From 34c7c4d3b5aa953c14f88b5980d037c1682df5c6 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 3 Jun 2023 16:09:11 +0200
Subject: [PATCH 26/88] Update python-app.yml: Try with 3.12-beta1

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 3033af97..fdf04f85 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.5 and 3.6 available
     strategy:
       matrix:
-        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
+        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-beta.1"]
 
     steps:
     - uses: actions/checkout@v3

From 3c4b57c24dd3cc3e7937f24b14b8bd53a5499fcb Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 3 Jun 2023 16:13:56 +0200
Subject: [PATCH 27/88] Update python-app.yml: Revert to 3.12-dev and add
 check-latest

---
 .github/workflows/python-app.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index fdf04f85..43a918da 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.5 and 3.6 available
     strategy:
       matrix:
-        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-beta.1"]
+        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
 
     steps:
     - uses: actions/checkout@v3
@@ -26,6 +26,7 @@ jobs:
       uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
+        check-latest: true
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip flit

From 8157d16539b60ed625b004abeef9c2796eb09ba0 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Wed, 5 Jul 2023 21:44:26 +0200
Subject: [PATCH 28/88] Add classifier for Python 3.11 (fixes #726).

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 338a53ce..f6eadb89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
     "Topic :: Database",

From 9765fce1c0466d3bd90e3925ed47d4f47fa3a131 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 23 Jul 2023 22:44:26 +0200
Subject: [PATCH 29/88] Remove outdated and unused editorconfig.

---
 .editorconfig | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
deleted file mode 100644
index ca1e615a..00000000
--- a/.editorconfig
+++ /dev/null
@@ -1,20 +0,0 @@
-# http://editorconfig.org
-
-root = true
-
-[*]
-indent_style = space
-indent_size = 4
-end_of_line = lf
-charset = utf-8
-insert_final_newline = true
-trim_trailing_whitespace = true
-
-[*.{py,ini,yaml,yml,rst}]
-indent_style = space
-indent_size = 4
-continuation_indent_size = 4
-trim_trailing_whitespace = true
-
-[{Makefile,*.bat}]
-indent_style = tab

From 2bc8d9c2cdfcc4c857e62e682043f40dbb8c14f3 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 24 Jul 2023 09:01:12 +0200
Subject: [PATCH 30/88] Drop support for Python 3.5.

---
 .github/workflows/python-app.yml | 2 +-
 CHANGELOG                        | 4 +++-
 README.rst                       | 2 +-
 pyproject.toml                   | 3 +--
 sqlparse/__init__.py             | 2 +-
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 43a918da..91296f8c 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.5 and 3.6 available
     strategy:
       matrix:
-        python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
+        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
 
     steps:
     - uses: actions/checkout@v3
diff --git a/CHANGELOG b/CHANGELOG
index 4f393b9c..0102cd0c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,9 @@
 Development Version
 -------------------
 
-Nothing yet.
+Notable Changes
+
+* Drop support for Python 3.5.
 
 
 Release 0.4.4 (Apr 18, 2023)
diff --git a/README.rst b/README.rst
index df4e7e36..67ddaf96 100644
--- a/README.rst
+++ b/README.rst
@@ -11,7 +11,7 @@ python-sqlparse - Parse SQL statements
 sqlparse is a non-validating SQL parser for Python.
 It provides support for parsing, splitting and formatting SQL statements.
 
-The module is compatible with Python 3.5+ and released under the terms of the
+The module is compatible with Python 3.6+ and released under the terms of the
 `New BSD license <https://opensource.org/licenses/BSD-3-Clause>`_.
 
 Visit the project page at https://github.com/andialbrecht/sqlparse for
diff --git a/pyproject.toml b/pyproject.toml
index f6eadb89..4bdbe1b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,6 @@ classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.5",
     "Programming Language :: Python :: 3.6",
     "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
@@ -28,7 +27,7 @@ classifiers = [
     "Topic :: Database",
     "Topic :: Software Development",
 ]
-requires-python = ">=3.5"
+requires-python = ">=3.6"
 
 [project.urls]
 Home = "https://github.com/andialbrecht/sqlparse"
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index db0d2fc9..cfd4e2fd 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -16,7 +16,7 @@
 from sqlparse import formatter
 
 
-__version__ = '0.4.5.dev0'
+__version__ = '0.5.0.dev0'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
 

From be35807c83909b70be0e16fcd6408b7b32aef78a Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 24 Jul 2023 10:23:34 +0200
Subject: [PATCH 31/88] Get tox running again.

We have to pin versions for tox and virtualenv because newer versions
don't support Python 3.6 anymore.
---
 pyproject.toml | 2 ++
 tox.ini        | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4bdbe1b4..1b23a4bc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,8 @@ sqlformat = "sqlparse.__main__:main"
 dev = [
     "flake8",
     "build",
+    "virtualenv<20.22.0",   # 20.22.0 dropped Python 3.6 support
+    "tox<4.5.0",  # >=4.5.0 requires virtualenv>=20.22
 ]
 test = [
     "pytest",
diff --git a/tox.ini b/tox.ini
index 0087d50e..19d17327 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,11 +1,12 @@
 [tox]
 skip_missing_interpreters = True
 envlist =
-    py35
     py36
     py37
     py38
-    pypy3
+    py39
+    py310
+    py311
     flake8
 
 [testenv]

From 3eec63dafd3e7ff99560f66c1f7964f558307b98 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 24 Jul 2023 11:06:57 +0200
Subject: [PATCH 32/88] Don't pin virtualenv in dev section.

The requirements in dev section are more general (and may require newer versions).
---
 pyproject.toml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1b23a4bc..10e96696 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,8 +43,6 @@ sqlformat = "sqlparse.__main__:main"
 dev = [
     "flake8",
     "build",
-    "virtualenv<20.22.0",   # 20.22.0 dropped Python 3.6 support
-    "tox<4.5.0",  # >=4.5.0 requires virtualenv>=20.22
 ]
 test = [
     "pytest",
@@ -53,6 +51,10 @@ test = [
 doc = [
     "sphinx",
 ]
+tox = [
+    "virtualenv<20.22.0",   # 20.22.0 dropped Python 3.6 support
+    "tox<4.5.0",  # >=4.5.0 requires virtualenv>=20.22
+]
 
 [tool.flit.sdist]
 include = [

From b90e422b06d268dfe588ad9d817009ec5fa01a72 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 11:07:44 +0200
Subject: [PATCH 33/88] Add reminder for github release (fixes #732).

---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 1657822e..b3db8e3d 100644
--- a/Makefile
+++ b/Makefile
@@ -24,3 +24,4 @@ release:
 	@rm -rf dist/
 	python -m build
 	twine upload --sign --identity E0B84F81 dist/*
+	@echo "Reminder: Add release on github https://github.com/andialbrecht/sqlparse/releases"

From d69fadac82301e87ed4a7a12b19359f13d105e9e Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 11:36:35 +0200
Subject: [PATCH 34/88] Update issue templates

---
 .github/ISSUE_TEMPLATE/bug_report.md      | 38 +++++++++++++++++++++++
 .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..dd84ea78
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+**Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 00000000..bbcbbe7d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.

From efcdbf627a23f239be66a4e80824ef7d100ccb02 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 11:40:48 +0200
Subject: [PATCH 35/88] Update bug_report.md

---
 .github/ISSUE_TEMPLATE/bug_report.md | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index dd84ea78..22844bfc 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -2,7 +2,7 @@
 name: Bug report
 about: Create a report to help us improve
 title: ''
-labels: ''
+labels: 'bug,needs-triage'
 assignees: ''
 
 ---
@@ -11,28 +11,16 @@ assignees: ''
 A clear and concise description of what the bug is.
 
 **To Reproduce**
-Steps to reproduce the behavior:
-1. Go to '...'
-2. Click on '....'
-3. Scroll down to '....'
-4. See error
+Steps to reproduce the behavior. 
+Please give code examples or concrete SQL statements. Take care of not posting any sensitive information when pasting SQL statements!
+What's the concrete error / traceback?
 
 **Expected behavior**
 A clear and concise description of what you expected to happen.
 
-**Screenshots**
-If applicable, add screenshots to help explain your problem.
-
-**Desktop (please complete the following information):**
- - OS: [e.g. iOS]
- - Browser [e.g. chrome, safari]
- - Version [e.g. 22]
-
-**Smartphone (please complete the following information):**
- - Device: [e.g. iPhone6]
- - OS: [e.g. iOS8.1]
- - Browser [e.g. stock browser, safari]
- - Version [e.g. 22]
+**Versions (please complete the following information):**
+ - Python: [e.g. 3.11.2]
+ - sqlparse: [e.g. 0.4.1]
 
 **Additional context**
 Add any other context about the problem here.

From bcfbe3749afdb64b7121ce7d1069fd9d62d40788 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 11:51:17 +0200
Subject: [PATCH 36/88] Add link to discussion when creating issues.

---
 .github/ISSUE_TEMPLATE/config.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml

diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..acccb059
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: true
+contact_links:
+  - name: Discussions
+    url: https://github.com/andialbrecht/sqlparse/discussions
+    about: Please ask questions and start more general discussions here
\ No newline at end of file

From 8aa4715afd4edb97787f0310d0ae26639076403a Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 11:52:03 +0200
Subject: [PATCH 37/88] Update config.yml.

---
 .github/ISSUE_TEMPLATE/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index acccb059..03f62715 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,5 @@
 blank_issues_enabled: true
 contact_links:
-  - name: Discussions
+  - name: Discussions, Questions?
     url: https://github.com/andialbrecht/sqlparse/discussions
     about: Please ask questions and start more general discussions here
\ No newline at end of file

From 9a90474c6f346f0001739d32f5c2bd55a21bc247 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 12:12:32 +0200
Subject: [PATCH 38/88] Update test action.

---
 .github/workflows/python-app.yml | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 91296f8c..6fd253ab 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -13,19 +13,29 @@ on:
     - cron: '0 12 * * *'
 
 jobs:
-  build:
-
-    runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.5 and 3.6 available
+  test:
+    name: Run tests on ${{ matrix.py }}
+    runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.6 available
     strategy:
       matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
+        py:
+          - "3.12.0-beta.4"
+          - "3.11"
+          - "3.10"
+          - "3.9"
+          - "3.8"
+          - "3.7"
+          - "3.6"
+          - "pypy-3.9"
+          - "pypy-3.8"
+          - "pypy-3.7"
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
+    - name: Set up Python ${{ matrix.py }}
       uses: actions/setup-python@v4
       with:
-        python-version: ${{ matrix.python-version }}
+        python-version: ${{ matrix.py }}
         check-latest: true
     - name: Install dependencies
       run: |

From 0623627674499302e7cf089a08903c40169a8ee3 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 12:33:02 +0200
Subject: [PATCH 39/88] Add Code of Conduct.

---
 .github/CODE_OF_CONDUCT.md | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 .github/CODE_OF_CONDUCT.md

diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000..b1ee021c
--- /dev/null
+++ b/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,7 @@
+# Be nice to each other
+
+Everyone participating in the _sqlparse_ project and especially in the
+issue tracker, discussion forums, pull requests, is expected to treat
+other people with respect and more generally to follow the guidelines 
+articulated in the 
+[Python Community Code of Conduct](https://www.python.org/psf/codeofconduct/).
\ No newline at end of file

From bb42969cca7c1e0bc49b970ea6512bf0184b97fe Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 12:45:45 +0200
Subject: [PATCH 40/88] Add contributing guide.

---
 CONTRIBUTING.md | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..3db0f110
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# Contributing to `sqlparse`
+
+Thanks for your interest in contributing to the `sqlparse` project!
+
+All contributors are expected to follow the 
+[Python Community Code of Conduct](https://www.python.org/psf/codeofconduct/).
+
+Head over to the 
+[Discussions Page](https://github.com/andialbrecht/sqlparse/discussions) if
+you have any questions. We're still working on a more elaborate 
+developer guide.
\ No newline at end of file

From c0ffe867a48148f9c9a07a5edc716199ccceb114 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 12:57:30 +0200
Subject: [PATCH 41/88] Create SECURITY.md

---
 SECURITY.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 SECURITY.md

diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..81c01543
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,14 @@
+# Security Policy
+
+## Supported Versions
+
+For now `sqlparse` uses very defensive version numbers. There's no major version yet. 
+In turn there's only one supported version and this is the latest.
+
+## Reporting a Vulnerability
+
+To report a vulnerability head over to the [Security Advisories](https://github.com/andialbrecht/sqlparse/security/advisories) 
+page and click on "New draft security advisory".
+
+Feel free to contact me at albrecht.andi@gmail.com if you have any questions or want to discuss things
+beforehand.

From 881db0b29af79dd1c1898051e3a1f0fdd6e7e618 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 6 Aug 2023 13:31:51 +0200
Subject: [PATCH 42/88] Add Pull request template.

---
 .github/PULL_REQUEST_TEMPLATE.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .github/PULL_REQUEST_TEMPLATE.md

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 00000000..77b1fd6c
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,13 @@
+# Thanks for contributing!
+
+Before submitting your pull request please have a look at the
+following checklist:
+
+- [ ] ran the tests (`pytest`)
+- [ ] all style issues addressed (`flake8`)
+- [ ] your changes are covered by tests
+- [ ] your changes are documented, if needed
+
+In addition, please take care to provide a proper description
+on what your change does, fixes or achieves when submitting the 
+pull request.
\ No newline at end of file

From 715feacbdef1b488a562a3f37d3d4afbbcea8410 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 7 Aug 2023 15:05:59 +0200
Subject: [PATCH 43/88] Update Python version in test action.

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 6fd253ab..53f43c3e 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,7 +19,7 @@ jobs:
     strategy:
       matrix:
         py:
-          - "3.12.0-beta.4"
+          - "3.12.0-rc.1"
           - "3.11"
           - "3.10"
           - "3.9"

From baf3a0a5b9514540580152b0983a03e257b047ae Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 7 Aug 2023 15:20:17 +0200
Subject: [PATCH 44/88] Updated too early... switching back to 3.12.0-beta.4.

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 53f43c3e..6fd253ab 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,7 +19,7 @@ jobs:
     strategy:
       matrix:
         py:
-          - "3.12.0-rc.1"
+          - "3.12.0-beta.4"
           - "3.11"
           - "3.10"
           - "3.9"

From 21f9fd57005401888abf5cd1444923f483842203 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 4 Sep 2023 08:31:03 +0200
Subject: [PATCH 45/88] CI: Try to fix importlib issue with flake8.

See https://github.com/python/importlib_metadata/issues/406
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 10e96696..67a9c7c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ sqlformat = "sqlparse.__main__:main"
 
 [project.optional-dependencies]
 dev = [
+    "importlib_metadata<5; python_version <= '3.7'",
     "flake8",
     "build",
 ]

From c0a8ee6cc4ae7e5c0b9ef9e6d8e80beb90b5b00b Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Sun, 10 Sep 2023 00:05:00 +0300
Subject: [PATCH 46/88] Bump GitHub Actions

---
 .github/workflows/codeql-analysis.yml | 2 +-
 .github/workflows/python-app.yml      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 1cde398b..5acaa67d 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -39,7 +39,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 6fd253ab..6accd816 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -31,7 +31,7 @@ jobs:
           - "pypy-3.7"
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.py }}
       uses: actions/setup-python@v4
       with:

From 9bf512cb7c7bc03b06cfa0056ec53076e9c22d7c Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Sun, 10 Sep 2023 00:05:54 +0300
Subject: [PATCH 47/88] Add support for Python 3.12

---
 .github/workflows/python-app.yml | 3 ++-
 pyproject.toml                   | 1 +
 tox.ini                          | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 6accd816..ff7269bc 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,7 +19,7 @@ jobs:
     strategy:
       matrix:
         py:
-          - "3.12.0-beta.4"
+          - "3.12"
           - "3.11"
           - "3.10"
           - "3.9"
@@ -36,6 +36,7 @@ jobs:
       uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.py }}
+        allow-prereleases: true
         check-latest: true
     - name: Install dependencies
       run: |
diff --git a/pyproject.toml b/pyproject.toml
index 67a9c7c5..d9a921f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
     "Topic :: Database",
diff --git a/tox.ini b/tox.ini
index 19d17327..40d84ad8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,6 +7,7 @@ envlist =
     py39
     py310
     py311
+    py312
     flake8
 
 [testenv]

From 8ce446ed3f945f697a166551447f203510f25f2d Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 19 Sep 2023 06:52:23 +0200
Subject: [PATCH 48/88] Update changelog and authors.

---
 AUTHORS   | 1 +
 CHANGELOG | 1 +
 2 files changed, 2 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index 1717adff..4617b7d7 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -31,6 +31,7 @@ Alphabetical list of contributors:
 * Florian Bauer <florian.bauer@zmdi.com>
 * Fredy Wijaya <fredy.wijaya@gmail.com>
 * Gavin Wahl <gwahl@fusionbox.com>
+* Hugo van Kemenade <hugovk@users.noreply.github.com>
 * hurcy <cinyoung.hur@gmail.com>
 * Ian Robertson <ian.robertson@capitalone.com>
 * JacekPliszka <Jacek.Pliszka@gmail.com>
diff --git a/CHANGELOG b/CHANGELOG
index 0102cd0c..eabd6019 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,7 @@ Development Version
 Notable Changes
 
 * Drop support for Python 3.5.
+* Python 3.12 is now supported (pr725, by hugovk).
 
 
 Release 0.4.4 (Apr 18, 2023)

From fac38cd03bea712e096222f16199a7482a4837da Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 19 Sep 2023 21:41:57 +0200
Subject: [PATCH 49/88] Ignore attributes starting with dunder in _TokenType
 (fixes #672).

This issue came up, when trying to deepcopy a parsed statement.
deepcopy uses getattr(obj, '__deepcopy__', None) to get a method
for copying an object. Before this change a new attribute
'__deepcopy__' was created as a new instance of _TokenType (a tuple).
---
 CHANGELOG                 | 4 ++++
 sqlparse/tokens.py        | 3 +++
 tests/test_regressions.py | 8 ++++++++
 3 files changed, 15 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index eabd6019..525918a2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,10 @@ Notable Changes
 * Drop support for Python 3.5.
 * Python 3.12 is now supported (pr725, by hugovk).
 
+Bug Fixes
+
+* Ignore dunder attributes when creating Tokens (issue672).
+
 
 Release 0.4.4 (Apr 18, 2023)
 ----------------------------
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
index d92bbdcf..143f66b4 100644
--- a/sqlparse/tokens.py
+++ b/sqlparse/tokens.py
@@ -19,6 +19,9 @@ def __contains__(self, item):
         return item is not None and (self is item or item[:len(self)] == self)
 
     def __getattr__(self, name):
+        # don't mess with dunder
+        if name.startswith('__'):
+            return super().__getattr__(self, name)
         new = _TokenType(self + (name,))
         setattr(self, name, new)
         new.parent = self
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index bc8b7dd3..961adc17 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -1,3 +1,5 @@
+import copy
+
 import pytest
 
 import sqlparse
@@ -436,3 +438,9 @@ def test_comment_between_cte_clauses_issue632():
              baz AS ()
         SELECT * FROM baz;""")
     assert p.get_type() == "SELECT"
+
+
+def test_copy_issue672():
+    p = sqlparse.parse('select * from foo')[0]
+    copied = copy.deepcopy(p)
+    assert str(p) == str(copied)

From 5c9435269bcb00c86164799a16621fcf5d41e917 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 19 Sep 2023 21:57:37 +0200
Subject: [PATCH 50/88] Simplify regex.

---
 sqlparse/filters/others.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
index 6905f2d6..9e617c37 100644
--- a/sqlparse/filters/others.py
+++ b/sqlparse/filters/others.py
@@ -25,7 +25,7 @@ def _get_insert_token(token):
             # Note: The actual value for a line break is replaced by \n
             # in SerializerUnicode which will be executed in the
             # postprocessing state.
-            m = re.search(r'((\r|\n)+) *$', token.value)
+            m = re.search(r'([\r\n]+) *$', token.value)
             if m is not None:
                 return sql.Token(T.Whitespace.Newline, m.groups()[0])
             else:

From 3696d5388186a1fd51f657e0d6f4c6809b244143 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 19 Sep 2023 22:01:45 +0200
Subject: [PATCH 51/88] Code cleanup.

---
 sqlparse/engine/grouping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 86d8fc64..57d257e2 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -184,7 +184,7 @@ def match(token):
         return token.match(T.Assignment, ':=')
 
     def valid(token):
-        return token is not None and token.ttype not in (T.Keyword)
+        return token is not None and token.ttype not in (T.Keyword,)
 
     def post(tlist, pidx, tidx, nidx):
         m_semicolon = T.Punctuation, ';'

From 6eca7aeb407235d7053508a49e2262a395d56b67 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Mon, 25 Sep 2023 21:23:42 +0200
Subject: [PATCH 52/88] Cleanup .gitignore.

Removed any editor/IDE related and obsolete entries.
---
 .gitignore | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index e5953853..cc2ec16b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,3 @@
-# PyCharm
-.idea/
-
 *.py[co]
 docs/build
 dist/
@@ -11,12 +8,5 @@ MANIFEST
 .cache/
 *.egg-info/
 htmlcov/
-extras/appengine/sqlparse
-extras/appengine/lib/
-extras/py3k/sqlparse
-extras/py3k/tests
-extras/py3k/sqlparse.diff
-extras/py3k/tests.diff
 coverage.xml
-*.class
 .pytest_cache
\ No newline at end of file

From 115e208bd340f175b23964524670418fe6f72c31 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Thu, 12 Oct 2023 21:11:50 +0200
Subject: [PATCH 53/88] Add option to remove trailing semicolon when splitting
 (fixes #742).

---
 CHANGELOG                       |  5 +++++
 sqlparse/__init__.py            |  6 ++++--
 sqlparse/engine/filter_stack.py |  5 ++++-
 sqlparse/filters/__init__.py    |  2 ++
 sqlparse/filters/others.py      |  9 +++++++++
 tests/test_split.py             | 28 ++++++++++++++++++++++++++++
 6 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 525918a2..0ede2800 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,11 @@ Notable Changes
 * Drop support for Python 3.5.
 * Python 3.12 is now supported (pr725, by hugovk).
 
+Enhancements:
+
+* Splitting statements now allows to remove the semicolon at the end.
+  Some database backends love statements without semicolon (issue742).
+
 Bug Fixes
 
 * Ignore dunder attributes when creating Tokens (issue672).
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index cfd4e2fd..b80b2d60 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -59,12 +59,14 @@ def format(sql, encoding=None, **options):
     return ''.join(stack.run(sql, encoding))
 
 
-def split(sql, encoding=None):
+def split(sql, encoding=None, strip_semicolon=False):
     """Split *sql* into single statements.
 
     :param sql: A string containing one or more SQL statements.
     :param encoding: The encoding of the statement (optional).
+    :param strip_semicolon: If True, remove trailing semicolons
+        (default: False).
     :returns: A list of strings.
     """
-    stack = engine.FilterStack()
+    stack = engine.FilterStack(strip_semicolon=strip_semicolon)
     return [str(stmt).strip() for stmt in stack.run(sql, encoding)]
diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py
index 9665a224..3feba377 100644
--- a/sqlparse/engine/filter_stack.py
+++ b/sqlparse/engine/filter_stack.py
@@ -10,14 +10,17 @@
 from sqlparse import lexer
 from sqlparse.engine import grouping
 from sqlparse.engine.statement_splitter import StatementSplitter
+from sqlparse.filters import StripTrailingSemicolonFilter
 
 
 class FilterStack:
-    def __init__(self):
+    def __init__(self, strip_semicolon=False):
         self.preprocess = []
         self.stmtprocess = []
         self.postprocess = []
         self._grouping = False
+        if strip_semicolon:
+            self.stmtprocess.append(StripTrailingSemicolonFilter())
 
     def enable_grouping(self):
         self._grouping = True
diff --git a/sqlparse/filters/__init__.py b/sqlparse/filters/__init__.py
index 5bd6b325..06169460 100644
--- a/sqlparse/filters/__init__.py
+++ b/sqlparse/filters/__init__.py
@@ -8,6 +8,7 @@
 from sqlparse.filters.others import SerializerUnicode
 from sqlparse.filters.others import StripCommentsFilter
 from sqlparse.filters.others import StripWhitespaceFilter
+from sqlparse.filters.others import StripTrailingSemicolonFilter
 from sqlparse.filters.others import SpacesAroundOperatorsFilter
 
 from sqlparse.filters.output import OutputPHPFilter
@@ -25,6 +26,7 @@
     'SerializerUnicode',
     'StripCommentsFilter',
     'StripWhitespaceFilter',
+    'StripTrailingSemicolonFilter',
     'SpacesAroundOperatorsFilter',
 
     'OutputPHPFilter',
diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
index 9e617c37..da7c0e79 100644
--- a/sqlparse/filters/others.py
+++ b/sqlparse/filters/others.py
@@ -126,6 +126,15 @@ def process(self, stmt):
         return stmt
 
 
+class StripTrailingSemicolonFilter:
+
+    def process(self, stmt):
+        while stmt.tokens and (stmt.tokens[-1].is_whitespace
+                               or stmt.tokens[-1].value == ';'):
+            stmt.tokens.pop()
+        return stmt
+
+
 # ---------------------------
 # postprocess
 
diff --git a/tests/test_split.py b/tests/test_split.py
index e79750e8..30a50c59 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -166,3 +166,31 @@ def test_split_mysql_handler_for(load_file):
     # see issue581
     stmts = sqlparse.split(load_file('mysql_handler.sql'))
     assert len(stmts) == 2
+
+
+@pytest.mark.parametrize('sql, expected', [
+    ('select * from foo;', ['select * from foo']),
+    ('select * from foo', ['select * from foo']),
+    ('select * from foo; select * from bar;', [
+        'select * from foo',
+        'select * from bar',
+    ]),
+    ('  select * from foo;\n\nselect * from bar;\n\n\n\n', [
+        'select * from foo',
+        'select * from bar',
+    ]),
+    ('select * from foo\n\n;  bar', ['select * from foo', 'bar']),
+])
+def test_split_strip_semicolon(sql, expected):
+    stmts = sqlparse.split(sql, strip_semicolon=True)
+    assert len(stmts) == len(expected)
+    for idx, expectation in enumerate(expected):
+        assert stmts[idx] == expectation
+
+
+def test_split_strip_semicolon_procedure(load_file):
+    stmts = sqlparse.split(load_file('mysql_handler.sql'),
+                           strip_semicolon=True)
+    assert len(stmts) == 2
+    assert stmts[0].endswith('end')
+    assert stmts[1].endswith('end')

From f101546dafa921edfea5b3107731504665b758ea Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Thu, 12 Oct 2023 21:28:03 +0200
Subject: [PATCH 54/88] Add comment.

---
 sqlparse/engine/statement_splitter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
index a991959a..9bde92c5 100644
--- a/sqlparse/engine/statement_splitter.py
+++ b/sqlparse/engine/statement_splitter.py
@@ -54,7 +54,7 @@ def _change_splitlevel(self, ttype, value):
         if unified == 'BEGIN':
             self._begin_depth += 1
             if self._is_create:
-                # FIXME(andi): This makes no sense.
+                # FIXME(andi): This makes no sense.  ## this comment neither
                 return 1
             return 0
 

From ab84201f0baf75fd20dd5458d65920e1a50a5be2 Mon Sep 17 00:00:00 2001
From: Georg Traar <georg@crate.io>
Date: Mon, 5 Feb 2024 07:37:17 +0100
Subject: [PATCH 55/88] allow operators to precede dollar quoted strings

---
 CHANGELOG            | 1 +
 sqlparse/keywords.py | 2 +-
 tests/test_parse.py  | 8 ++++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 0ede2800..0b48e9f3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -14,6 +14,7 @@ Enhancements:
 Bug Fixes
 
 * Ignore dunder attributes when creating Tokens (issue672).
+* Allow operators to precede dollar-quoted strings (issue763).
 
 
 Release 0.4.4 (Apr 18, 2023)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index b45f3e0f..d3794fd3 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -30,7 +30,7 @@
 
     (r"`(``|[^`])*`", tokens.Name),
     (r"´(´´|[^´])*´", tokens.Name),
-    (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
+    (r'((?<![\w\"\$])\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
 
     (r'\?', tokens.Name.Placeholder),
     (r'%(\(\w+\))?s', tokens.Name.Placeholder),
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 5feef5a7..6e4df7c6 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -180,6 +180,14 @@ def test_psql_quotation_marks():
     $PROC_2$ LANGUAGE plpgsql;""")
     assert len(t) == 2
 
+    # operators are valid infront of dollar quoted strings
+    t = sqlparse.split("""UPDATE SET foo =$$bar;SELECT bar$$""")
+    assert len(t) == 1
+    
+    # identifiers must be separated by whitespace
+    t = sqlparse.split("""UPDATE SET foo TO$$bar;SELECT bar$$""")
+    assert len(t) == 2
+
 
 def test_double_precision_is_builtin():
     s = 'DOUBLE PRECISION'

From 1c6dff9b11b80eecf44e9e6dfb4487a2bb04409d Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 5 Mar 2024 07:23:48 +0100
Subject: [PATCH 56/88] Update authors.

---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 4617b7d7..2427bfb3 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -31,6 +31,7 @@ Alphabetical list of contributors:
 * Florian Bauer <florian.bauer@zmdi.com>
 * Fredy Wijaya <fredy.wijaya@gmail.com>
 * Gavin Wahl <gwahl@fusionbox.com>
+* Georg Traar <georg@crate.io>
 * Hugo van Kemenade <hugovk@users.noreply.github.com>
 * hurcy <cinyoung.hur@gmail.com>
 * Ian Robertson <ian.robertson@capitalone.com>

From 8d34105d39521f980e8e591eadfc73025996dc82 Mon Sep 17 00:00:00 2001
From: Gregor Karetka <karetka.gregor@gmail.com>
Date: Wed, 6 Dec 2023 16:07:00 +0100
Subject: [PATCH 57/88] Update extending.rst

Fix broken example
---
 docs/source/extending.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/extending.rst b/docs/source/extending.rst
index 0c10924b..866303b7 100644
--- a/docs/source/extending.rst
+++ b/docs/source/extending.rst
@@ -70,7 +70,7 @@ a keyword to the lexer:
     lex.add_keywords(keywords.KEYWORDS)
 
     # add a custom keyword dictionary
-    lex.add_keywords({'BAR', sqlparse.tokens.Keyword})
+    lex.add_keywords({'BAR': sqlparse.tokens.Keyword})
 
     # no configuration is passed here. The lexer is used as a singleton.
     sqlparse.parse("select * from foo zorder by bar;")

From dc2329d07df3b475f2190d3711396691d705fb9a Mon Sep 17 00:00:00 2001
From: Igor Khrol <igor.khrol@automattic.com>
Date: Tue, 14 Nov 2023 17:59:16 +0200
Subject: [PATCH 58/88] Support TypedLiterals in get_parameters

---
 sqlparse/sql.py     | 7 ++++---
 tests/test_parse.py | 5 +++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 1ccfbdbe..f93d7c36 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -619,12 +619,13 @@ class Function(NameAliasMixin, TokenList):
     def get_parameters(self):
         """Return a list of parameters."""
         parenthesis = self.tokens[-1]
+        result = []
         for token in parenthesis.tokens:
             if isinstance(token, IdentifierList):
                 return token.get_identifiers()
-            elif imt(token, i=(Function, Identifier), t=T.Literal):
-                return [token, ]
-        return []
+            elif imt(token, i=(Function, Identifier, TypedLiteral), t=T.Literal):
+                result.append(token)
+        return result
 
 
 class Begin(TokenList):
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 6e4df7c6..be416ef2 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -133,6 +133,11 @@ def test_parse_nested_function():
     assert type(t[0]) is sql.Function
 
 
+def test_parse_casted_params():
+    t = sqlparse.parse("foo(DATE '2023-11-14', TIMESTAMP '2023-11-15')")[0].tokens[0].get_parameters()
+    assert len(t) == 2
+
+
 def test_parse_div_operator():
     p = sqlparse.parse('col1 DIV 5 AS div_col1')[0].tokens
     assert p[0].tokens[0].tokens[2].ttype is T.Operator

From b97387ceab38ea724cb715f8a43050b1693d1d36 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 5 Mar 2024 07:29:30 +0100
Subject: [PATCH 59/88] Update changelog and code cleanup.

---
 AUTHORS         | 1 +
 CHANGELOG       | 1 +
 sqlparse/sql.py | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/AUTHORS b/AUTHORS
index 2427bfb3..934bbe33 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -35,6 +35,7 @@ Alphabetical list of contributors:
 * Hugo van Kemenade <hugovk@users.noreply.github.com>
 * hurcy <cinyoung.hur@gmail.com>
 * Ian Robertson <ian.robertson@capitalone.com>
+* Igor Khrol <igor.khrol@automattic.com>
 * JacekPliszka <Jacek.Pliszka@gmail.com>
 * JavierPan <PeterSandwich@users.noreply.github.com>
 * Jean-Martin Archer <jm@jmartin.ca>
diff --git a/CHANGELOG b/CHANGELOG
index 0b48e9f3..cbfbcf25 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ Enhancements:
 
 * Splitting statements now allows to remove the semicolon at the end.
   Some database backends love statements without semicolon (issue742).
+* Support TypedLiterals in get_parameters (pr649, by Khrol).
 
 Bug Fixes
 
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index f93d7c36..41606dd8 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -623,7 +623,8 @@ def get_parameters(self):
         for token in parenthesis.tokens:
             if isinstance(token, IdentifierList):
                 return token.get_identifiers()
-            elif imt(token, i=(Function, Identifier, TypedLiteral), t=T.Literal):
+            elif imt(token, i=(Function, Identifier, TypedLiteral),
+                     t=T.Literal):
                 result.append(token)
         return result
 

From 39b5a02551de051b1e888135fe71759d5b49a134 Mon Sep 17 00:00:00 2001
From: John Bodley <john.bodley@gmail.com>
Date: Mon, 6 Nov 2023 20:59:16 -0800
Subject: [PATCH 60/88] Ensure nested ordered identifiers are grouped (fixes
 #745)

---
 sqlparse/engine/grouping.py | 1 +
 tests/test_grouping.py      | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 57d257e2..c486318a 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -360,6 +360,7 @@ def group_functions(tlist):
         tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
 
 
+@recurse(sql.Identifier)
 def group_order(tlist):
     """Group together Identifier and Asc/Desc token"""
     tidx, token = tlist.token_next_by(t=T.Keyword.Order)
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index 03d16c5d..e90243b5 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -247,6 +247,14 @@ def test_grouping_identifier_list_with_order():
     assert str(p.tokens[0].tokens[3]) == '2 desc'
 
 
+def test_grouping_nested_identifier_with_order():
+    # issue745
+    p = sqlparse.parse('(a desc)')[0]
+    assert isinstance(p.tokens[0], sql.Parenthesis)
+    assert isinstance(p.tokens[0].tokens[1], sql.Identifier)
+    assert str(p.tokens[0].tokens[1]) == 'a desc'
+
+
 def test_grouping_where():
     s = 'select * from foo where bar = 1 order by id desc'
     p = sqlparse.parse(s)[0]

From 60486b91ca7b4183313b06f62f7b559f4920f099 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Tue, 5 Mar 2024 07:33:32 +0100
Subject: [PATCH 61/88] Update changelog.

---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index cbfbcf25..c76eab0f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,7 @@ Bug Fixes
 
 * Ignore dunder attributes when creating Tokens (issue672).
 * Allow operators to precede dollar-quoted strings (issue763).
+* Fix parsing of nested order clauses (issue745, pr746 by john-bodley).
 
 
 Release 0.4.4 (Apr 18, 2023)

From 7334ac99152d02bb09ab0abe79377174c2867f7c Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 15 Mar 2024 08:04:02 +0100
Subject: [PATCH 62/88] Improve splitting of Transact SQL when using GO keyword
 (fixes #762).

---
 CHANGELOG                             | 1 +
 sqlparse/engine/statement_splitter.py | 7 ++++++-
 sqlparse/keywords.py                  | 1 +
 tests/test_split.py                   | 9 +++++++++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index c76eab0f..efb3e95b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,7 @@ Enhancements:
 * Splitting statements now allows to remove the semicolon at the end.
   Some database backends love statements without semicolon (issue742).
 * Support TypedLiterals in get_parameters (pr649, by Khrol).
+* Improve splitting of Transact SQL when using GO keyword (issue762).
 
 Bug Fixes
 
diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
index 9bde92c5..5b3a0d9b 100644
--- a/sqlparse/engine/statement_splitter.py
+++ b/sqlparse/engine/statement_splitter.py
@@ -99,7 +99,12 @@ def process(self, stream):
             self.tokens.append(sql.Token(ttype, value))
 
             # Check if we get the end of a statement
-            if self.level <= 0 and ttype is T.Punctuation and value == ';':
+            # Issue762: Allow GO (or "GO 2") as statement splitter.
+            # When implementing a language toggle, it's not only to add
+            # keywords it's also to change some rules, like this splitting
+            # rule.
+            if (self.level <= 0 and ttype is T.Punctuation and value == ';') \
+                    or (ttype is T.Keyword and value.split()[0] == 'GO'):
                 self.consume_ws = True
 
         # Yield pending statement (if any)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index d3794fd3..8911c7a8 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -78,6 +78,7 @@
     (r'GROUP\s+BY\b', tokens.Keyword),
     (r'ORDER\s+BY\b', tokens.Keyword),
     (r'HANDLER\s+FOR\b', tokens.Keyword),
+    (r'GO(\s\d+)\b', tokens.Keyword),
     (r'(LATERAL\s+VIEW\s+)'
      r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b',
      tokens.Keyword),
diff --git a/tests/test_split.py b/tests/test_split.py
index 30a50c59..90d2eaff 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -194,3 +194,12 @@ def test_split_strip_semicolon_procedure(load_file):
     assert len(stmts) == 2
     assert stmts[0].endswith('end')
     assert stmts[1].endswith('end')
+
+@pytest.mark.parametrize('sql, num', [
+    ('USE foo;\nGO\nSELECT 1;\nGO', 4),
+    ('SELECT * FROM foo;\nGO', 2),
+    ('USE foo;\nGO 2\nSELECT 1;', 3)
+])
+def test_split_go(sql, num):  # issue762
+    stmts = sqlparse.split(sql)
+    assert len(stmts) == num

From 0cd062018fb1a1c296417435a10be1910a9ea657 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 15 Mar 2024 08:26:39 +0100
Subject: [PATCH 63/88] Drop support for Python < 3.8.

Also update tox.ini and Github actions:
- unpin some dependencies required for older Python versions
- update action versions to latest version
---
 .github/workflows/codeql-analysis.yml |  6 +++---
 .github/workflows/python-app.yml      | 11 +++++------
 CHANGELOG                             |  2 +-
 README.rst                            |  2 +-
 pyproject.toml                        |  9 +++------
 tox.ini                               |  4 +---
 6 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 5acaa67d..b560fd65 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -43,7 +43,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -54,7 +54,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -68,4 +68,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index ff7269bc..96c76bca 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -15,25 +15,24 @@ on:
 jobs:
   test:
     name: Run tests on ${{ matrix.py }}
-    runs-on: ubuntu-20.04  # keep it on 20.04 to have Python 3.6 available
+    runs-on: ubuntu-latest
     strategy:
       matrix:
         py:
+          - "3.13-dev"
           - "3.12"
           - "3.11"
           - "3.10"
           - "3.9"
           - "3.8"
-          - "3.7"
-          - "3.6"
+          - "pypy-3.10"
           - "pypy-3.9"
           - "pypy-3.8"
-          - "pypy-3.7"
 
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.py }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.py }}
         allow-prereleases: true
@@ -47,4 +46,4 @@ jobs:
     - name: Test with pytest
       run: pytest --cov=sqlparse
     - name: Publish to codecov
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
diff --git a/CHANGELOG b/CHANGELOG
index efb3e95b..5db0a595 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,7 +3,7 @@ Development Version
 
 Notable Changes
 
-* Drop support for Python 3.5.
+* Drop support for Python 3.5, 3.6, and 3.7.
 * Python 3.12 is now supported (pr725, by hugovk).
 
 Enhancements:
diff --git a/README.rst b/README.rst
index 67ddaf96..3eaf0efb 100644
--- a/README.rst
+++ b/README.rst
@@ -11,7 +11,7 @@ python-sqlparse - Parse SQL statements
 sqlparse is a non-validating SQL parser for Python.
 It provides support for parsing, splitting and formatting SQL statements.
 
-The module is compatible with Python 3.6+ and released under the terms of the
+The module is compatible with Python 3.8+ and released under the terms of the
 `New BSD license <https://opensource.org/licenses/BSD-3-Clause>`_.
 
 Visit the project page at https://github.com/andialbrecht/sqlparse for
diff --git a/pyproject.toml b/pyproject.toml
index d9a921f1..83cb93ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,8 +16,6 @@ classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.6",
-    "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
@@ -28,7 +26,7 @@ classifiers = [
     "Topic :: Database",
     "Topic :: Software Development",
 ]
-requires-python = ">=3.6"
+requires-python = ">=3.8"
 
 [project.urls]
 Home = "https://github.com/andialbrecht/sqlparse"
@@ -42,7 +40,6 @@ sqlformat = "sqlparse.__main__:main"
 
 [project.optional-dependencies]
 dev = [
-    "importlib_metadata<5; python_version <= '3.7'",
     "flake8",
     "build",
 ]
@@ -54,8 +51,8 @@ doc = [
     "sphinx",
 ]
 tox = [
-    "virtualenv<20.22.0",   # 20.22.0 dropped Python 3.6 support
-    "tox<4.5.0",  # >=4.5.0 requires virtualenv>=20.22
+    "virtualenv",
+    "tox",
 ]
 
 [tool.flit.sdist]
diff --git a/tox.ini b/tox.ini
index 40d84ad8..71a98fa2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,6 @@
 [tox]
 skip_missing_interpreters = True
 envlist =
-    py36
-    py37
     py38
     py39
     py310
@@ -22,4 +20,4 @@ commands =
 deps =
     flake8
 commands =
-    flake8 sqlparse tests setup.py
+    flake8 sqlparse tests

From c40f8000781633f2281c483c45be8d252bcba2e3 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Fri, 15 Mar 2024 08:32:35 +0100
Subject: [PATCH 64/88] Add .readthedocs.yaml.

---
 .readthedocs.yaml | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 .readthedocs.yaml

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 00000000..6dffd85a
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,35 @@
+# Read the Docs configuration file for Sphinx projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+    # You can also specify other tool versions:
+    # nodejs: "20"
+    # rust: "1.70"
+    # golang: "1.20"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+  # builder: "dirhtml"
+  # Fail on all warnings to avoid broken references
+  # fail_on_warning: true
+
+# Optionally build your docs in additional formats such as PDF and ePub
+# formats:
+#   - pdf
+#   - epub
+
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+# python:
+#   install:
+#     - requirements: docs/requirements.txt
\ No newline at end of file

From 02819f620e599343d55df53225b9ea6ca46d980c Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 07:39:12 +0100
Subject: [PATCH 65/88] Correct spelling error.

---
 sqlparse/lexer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 9d25c9e6..99f70f71 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -23,7 +23,7 @@ class Lexer:
     """The Lexer supports configurable syntax.
     To add support for additional keywords, use the `add_keywords` method."""
 
-    _default_intance = None
+    _default_instance = None
 
     # Development notes:
     # - This class is prepared to be able to support additional SQL dialects
@@ -47,10 +47,10 @@ class Lexer:
     def get_default_instance(cls):
         """Returns the lexer instance used internally
         by the sqlparse core functions."""
-        if cls._default_intance is None:
-            cls._default_intance = cls()
-            cls._default_intance.default_initialization()
-        return cls._default_intance
+        if cls._default_instance is None:
+            cls._default_instance = cls()
+            cls._default_instance.default_initialization()
+        return cls._default_instance
 
     def default_initialization(self):
         """Initialize the lexer with default dictionaries.

From 5bb129d3fc8a4d031bd37fab8e5ee24a199a9b8c Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 07:45:04 +0100
Subject: [PATCH 66/88] Thread-safe initialization of Lexer class (fixes #730).

---
 CHANGELOG         |  1 +
 sqlparse/lexer.py | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 5db0a595..38d53187 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -18,6 +18,7 @@ Bug Fixes
 * Ignore dunder attributes when creating Tokens (issue672).
 * Allow operators to precede dollar-quoted strings (issue763).
 * Fix parsing of nested order clauses (issue745, pr746 by john-bodley).
+* Thread-safe initialization of Lexer class (issue730).
 
 
 Release 0.4.4 (Apr 18, 2023)
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 99f70f71..f800a52b 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -7,6 +7,7 @@
 
 """SQL Lexer"""
 import re
+from threading import Lock
 
 # This code is based on the SqlLexer in pygments.
 # http://pygments.org/
@@ -24,19 +25,20 @@ class Lexer:
     To add support for additional keywords, use the `add_keywords` method."""
 
     _default_instance = None
+    _lock = Lock()
 
     # Development notes:
     # - This class is prepared to be able to support additional SQL dialects
     #   in the future by adding additional functions that take the place of
-    #   the function default_initialization()
+    #   the function default_initialization().
     # - The lexer class uses an explicit singleton behavior with the
     #   instance-getter method get_default_instance(). This mechanism has
     #   the advantage that the call signature of the entry-points to the
     #   sqlparse library are not affected. Also, usage of sqlparse in third
-    #   party code does not need to be adapted. On the other hand, singleton
-    #   behavior is not thread safe, and the current implementation does not
-    #   easily allow for multiple SQL dialects to be parsed in the same
-    #   process. Such behavior can be supported in the future by passing a
+    #   party code does not need to be adapted. On the other hand, the current
+    #   implementation does not easily allow for multiple SQL dialects to be
+    #   parsed in the same process.
+    #   Such behavior can be supported in the future by passing a
     #   suitably initialized lexer object as an additional parameter to the
     #   entry-point functions (such as `parse`). Code will need to be written
     #   to pass down and utilize such an object. The current implementation
@@ -47,9 +49,10 @@ class Lexer:
     def get_default_instance(cls):
         """Returns the lexer instance used internally
         by the sqlparse core functions."""
-        if cls._default_instance is None:
-            cls._default_instance = cls()
-            cls._default_instance.default_initialization()
+        with cls._lock:
+            if cls._default_instance is None:
+                cls._default_instance = cls()
+                cls._default_instance.default_initialization()
         return cls._default_instance
 
     def default_initialization(self):

From 6b05583f119224a43f8047159120edd0228ebd76 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 09:25:09 +0100
Subject: [PATCH 67/88] Add support for some of the JSON operators (fixes
 #682).

---
 CHANGELOG            |  1 +
 sqlparse/keywords.py |  2 ++
 tests/test_parse.py  | 14 ++++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 38d53187..c2e3a9bc 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,6 +12,7 @@ Enhancements:
   Some database backends love statements without semicolon (issue742).
 * Support TypedLiterals in get_parameters (pr649, by Khrol).
 * Improve splitting of Transact SQL when using GO keyword (issue762).
+* Support for some JSON operators (issue682).
 
 Bug Fixes
 
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 8911c7a8..9b7f8153 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -89,6 +89,8 @@
     # but the match isn't a keyword.
     (r'\w[$#\w]*', PROCESS_AS_KEYWORD),
     (r'[;:()\[\],\.]', tokens.Punctuation),
+    # JSON operators
+    (r'(\->>?|#>>?|@>|<@|\?\|?|\?&|\-|#\-)', tokens.Operator),
     (r'[<>=~!]+', tokens.Operator.Comparison),
     (r'[+/@#%^&|^-]+', tokens.Operator),
 ]
diff --git a/tests/test_parse.py b/tests/test_parse.py
index be416ef2..b49dcca3 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -579,3 +579,17 @@ def test_configurable_regex():
         for t in tokens
         if t.ttype not in sqlparse.tokens.Whitespace
     )[4] == (sqlparse.tokens.Keyword, "zorder by")
+
+
+@pytest.mark.parametrize('sql', [
+    '->', '->>', '#>', '#>>',
+    '@>', '<@',
+    # leaving ? out for now, they're somehow ambiguous as placeholders
+    # '?', '?|', '?&',
+    '||', '-', '#-'
+])
+def test_json_operators(sql):
+    p = sqlparse.parse(sql)
+    assert len(p) == 1
+    assert len(p[0].tokens) == 1
+    assert p[0].tokens[0].ttype == sqlparse.tokens.Operator

From 8c24779e027e92a1ed379fc271e20f540b0f3d20 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 10:10:19 +0100
Subject: [PATCH 68/88] Improve formatting of statements with JSON operators
 (fixes #542).

---
 CHANGELOG                   | 1 +
 sqlparse/engine/grouping.py | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index c2e3a9bc..c3387ee3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -13,6 +13,7 @@ Enhancements:
 * Support TypedLiterals in get_parameters (pr649, by Khrol).
 * Improve splitting of Transact SQL when using GO keyword (issue762).
 * Support for some JSON operators (issue682).
+* Improve formatting of statements containing JSON operators (issue542).
 
 Bug Fixes
 
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index c486318a..9190797a 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -139,7 +139,12 @@ def post(tlist, pidx, tidx, nidx):
 
 def group_period(tlist):
     def match(token):
-        return token.match(T.Punctuation, '.')
+        for ttype, value in ((T.Punctuation, '.'),
+                             (T.Operator, '->'),
+                             (T.Operator, '->>')):
+            if token.match(ttype, value):
+                return True
+        return False
 
     def valid_prev(token):
         sqlcls = sql.SquareBrackets, sql.Identifier
@@ -153,7 +158,7 @@ def valid_next(token):
     def post(tlist, pidx, tidx, nidx):
         # next_ validation is being performed here. issue261
         sqlcls = sql.SquareBrackets, sql.Function
-        ttypes = T.Name, T.String.Symbol, T.Wildcard
+        ttypes = T.Name, T.String.Symbol, T.Wildcard, T.String.Single
         next_ = tlist[nidx] if nidx is not None else None
         valid_next = imt(next_, i=sqlcls, t=ttypes)
 

From 6b10952dcab573783e69638c75ca366b09cbaa4f Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 10:16:29 +0100
Subject: [PATCH 69/88] Add new group for MySQL specific keywords.

---
 sqlparse/keywords.py | 6 +++++-
 sqlparse/lexer.py    | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 9b7f8153..82e39ad3 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -486,7 +486,6 @@
     'ROUTINE_CATALOG': tokens.Keyword,
     'ROUTINE_NAME': tokens.Keyword,
     'ROUTINE_SCHEMA': tokens.Keyword,
-    'ROW': tokens.Keyword,
     'ROWS': tokens.Keyword,
     'ROW_COUNT': tokens.Keyword,
     'RULE': tokens.Keyword,
@@ -829,6 +828,11 @@
     'UNLOCK': tokens.Keyword,
 }
 
+# MySQL
+KEYWORDS_MYSQL = {
+    'ROW': tokens.Keyword,
+}
+
 # PostgreSQL Syntax
 KEYWORDS_PLPGSQL = {
     'CONFLICT': tokens.Keyword,
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index f800a52b..9548bfe1 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -62,6 +62,7 @@ def default_initialization(self):
         self.set_SQL_REGEX(keywords.SQL_REGEX)
         self.add_keywords(keywords.KEYWORDS_COMMON)
         self.add_keywords(keywords.KEYWORDS_ORACLE)
+        self.add_keywords(keywords.KEYWORDS_MYSQL)
         self.add_keywords(keywords.KEYWORDS_PLPGSQL)
         self.add_keywords(keywords.KEYWORDS_HQL)
         self.add_keywords(keywords.KEYWORDS_MSACCESS)

From ee550f11b95b8d38a1be1b86fa674d37ffcb1609 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 10:19:01 +0100
Subject: [PATCH 70/88] Add test case for #542.

---
 tests/test_format.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test_format.py b/tests/test_format.py
index 70bb8055..a616f360 100644
--- a/tests/test_format.py
+++ b/tests/test_format.py
@@ -722,3 +722,10 @@ def test_format_right_margin_invalid_option(right_margin):
 def test_format_right_margin():
     # TODO: Needs better test, only raises exception right now
     sqlparse.format('foo', right_margin="79")
+
+
+def test_format_json_ops():  # issue542
+    formatted = sqlparse.format(
+        "select foo->'bar', foo->'bar';", reindent=True)
+    expected = "select foo->'bar',\n       foo->'bar';"
+    assert formatted == expected

From 326a316446c3e091a93950251e3e376ebf0d4127 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 12:17:56 +0100
Subject: [PATCH 71/88] Switch to hatch and replace tox.

---
 .flake8                          |  8 ++++
 .github/workflows/python-app.yml |  8 ++--
 .gitignore                       |  2 -
 pyproject.toml                   | 75 ++++++++++++++++++++++----------
 tests/test_cli.py                |  4 +-
 5 files changed, 66 insertions(+), 31 deletions(-)
 create mode 100644 .flake8

diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..bd01afcf
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,8 @@
+[flake8]
+exclude =
+  tests,
+  docs,
+  dist
+max-complexity = 10
+statistics = True
+show-source = True
\ No newline at end of file
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 96c76bca..31b900bd 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -39,11 +39,11 @@ jobs:
         check-latest: true
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip flit
+        python -m pip install --upgrade pip hatch
         flit install --deps=develop
     - name: Lint with flake8
-      run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics
-    - name: Test with pytest
-      run: pytest --cov=sqlparse
+      run: hatch run flake8
+    - name: Test with pytest and coverage
+      run: hatch run cov
     - name: Publish to codecov
       uses: codecov/codecov-action@v4
diff --git a/.gitignore b/.gitignore
index cc2ec16b..77479f17 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,9 +4,7 @@ dist/
 build/
 MANIFEST
 .coverage
-.tox/
 .cache/
 *.egg-info/
 htmlcov/
-coverage.xml
 .pytest_cache
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 83cb93ed..c2d7fe4f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
-requires = ["flit_core >=3.2,<4"]
-build-backend = "flit_core.buildapi"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 
 [project]
 name = "sqlparse"
@@ -40,34 +40,63 @@ sqlformat = "sqlparse.__main__:main"
 
 [project.optional-dependencies]
 dev = [
-    "flake8",
+    "hatch",
     "build",
 ]
-test = [
-    "pytest",
-    "pytest-cov",
-]
 doc = [
     "sphinx",
 ]
-tox = [
-    "virtualenv",
-    "tox",
+
+[tool.hatch.version]
+path = "sqlparse/__init__.py"
+
+[tool.hatch.envs.default]
+dependencies = [
+    "coverage[toml]>=6.5",
+    "pytest",
+    # switch to ruff, but fix problems first
+    # but check defaults!
+    # https://hatch.pypa.io/1.9/config/static-analysis/#default-settings
+    "flake8",
+]
+[tool.hatch.envs.default.scripts]
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+cov-report = [
+    "- coverage combine",
+    "coverage report",
+]
+cov = [
+    "test-cov",
+    "cov-report",
 ]
+check = "flake8 sqlparse/"
 
-[tool.flit.sdist]
-include = [
-    "docs/source/",
-    "docs/sqlformat.1",
-    "docs/Makefile",
-    "tests/*.py", "tests/files/*.sql",
-    "LICENSE",
-    "TODO",
-    "AUTHORS",
-    "CHANGELOG",
-    "Makefile",
-    "tox.ini",
+[[tool.hatch.envs.all.matrix]]
+python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
+
+[tool.hatch.envs.types]
+dependencies = [
+    "mypy>=1.0.0",
 ]
+[tool.hatch.envs.types.scripts]
+check = "mypy --install-types --non-interactive {args:sqlparse tests}"
 
 [tool.coverage.run]
-omit = ["sqlparse/__main__.py"]
+source_pkgs = ["sqlparse", "tests"]
+branch = true
+parallel = true
+omit = [
+    "sqlparse/__main__.py",
+]
+
+[tool.coverage.paths]
+sqlparse = ["sqlparse"]
+tests = ["tests"]
+
+[tool.coverage.report]
+exclude_lines = [
+    "no cov",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
diff --git a/tests/test_cli.py b/tests/test_cli.py
index b681a60b..a0c1f2b0 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -69,8 +69,8 @@ def test_stdout(filepath, load_file, capsys):
 
 def test_script():
     # Call with the --help option as a basic sanity check.
-    cmd = "{:s} -m sqlparse.cli --help".format(sys.executable)
-    assert subprocess.call(cmd.split()) == 0
+    cmd = [sys.executable, '-m', 'sqlparse.cli', '--help']
+    assert subprocess.call(cmd) == 0
 
 
 @pytest.mark.parametrize('fpath, encoding', (

From be9dc7a31f2c2068ea069648029363735a751bfc Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 12:19:16 +0100
Subject: [PATCH 72/88] CI: Remove obsolete flit command.

---
 .github/workflows/python-app.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 31b900bd..4b1853e9 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -40,7 +40,6 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip hatch
-        flit install --deps=develop
     - name: Lint with flake8
       run: hatch run flake8
     - name: Test with pytest and coverage

From 135bfadf9662031de9b27b13555a8e05ec0f4806 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 12:20:25 +0100
Subject: [PATCH 73/88] CI: Reset max-complexity to current default.

---
 .flake8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.flake8 b/.flake8
index bd01afcf..b508dca8 100644
--- a/.flake8
+++ b/.flake8
@@ -3,6 +3,6 @@ exclude =
   tests,
   docs,
   dist
-max-complexity = 10
+max-complexity = 31
 statistics = True
 show-source = True
\ No newline at end of file

From 8871dd016c1eb332a751ea8b3dbb2e902a5b8ba6 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 12:25:43 +0100
Subject: [PATCH 74/88] CI: Disable 3.13-dev for now.

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 4b1853e9..555e5dc2 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,7 +19,7 @@ jobs:
     strategy:
       matrix:
         py:
-          - "3.13-dev"
+          #- "3.13-dev"
           - "3.12"
           - "3.11"
           - "3.10"

From f55b4e1b69ac2e4fc36151c46d5405ec80b89f58 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 13:27:42 +0100
Subject: [PATCH 75/88] Remove tox.ini.

---
 tox.ini | 23 -----------------------
 1 file changed, 23 deletions(-)
 delete mode 100644 tox.ini

diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 71a98fa2..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,23 +0,0 @@
-[tox]
-skip_missing_interpreters = True
-envlist =
-    py38
-    py39
-    py310
-    py311
-    py312
-    flake8
-
-[testenv]
-deps =
-    pytest
-    pytest-cov
-commands =
-    sqlformat --version
-    pytest --cov=sqlparse {posargs}
-
-[testenv:flake8]
-deps =
-    flake8
-commands =
-    flake8 sqlparse tests

From d76e8a4425d82a6cd704b5e549a8cabefa931341 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 16:34:23 +0100
Subject: [PATCH 76/88] Identify TRUNCATE as DDL, REVOKE/GRANT as DCL keywords.

See #719 as well.
---
 CHANGELOG            | 2 ++
 sqlparse/keywords.py | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index c3387ee3..ca4d23aa 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -21,6 +21,8 @@ Bug Fixes
 * Allow operators to precede dollar-quoted strings (issue763).
 * Fix parsing of nested order clauses (issue745, pr746 by john-bodley).
 * Thread-safe initialization of Lexer class (issue730).
+* Classify TRUNCATE as DDL and GRANT/REVOKE as DCL keywords (based on pr719
+  by josuc1, thanks for bringing this up!)
 
 
 Release 0.4.4 (Apr 18, 2023)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 82e39ad3..d734bdff 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -288,7 +288,6 @@
     'GLOBAL': tokens.Keyword,
     'GO': tokens.Keyword,
     'GOTO': tokens.Keyword,
-    'GRANT': tokens.Keyword,
     'GRANTED': tokens.Keyword,
     'GROUPING': tokens.Keyword,
 
@@ -477,7 +476,6 @@
     'RETURNED_SQLSTATE': tokens.Keyword,
     'RETURNING': tokens.Keyword,
     'RETURNS': tokens.Keyword,
-    'REVOKE': tokens.Keyword,
     'RIGHT': tokens.Keyword,
     'ROLE': tokens.Keyword,
     'ROLLBACK': tokens.Keyword.DML,
@@ -577,7 +575,6 @@
     'TRIGGER_SCHEMA': tokens.Keyword,
     'TRIM': tokens.Keyword,
     'TRUE': tokens.Keyword,
-    'TRUNCATE': tokens.Keyword,
     'TRUSTED': tokens.Keyword,
     'TYPE': tokens.Keyword,
 
@@ -684,6 +681,9 @@
     'DROP': tokens.Keyword.DDL,
     'CREATE': tokens.Keyword.DDL,
     'ALTER': tokens.Keyword.DDL,
+    'TRUNCATE': tokens.Keyword.DDL,
+    'GRANT': tokens.Keyword.DCL,
+    'REVOKE': tokens.Keyword.DCL,
 
     'WHERE': tokens.Keyword,
     'FROM': tokens.Keyword,

From db1ebe21a1a1c34b510b79fd52bf5130a99606bc Mon Sep 17 00:00:00 2001
From: griff <70294474+griffatrasgo@users.noreply.github.com>
Date: Wed, 18 Jan 2023 10:12:59 -0500
Subject: [PATCH 77/88] add snowflake and bq keywords

---
 sqlparse/keywords.py | 32 ++++++++++++++++++++++++++++++++
 sqlparse/lexer.py    |  2 ++
 2 files changed, 34 insertions(+)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index d734bdff..130485d9 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -966,3 +966,35 @@
 KEYWORDS_MSACCESS = {
     'DISTINCTROW': tokens.Keyword,
 }
+
+
+KEYWORDS_SNOWFLAKE = {    
+    'ACCOUNT': tokens.Keyword,
+    'GSCLUSTER': tokens.Keyword,
+    'ISSUE': tokens.Keyword,
+    'ORGANIZATION': tokens.Keyword,
+    'PIVOT': tokens.Keyword,
+    'QUALIFY': tokens.Keyword,
+    'REGEXP': tokens.Keyword,
+    'RLIKE': tokens.Keyword,
+    'SAMPLE': tokens.Keyword,
+    'TRY_CAST': tokens.Keyword,
+    'UNPIVOT': tokens.Keyword,
+
+    'VARIANT': tokens.Name.Builtin,
+}
+
+
+KEYWORDS_BIGQUERY = {
+    'ASSERT_ROWS_MODIFIED': tokens.Keyword,
+    'DEFINE': tokens.Keyword,
+    'ENUM': tokens.Keyword,
+    'HASH': tokens.Keyword,
+    'LOOKUP': tokens.Keyword,
+    'PRECEDING': tokens.Keyword,
+    'PROTO': tokens.Keyword,
+    'RESPECT': tokens.Keyword,
+    'TABLESAMPLE': tokens.Keyword,
+
+    'BIGNUMERIC': tokens.Name.Builtin,
+}
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 9548bfe1..0ff99bf3 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -67,6 +67,8 @@ def default_initialization(self):
         self.add_keywords(keywords.KEYWORDS_HQL)
         self.add_keywords(keywords.KEYWORDS_MSACCESS)
         self.add_keywords(keywords.KEYWORDS)
+        self.add_keywords(keywords.KEYWORDS_SNOWFLAKE)
+        self.add_keywords(keywords.KEYWORDS_BIGQUERY)
 
     def clear(self):
         """Clear all syntax configurations.

From 4ad66a7d750edf635c053d52ce183df7fa0afc4b Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 16:48:19 +0100
Subject: [PATCH 78/88] Update Changelog and authors.

---
 AUTHORS           | 1 +
 CHANGELOG         | 1 +
 sqlparse/lexer.py | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/AUTHORS b/AUTHORS
index 934bbe33..90def42c 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -32,6 +32,7 @@ Alphabetical list of contributors:
 * Fredy Wijaya <fredy.wijaya@gmail.com>
 * Gavin Wahl <gwahl@fusionbox.com>
 * Georg Traar <georg@crate.io>
+* griff <70294474+griffatrasgo@users.noreply.github.com>
 * Hugo van Kemenade <hugovk@users.noreply.github.com>
 * hurcy <cinyoung.hur@gmail.com>
 * Ian Robertson <ian.robertson@capitalone.com>
diff --git a/CHANGELOG b/CHANGELOG
index ca4d23aa..6aa1e278 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -14,6 +14,7 @@ Enhancements:
 * Improve splitting of Transact SQL when using GO keyword (issue762).
 * Support for some JSON operators (issue682).
 * Improve formatting of statements containing JSON operators (issue542).
+* Support for BigQuery and Snowflake keywords (pr699, by griffatrasgo).
 
 Bug Fixes
 
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 0ff99bf3..8f88d171 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -66,9 +66,9 @@ def default_initialization(self):
         self.add_keywords(keywords.KEYWORDS_PLPGSQL)
         self.add_keywords(keywords.KEYWORDS_HQL)
         self.add_keywords(keywords.KEYWORDS_MSACCESS)
-        self.add_keywords(keywords.KEYWORDS)
         self.add_keywords(keywords.KEYWORDS_SNOWFLAKE)
         self.add_keywords(keywords.KEYWORDS_BIGQUERY)
+        self.add_keywords(keywords.KEYWORDS)
 
     def clear(self):
         """Clear all syntax configurations.

From fc4b0beab89c5598d556572cb6db0165affb017b Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 16:52:04 +0100
Subject: [PATCH 79/88] Code cleanup.

---
 sqlparse/keywords.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 130485d9..029d8bae 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -968,7 +968,7 @@
 }
 
 
-KEYWORDS_SNOWFLAKE = {    
+KEYWORDS_SNOWFLAKE = {
     'ACCOUNT': tokens.Keyword,
     'GSCLUSTER': tokens.Keyword,
     'ISSUE': tokens.Keyword,

From 46971e5a804b29e7dbd437155a8ceffab8ef1cd5 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 16 Mar 2024 17:03:23 +0100
Subject: [PATCH 80/88] Fix parsing of PRIMARY KEY (fixes #740).

---
 CHANGELOG                 | 3 ++-
 sqlparse/keywords.py      | 1 +
 tests/test_regressions.py | 6 ++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 6aa1e278..745328e6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -23,7 +23,8 @@ Bug Fixes
 * Fix parsing of nested order clauses (issue745, pr746 by john-bodley).
 * Thread-safe initialization of Lexer class (issue730).
 * Classify TRUNCATE as DDL and GRANT/REVOKE as DCL keywords (based on pr719
-  by josuc1, thanks for bringing this up!)
+  by josuc1, thanks for bringing this up!).
+* Fix parsing of PRIMARY KEY (issue740).
 
 
 Release 0.4.4 (Apr 18, 2023)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 029d8bae..3b963557 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -77,6 +77,7 @@
     (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
     (r'GROUP\s+BY\b', tokens.Keyword),
     (r'ORDER\s+BY\b', tokens.Keyword),
+    (r'PRIMARY\s+KEY\b', tokens.Keyword),
     (r'HANDLER\s+FOR\b', tokens.Keyword),
     (r'GO(\s\d+)\b', tokens.Keyword),
     (r'(LATERAL\s+VIEW\s+)'
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index 961adc17..29cb502c 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -444,3 +444,9 @@ def test_copy_issue672():
     p = sqlparse.parse('select * from foo')[0]
     copied = copy.deepcopy(p)
     assert str(p) == str(copied)
+
+
+def test_primary_key_issue740():
+    p = sqlparse.parse('PRIMARY KEY')[0]
+    assert len(p.tokens) == 1
+    assert p.tokens[0].ttype == T.Keyword
\ No newline at end of file

From 012c9f10c8ddfa47ccf17ead28122492155cf6fc Mon Sep 17 00:00:00 2001
From: Adam Johnson <me@adamj.eu>
Date: Sat, 9 Mar 2024 20:22:21 +0000
Subject: [PATCH 81/88] Optimize sqlparse.utils.imt().

---
 sqlparse/utils.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 512f0385..58c0245a 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -86,20 +86,23 @@ def imt(token, i=None, m=None, t=None):
     :param t: TokenType or Tuple/List of TokenTypes
     :return:  bool
     """
-    clss = i
-    types = [t, ] if t and not isinstance(t, list) else t
-    mpatterns = [m, ] if m and not isinstance(m, list) else m
-
     if token is None:
         return False
-    elif clss and isinstance(token, clss):
-        return True
-    elif mpatterns and any(token.match(*pattern) for pattern in mpatterns):
+    if i and isinstance(token, i):
         return True
-    elif types and any(token.ttype in ttype for ttype in types):
-        return True
-    else:
-        return False
+    if m:
+        if isinstance(m, list):
+            if any(token.match(*pattern) for pattern in m):
+                return True
+        elif token.match(*m):
+            return True
+    if t:
+        if isinstance(t, list):
+            if any(token.ttype in ttype for ttype in t):
+                return True
+        elif token.ttype in t:
+            return True
+    return False
 
 
 def consume(iterator, n):

From d8f81471cfc2c39ac43128e2a0c8cc67c313cc40 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sun, 17 Mar 2024 19:19:16 +0100
Subject: [PATCH 82/88] Update AUTHORS and Changelog.

---
 AUTHORS   | 1 +
 CHANGELOG | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index 90def42c..476e9c1e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,6 +8,7 @@ project: https://bitbucket.org/gutworth/six.
 
 Alphabetical list of contributors:
 * Adam Greenhall <agreenhall@lyft.com>
+* Adam Johnson <me@adamj.eu>
 * Aki Ariga <chezou+github@gmail.com>
 * Alexander Beedie <ayembee@gmail.com>
 * Alexey Malyshev <nostrict@gmail.com>
diff --git a/CHANGELOG b/CHANGELOG
index 745328e6..aeae1c6a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -26,6 +26,10 @@ Bug Fixes
   by josuc1, thanks for bringing this up!).
 * Fix parsing of PRIMARY KEY (issue740).
 
+Other
+
+* Optimize performance of matching function (pr799, by adamchainz).
+
 
 Release 0.4.4 (Apr 18, 2023)
 ----------------------------

From 617b8f6cd3c55bacf2c80130901508518753f7e1 Mon Sep 17 00:00:00 2001
From: Zi-Xuan Fu <r33s3n6@gmail.com>
Date: Tue, 26 Mar 2024 21:31:51 +0800
Subject: [PATCH 83/88] Add OVER clause, and group it into Function (fixes
 #701)

---
 sqlparse/engine/grouping.py | 18 +++++++++++++++++-
 sqlparse/sql.py             |  5 +++++
 tests/test_grouping.py      | 14 ++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 9190797a..926a3c1b 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -235,6 +235,16 @@ def group_identifier(tlist):
         tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
 
 
+@recurse(sql.Over)
+def group_over(tlist):
+    tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN)
+    while token:
+        nidx, next_ = tlist.token_next(tidx)
+        if imt(next_, i=sql.Parenthesis, t=T.Name):
+            tlist.group_tokens(sql.Over, tidx, nidx)
+        tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN, idx=tidx)
+
+
 def group_arrays(tlist):
     sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
     ttypes = T.Name, T.String.Symbol
@@ -361,7 +371,12 @@ def group_functions(tlist):
     while token:
         nidx, next_ = tlist.token_next(tidx)
         if isinstance(next_, sql.Parenthesis):
-            tlist.group_tokens(sql.Function, tidx, nidx)
+            over_idx, over = tlist.token_next(nidx)
+            if over and isinstance(over, sql.Over):
+                eidx = over_idx
+            else:
+                eidx = nidx
+            tlist.group_tokens(sql.Function, tidx, eidx)
         tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
 
 
@@ -412,6 +427,7 @@ def group(stmt):
         group_for,
         group_begin,
 
+        group_over,
         group_functions,
         group_where,
         group_period,
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 41606dd8..def06797 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -554,6 +554,11 @@ class Where(TokenList):
         'HAVING', 'RETURNING', 'INTO')
 
 
+class Over(TokenList):
+    """An OVER clause."""
+    M_OPEN = T.Keyword, 'OVER'
+
+
 class Having(TokenList):
     """A HAVING clause."""
     M_OPEN = T.Keyword, 'HAVING'
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index e90243b5..0bf10c38 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -185,6 +185,20 @@ def test_grouping_identifier_function():
     assert isinstance(p.tokens[0], sql.Identifier)
     assert isinstance(p.tokens[0].tokens[0], sql.Operation)
     assert isinstance(p.tokens[0].tokens[0].tokens[0], sql.Function)
+    p = sqlparse.parse('foo(c1) over win1 as bar')[0]
+    assert isinstance(p.tokens[0], sql.Identifier)
+    assert isinstance(p.tokens[0].tokens[0], sql.Function)
+    assert len(p.tokens[0].tokens[0].tokens) == 4
+    assert isinstance(p.tokens[0].tokens[0].tokens[3], sql.Over)
+    assert isinstance(p.tokens[0].tokens[0].tokens[3].tokens[2],
+                      sql.Identifier)
+    p = sqlparse.parse('foo(c1) over (partition by c2 order by c3) as bar')[0]
+    assert isinstance(p.tokens[0], sql.Identifier)
+    assert isinstance(p.tokens[0].tokens[0], sql.Function)
+    assert len(p.tokens[0].tokens[0].tokens) == 4
+    assert isinstance(p.tokens[0].tokens[0].tokens[3], sql.Over)
+    assert isinstance(p.tokens[0].tokens[0].tokens[3].tokens[2],
+                      sql.Parenthesis)
 
 
 @pytest.mark.parametrize('s', ['foo+100', 'foo + 100', 'foo*100'])

From e03b74e608b71dd06824c2cb42421c0d790248e3 Mon Sep 17 00:00:00 2001
From: Zi-Xuan Fu <r33s3n6@gmail.com>
Date: Wed, 27 Mar 2024 11:00:32 +0800
Subject: [PATCH 84/88] Fix Function.get_parameters(), add Function.get_window()

---
 sqlparse/sql.py        | 9 ++++++++-
 tests/test_grouping.py | 8 ++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index def06797..05e17748 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -623,7 +623,7 @@ class Function(NameAliasMixin, TokenList):
 
     def get_parameters(self):
         """Return a list of parameters."""
-        parenthesis = self.tokens[-1]
+        parenthesis = self.token_next_by(i=Parenthesis)[1]
         result = []
         for token in parenthesis.tokens:
             if isinstance(token, IdentifierList):
@@ -633,6 +633,13 @@ def get_parameters(self):
                 result.append(token)
         return result
 
+    def get_window(self):
+        """Return the window of the OVER clause, or None if there is none."""
+        _, over_clause = self.token_next_by(i=Over)
+        if over_clause is None:  # the (idx, token) pair itself is always truthy
+            return None
+        return over_clause.tokens[-1]
+
 
 class Begin(TokenList):
     """A BEGIN/END block."""
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index 0bf10c38..b39ff270 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -392,6 +392,14 @@ def test_grouping_function():
     p = sqlparse.parse('foo(null, bar)')[0]
     assert isinstance(p.tokens[0], sql.Function)
     assert len(list(p.tokens[0].get_parameters())) == 2
+    p = sqlparse.parse('foo(5) over win1')[0]
+    assert isinstance(p.tokens[0], sql.Function)
+    assert len(list(p.tokens[0].get_parameters())) == 1
+    assert isinstance(p.tokens[0].get_window(), sql.Identifier)
+    p = sqlparse.parse('foo(5) over (PARTITION BY c1)')[0]
+    assert isinstance(p.tokens[0], sql.Function)
+    assert len(list(p.tokens[0].get_parameters())) == 1
+    assert isinstance(p.tokens[0].get_window(), sql.Parenthesis)
 
 
 def test_grouping_function_not_in():

From f1bcf2f8a7ddf6854c99990c56ff5394f4981d58 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 13 Apr 2024 13:42:51 +0200
Subject: [PATCH 85/88] Update AUTHORS and Changelog.

---
 AUTHORS   | 1 +
 CHANGELOG | 1 +
 2 files changed, 2 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index 476e9c1e..261b04df 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -82,3 +82,4 @@ Alphabetical list of contributors:
 * Will Jones <willjones127@gmail.com>
 * William Ivanski <william.ivanski@gmail.com>
 * Yago Riveiro <yago.riveiro@gmail.com>
+* Zi-Xuan Fu <r33s3n6@gmail.com>
diff --git a/CHANGELOG b/CHANGELOG
index aeae1c6a..4e98e7f6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ Enhancements:
 * Support for some JSON operators (issue682).
 * Improve formatting of statements containing JSON operators (issue542).
 * Support for BigQuery and Snowflake keywords (pr699, by griffatrasgo).
+* Support parsing of OVER clause (issue701, pr768 by r33s3n6).
 
 Bug Fixes
 

From b4a39d9850969b4e1d6940d32094ee0b42a2cf03 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 13 Apr 2024 13:59:00 +0200
Subject: [PATCH 86/88] Raise SQLParseError instead of RecursionError.

---
 CHANGELOG                 |  5 +++++
 sqlparse/sql.py           | 14 +++++++++-----
 tests/test_regressions.py | 17 ++++++++++++++++-
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 4e98e7f6..6c442c05 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,11 @@ Notable Changes
 
 * Drop support for Python 3.5, 3.6, and 3.7.
 * Python 3.12 is now supported (pr725, by hugovk).
+* IMPORTANT: Fixes a potential denial of service attack (DOS) due to recursion
+  error for deeply nested statements. Instead of recursion error a generic
+  SQLParseError is raised. See the security advisory for details:
+  https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-2m57-hf25-phgg
+  The vulnerability was discovered by @uriyay-jfrog. Thanks for reporting!
 
 Enhancements:
 
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 05e17748..bd5f35b1 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -10,6 +10,7 @@
 import re
 
 from sqlparse import tokens as T
+from sqlparse.exceptions import SQLParseError
 from sqlparse.utils import imt, remove_quotes
 
 
@@ -209,11 +210,14 @@ def flatten(self):
 
         This method is recursively called for all child tokens.
         """
-        for token in self.tokens:
-            if token.is_group:
-                yield from token.flatten()
-            else:
-                yield token
+        try:
+            for token in self.tokens:
+                if token.is_group:
+                    yield from token.flatten()
+                else:
+                    yield token
+        except RecursionError as err:
+            raise SQLParseError('Maximum recursion depth exceeded') from err
 
     def get_sublists(self):
         for token in self.tokens:
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index 29cb502c..1edd3da6 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -1,9 +1,11 @@
 import copy
+import sys
 
 import pytest
 
 import sqlparse
 from sqlparse import sql, tokens as T
+from sqlparse.exceptions import SQLParseError
 
 
 def test_issue9():
@@ -449,4 +451,17 @@ def test_copy_issue672():
 def test_primary_key_issue740():
     p = sqlparse.parse('PRIMARY KEY')[0]
     assert len(p.tokens) == 1
-    assert p.tokens[0].ttype == T.Keyword
\ No newline at end of file
+    assert p.tokens[0].ttype == T.Keyword
+
+
+@pytest.fixture
+def limit_recursion():
+    curr_limit = sys.getrecursionlimit()
+    sys.setrecursionlimit(70)
+    yield
+    sys.setrecursionlimit(curr_limit)
+
+
+def test_max_recursion(limit_recursion):
+    with pytest.raises(SQLParseError):
+        sqlparse.parse('[' * 100 + ']' * 100)

From 29f2e0a6609ddc1fa248faef1bc41616043c544e Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 13 Apr 2024 14:05:19 +0200
Subject: [PATCH 87/88] Raise recursion limit for tests.

---
 tests/test_regressions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index 1edd3da6..b3288901 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -457,11 +457,11 @@ def test_primary_key_issue740():
 @pytest.fixture
 def limit_recursion():
     curr_limit = sys.getrecursionlimit()
-    sys.setrecursionlimit(70)
+    sys.setrecursionlimit(100)
     yield
     sys.setrecursionlimit(curr_limit)
 
 
 def test_max_recursion(limit_recursion):
     with pytest.raises(SQLParseError):
-        sqlparse.parse('[' * 100 + ']' * 100)
+        sqlparse.parse('[' * 1000 + ']' * 1000)

From ddbd0ec3592545c914fe71e47118c04582d8bfb0 Mon Sep 17 00:00:00 2001
From: Andi Albrecht <albrecht.andi@gmail.com>
Date: Sat, 13 Apr 2024 14:33:09 +0200
Subject: [PATCH 88/88] Bump version.

---
 CHANGELOG            | 4 ++--
 sqlparse/__init__.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 6c442c05..da7b6178 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,5 @@
-Development Version
--------------------
+Release 0.5.0 (Apr 13, 2024)
+----------------------------
 
 Notable Changes
 
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index b80b2d60..17b4b525 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -16,7 +16,7 @@
 from sqlparse import formatter
 
 
-__version__ = '0.5.0.dev0'
+__version__ = '0.5.0'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']