From 9f44d54c07180b826a6276d3acf5e1458b507c3f Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 23 Sep 2022 20:42:55 +0200 Subject: [PATCH 01/88] Switch back to development mode. --- CHANGELOG | 6 ++++++ sqlparse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 229d9a4d..2b00a890 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Development Version +------------------- + +Nothing yet. + + Release 0.4.3 (Sep 23, 2022) ---------------------------- diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index 0dd3475e..f901185a 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.3' +__version__ = '0.4.4.dev0' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From e9241945801808d1db7f76bdccbbe9a200042c37 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 10:57:31 +0100 Subject: [PATCH 02/88] Revert "add regex pattern to identify IN as a Compasion token" This reverts commit 28c4d4026e1d9389a99d8cd627c96fa360c17fc4. See #694. The expectation is that IN is primarily recognized as a keyword, although it acts as a comparison operator. This also matches the definition of IN in most SQL syntax references where it is listed as a reserved keyword (PostgreSQL: https://www.postgresql.org/docs/current/sql-keywords-appendix.html, MySQL: https://dev.mysql.com/doc/refman/8.0/en/keywords.html, for example). --- sqlparse/keywords.py | 2 +- tests/test_grouping.py | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index d73e1143..dff5e1cb 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -50,7 +50,7 @@ def is_keyword(value): (r'(? Date: Fri, 30 Dec 2022 11:08:08 +0100 Subject: [PATCH 03/88] Update changelog. 
--- CHANGELOG | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 2b00a890..123ed173 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,12 @@ Development Version ------------------- -Nothing yet. +Bug Fixes + +* Revert a change from 0.4.0 that changed IN to be a comparison (issue694). + The primary expectation is that IN is treated as a keyword and not as a + comparison operator. That also follows the definition of reserved keywords + for the major SQL syntax definitions. Release 0.4.3 (Sep 23, 2022) From bacbeff74bc3d1866246bb1f397f18e64a62c27a Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:23:29 +0100 Subject: [PATCH 04/88] Update workflow runner. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 4f165859..e87b5e42 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -12,7 +12,7 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"] From bf5aff484146ffda3944088c48323ad9272b91fb Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:26:49 +0100 Subject: [PATCH 05/88] Update tested Python versions in workflow. 
--- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index e87b5e42..2e07ee10 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"] + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] steps: - uses: actions/checkout@v3 From 243da5137c6d21b7b246f884fb07e1f0625f2673 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:32:09 +0100 Subject: [PATCH 06/88] Setup a nightly build, even without changes in the module itself. --- .github/workflows/python-app.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2e07ee10..1a173f81 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -8,6 +8,8 @@ on: branches: [ master ] pull_request: branches: [ master ] + schedule: + cron: '0 12 * * *' jobs: build: From cda0e499a1c762662d2b06b18e7b4aed2da75bc7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:32:54 +0100 Subject: [PATCH 07/88] Fix schedule trigger syntax. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 1a173f81..fef18a5d 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -9,7 +9,7 @@ on: pull_request: branches: [ master ] schedule: - cron: '0 12 * * *' + - cron: '0 12 * * *' jobs: build: From 8b789f286e1b6cbf05c15020ea7544cb7f02f8f7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 15:44:37 +0100 Subject: [PATCH 08/88] Switch to pyproject.toml (fixes #685). 
--- .github/workflows/python-app.yml | 9 ++-- CHANGELOG | 4 ++ MANIFEST.in | 11 ----- Makefile | 2 +- pyproject.toml | 70 ++++++++++++++++++++++++++++++++ setup.cfg | 55 ------------------------- setup.py | 12 ------ 7 files changed, 80 insertions(+), 83 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index fef18a5d..906ca7e8 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -5,7 +5,8 @@ name: Python application on: push: - branches: [ master ] + branches: + - master pull_request: branches: [ master ] schedule: @@ -27,9 +28,9 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install codecov flake8 pytest pytest-cov - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m pip install --upgrade pip flit + flit install --deps=develop + pip install codecov - name: Lint with flake8 run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics - name: Test with pytest diff --git a/CHANGELOG b/CHANGELOG index 123ed173..94864138 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,10 @@ Bug Fixes comparison operator. That also follows the definition of reserved keywords for the major SQL syntax definitions. +Other + +* sqlparse now uses pyproject.toml instead of setup.cfg (issue685). 
+ Release 0.4.3 (Sep 23, 2022) ---------------------------- diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 8043b359..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,11 +0,0 @@ -recursive-include docs source/* -include docs/sqlformat.1 -include docs/Makefile -recursive-include tests *.py *.sql -include LICENSE -include TODO -include AUTHORS -include CHANGELOG -include Makefile -include setup.cfg -include tox.ini diff --git a/Makefile b/Makefile index ee35e546..1657822e 100644 --- a/Makefile +++ b/Makefile @@ -22,5 +22,5 @@ clean: release: @rm -rf dist/ - python setup.py sdist bdist_wheel + python -m build twine upload --sign --identity E0B84F81 dist/* diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..338a53ce --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "sqlparse" +description = "A non-validating SQL parser." +authors = [{name = "Andi Albrecht", email = "albrecht.andi@gmail.com"}] +readme = "README.rst" +dynamic = ["version"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Database", + "Topic :: Software Development", +] +requires-python = ">=3.5" + +[project.urls] +Home = "https://github.com/andialbrecht/sqlparse" +Documentation = 
"https://sqlparse.readthedocs.io/" +"Release Notes" = "https://sqlparse.readthedocs.io/en/latest/changes/" +Source = "https://github.com/andialbrecht/sqlparse" +Tracker = "https://github.com/andialbrecht/sqlparse/issues" + +[project.scripts] +sqlformat = "sqlparse.__main__:main" + +[project.optional-dependencies] +dev = [ + "flake8", + "build", +] +test = [ + "pytest", + "pytest-cov", +] +doc = [ + "sphinx", +] + +[tool.flit.sdist] +include = [ + "docs/source/", + "docs/sqlformat.1", + "docs/Makefile", + "tests/*.py", "tests/files/*.sql", + "LICENSE", + "TODO", + "AUTHORS", + "CHANGELOG", + "Makefile", + "tox.ini", +] + +[tool.coverage.run] +omit = ["sqlparse/__main__.py"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0843b704..00000000 --- a/setup.cfg +++ /dev/null @@ -1,55 +0,0 @@ -[metadata] -name = sqlparse -version = attr: sqlparse.__version__ -url = https://github.com/andialbrecht/sqlparse -author = Andi Albrecht -author_email = albrecht.andi@gmail.com -description = A non-validating SQL parser. 
-long_description = file: README.rst -license = BSD-3-Clause -classifiers = - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - License :: OSI Approved :: BSD License - Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Topic :: Database - Topic :: Software Development -project_urls = - Documentation = https://sqlparse.readthedocs.io/ - Release Notes = https://sqlparse.readthedocs.io/en/latest/changes/ - Source = https://github.com/andialbrecht/sqlparse - Tracker = https://github.com/andialbrecht/sqlparse/issues - -[options] -python_requires = >=3.5 -packages = find: - -[options.packages.find] -exclude = tests - -[options.entry_points] -console_scripts = - sqlformat = sqlparse.__main__:main - -[tool:pytest] -xfail_strict = True - -[flake8] -extend-ignore = - E731 - -[coverage:run] -branch = False -omit = - sqlparse/__main__.py diff --git a/setup.py b/setup.py deleted file mode 100644 index ede0aff8..00000000 --- a/setup.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2009-2020 the sqlparse authors and contributors -# -# -# This setup script is part of python-sqlparse and is released under -# the BSD License: https://opensource.org/licenses/BSD-3-Clause - -from setuptools import setup - - -setup() From 9a1cb5dddd1545c30b1e3a2c6f5d3514d079d93e Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 14:51:58 +0000 Subject: [PATCH 09/88] configurable syntax --- sqlparse/keywords.py | 22 ++++------- sqlparse/lexer.py | 87 
++++++++++++++++++++++++++++++++++-------- tests/test_keywords.py | 3 +- 3 files changed, 82 insertions(+), 30 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index dff5e1cb..ce537812 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -6,23 +6,17 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause import re +from typing import Dict, List, Tuple, Callable, Union from sqlparse import tokens +# object() only supports "is" and is useful as a marker +PROCESS_AS_KEYWORD = object() -def is_keyword(value): - """Checks for a keyword. - - If the given value is in one of the KEYWORDS_* dictionary - it's considered a keyword. Otherwise tokens.Name is returned. - """ - val = value.upper() - return (KEYWORDS_COMMON.get(val) - or KEYWORDS_ORACLE.get(val) - or KEYWORDS_PLPGSQL.get(val) - or KEYWORDS_HQL.get(val) - or KEYWORDS_MSACCESS.get(val) - or KEYWORDS.get(val, tokens.Name)), value +SQL_REGEX_TYPE = List[ + Tuple[Callable, Union[type(PROCESS_AS_KEYWORD), tokens._TokenType]] +] +KEYWORDS_TYPE = Dict[str, tokens._TokenType] SQL_REGEX = { @@ -99,7 +93,7 @@ def is_keyword(value): (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), # Check for keywords, also returns tokens.Name if regex matches # but the match isn't a keyword. - (r'[0-9_\w][_$#\w]*', is_keyword), + (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 4397f185..61c52a97 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -13,19 +13,74 @@ # and to allow some customizations. from io import TextIOBase +from typing import List -from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX +from sqlparse import tokens, keywords from sqlparse.utils import consume -class Lexer: - """Lexer - Empty class. 
Leaving for backwards-compatibility - """ +class _LexerSingletonMetaclass(type): + _lexer_instance = None + + def __call__(cls, *args, **kwargs): + if _LexerSingletonMetaclass._lexer_instance is None: + _LexerSingletonMetaclass._lexer_instance = super( + _LexerSingletonMetaclass, cls + ).__call__(*args, **kwargs) + return _LexerSingletonMetaclass._lexer_instance + + +class Lexer(metaclass=_LexerSingletonMetaclass): + """The Lexer supports configurable syntax. + To add support for additional keywords, use the `add_keywords` method.""" + + _SQL_REGEX: keywords.SQL_REGEX_TYPE + _keywords: List[keywords.KEYWORDS_TYPE] + + def default_initialization(self): + """Initialize the lexer with default dictionaries. + Useful if you need to revert custom syntax settings.""" + self.clear() + self.set_SQL_REGEX(keywords.SQL_REGEX) + self.add_keywords(keywords.KEYWORDS_COMMON) + self.add_keywords(keywords.KEYWORDS_ORACLE) + self.add_keywords(keywords.KEYWORDS_PLPGSQL) + self.add_keywords(keywords.KEYWORDS_HQL) + self.add_keywords(keywords.KEYWORDS_MSACCESS) + self.add_keywords(keywords.KEYWORDS) + + def __init__(self): + self.default_initialization() + + def clear(self): + """Clear all syntax configurations. + Useful if you want to load a reduced set of syntax configurations.""" + self._SQL_REGEX = [] + self._keywords = [] + + def set_SQL_REGEX(self, SQL_REGEX: keywords.SQL_REGEX_TYPE): + """Set the list of regex that will parse the SQL.""" + self._SQL_REGEX = SQL_REGEX + + def add_keywords(self, keywords: keywords.KEYWORDS_TYPE): + """Add keyword dictionaries. Keywords are looked up in the same order + that dictionaries were added.""" + self._keywords.append(keywords) + + def is_keyword(self, value): + """Checks for a keyword. + + If the given value is in one of the KEYWORDS_* dictionary + it's considered a keyword. Otherwise tokens.Name is returned. 
+ """ + val = value.upper() + for kwdict in self._keywords: + if val in kwdict: + return kwdict[val], value + else: + return tokens.Name, value - @staticmethod - def get_tokens(text, encoding=None): + def get_tokens(self, text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism @@ -48,24 +103,26 @@ def get_tokens(text, encoding=None): text = text.decode(encoding) else: try: - text = text.decode('utf-8') + text = text.decode("utf-8") except UnicodeDecodeError: - text = text.decode('unicode-escape') + text = text.decode("unicode-escape") else: - raise TypeError("Expected text or file-like object, got {!r}". - format(type(text))) + raise TypeError( + "Expected text or file-like object, got {!r}" + .format(type(text)) + ) iterable = enumerate(text) for pos, char in iterable: - for rexmatch, action in SQL_REGEX: + for rexmatch, action in self._SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() - elif callable(action): - yield action(m.group()) + elif action is keywords.PROCESS_AS_KEYWORD: + yield self.is_keyword(m.group()) consume(iterable, m.end() - pos - 1) break diff --git a/tests/test_keywords.py b/tests/test_keywords.py index d4ded4b6..a3b1b385 100644 --- a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -2,6 +2,7 @@ from sqlparse import tokens from sqlparse.keywords import SQL_REGEX +from sqlparse.lexer import Lexer class TestSQLREGEX: @@ -9,5 +10,5 @@ class TestSQLREGEX: '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype From e37eaea4a78cbb335070ffec018bfc28425aa1a4 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 14:52:13 +0000 Subject: [PATCH 10/88] test configurable syntax --- 
tests/test_parse.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/test_parse.py b/tests/test_parse.py index ec327ac8..c5dfd369 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -5,6 +5,7 @@ import sqlparse from sqlparse import sql, tokens as T +from sqlparse.lexer import Lexer def test_parse_tokenize(): @@ -489,3 +490,45 @@ def test_parenthesis(): T.Newline, T.Newline, T.Punctuation] + + +def test_configurable_syntax(): + sql = """select * from foo BACON SPAM EGGS;""" + # sql="""select * from mydb.mytable BACON SPAM EGGS;""" + tokens = sqlparse.parse(sql)[0] + + assert list( + (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo BACON"), + (None, "SPAM EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + Lexer().add_keywords( + { + "BACON": sqlparse.tokens.Name.Builtin, + "SPAM": sqlparse.tokens.Keyword, + "EGGS": sqlparse.tokens.Keyword, + } + ) + + tokens = sqlparse.parse(sql)[0] + + assert list( + (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo"), + (sqlparse.tokens.Name.Builtin, "BACON"), + (sqlparse.tokens.Keyword, "SPAM"), + (sqlparse.tokens.Keyword, "EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + # reset the syntax for later tests. 
+ Lexer().default_initialization() From 8515d2edd70fc16d69aa7b1094f9b3534dfa74d9 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 15:00:03 +0000 Subject: [PATCH 11/88] remove type annotations for python 3.5 compatibility --- sqlparse/keywords.py | 6 ------ sqlparse/lexer.py | 8 ++------ tests/test_parse.py | 1 - 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index ce537812..6bc7937a 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -6,18 +6,12 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause import re -from typing import Dict, List, Tuple, Callable, Union from sqlparse import tokens # object() only supports "is" and is useful as a marker PROCESS_AS_KEYWORD = object() -SQL_REGEX_TYPE = List[ - Tuple[Callable, Union[type(PROCESS_AS_KEYWORD), tokens._TokenType]] -] -KEYWORDS_TYPE = Dict[str, tokens._TokenType] - SQL_REGEX = { 'root': [ diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 61c52a97..7408e01a 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -13,7 +13,6 @@ # and to allow some customizations. from io import TextIOBase -from typing import List from sqlparse import tokens, keywords from sqlparse.utils import consume @@ -34,9 +33,6 @@ class Lexer(metaclass=_LexerSingletonMetaclass): """The Lexer supports configurable syntax. To add support for additional keywords, use the `add_keywords` method.""" - _SQL_REGEX: keywords.SQL_REGEX_TYPE - _keywords: List[keywords.KEYWORDS_TYPE] - def default_initialization(self): """Initialize the lexer with default dictionaries. 
Useful if you need to revert custom syntax settings.""" @@ -58,11 +54,11 @@ def clear(self): self._SQL_REGEX = [] self._keywords = [] - def set_SQL_REGEX(self, SQL_REGEX: keywords.SQL_REGEX_TYPE): + def set_SQL_REGEX(self, SQL_REGEX): """Set the list of regex that will parse the SQL.""" self._SQL_REGEX = SQL_REGEX - def add_keywords(self, keywords: keywords.KEYWORDS_TYPE): + def add_keywords(self, keywords): """Add keyword dictionaries. Keywords are looked up in the same order that dictionaries were added.""" self._keywords.append(keywords) diff --git a/tests/test_parse.py b/tests/test_parse.py index c5dfd369..3018d9ad 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -494,7 +494,6 @@ def test_parenthesis(): def test_configurable_syntax(): sql = """select * from foo BACON SPAM EGGS;""" - # sql="""select * from mydb.mytable BACON SPAM EGGS;""" tokens = sqlparse.parse(sql)[0] assert list( From f9a73a62cfc23b10c38f22a10bd1d4c3edbb286f Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 22:34:52 +0000 Subject: [PATCH 12/88] test for changing the regex --- sqlparse/lexer.py | 10 ++++------ tests/test_parse.py | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 7408e01a..aafb55f2 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -99,14 +99,12 @@ def get_tokens(self, text, encoding=None): text = text.decode(encoding) else: try: - text = text.decode("utf-8") + text = text.decode('utf-8') except UnicodeDecodeError: - text = text.decode("unicode-escape") + text = text.decode('unicode-escape') else: - raise TypeError( - "Expected text or file-like object, got {!r}" - .format(type(text)) - ) + raise TypeError("Expected text or file-like object, got {!r}". 
+ format(type(text))) iterable = enumerate(text) for pos, char in iterable: diff --git a/tests/test_parse.py b/tests/test_parse.py index 3018d9ad..3ac65001 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,10 +1,11 @@ """Tests sqlparse.parse().""" +import re from io import StringIO import pytest import sqlparse -from sqlparse import sql, tokens as T +from sqlparse import sql, tokens as T, keywords from sqlparse.lexer import Lexer @@ -491,8 +492,7 @@ def test_parenthesis(): T.Newline, T.Punctuation] - -def test_configurable_syntax(): +def test_configurable_keywords(): sql = """select * from foo BACON SPAM EGGS;""" tokens = sqlparse.parse(sql)[0] @@ -517,6 +517,9 @@ def test_configurable_syntax(): tokens = sqlparse.parse(sql)[0] + # reset the syntax for later tests. + Lexer().default_initialization() + assert list( (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace ) == [ @@ -529,5 +532,30 @@ def test_configurable_syntax(): (sqlparse.tokens.Keyword, "EGGS"), (sqlparse.tokens.Punctuation, ";"), ] + + +def test_configurable_regex(): + lex = Lexer() + lex.clear() + + my_regex = ( + re.compile(r"ZORDER\s+BY\b", keywords.FLAGS).match, + sqlparse.tokens.Keyword, + ) + + lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:]) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + + tokens = sqlparse.parse("select * from foo zorder by bar;")[0] + # reset the syntax for later tests. 
Lexer().default_initialization() + + assert list( + (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + )[4] == (sqlparse.tokens.Keyword, "zorder by") From e0d3928ba69d73ba874ca03ec4395e94cf1ab293 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Thu, 1 Dec 2022 10:35:42 +0000 Subject: [PATCH 13/88] lexer documentation --- docs/source/extending.rst | 66 +++++++++++++++ docs/source/index.rst | 1 + sqlparse/keywords.py | 164 +++++++++++++++++++------------------- sqlparse/lexer.py | 5 +- tests/test_parse.py | 6 +- 5 files changed, 151 insertions(+), 91 deletions(-) create mode 100644 docs/source/extending.rst diff --git a/docs/source/extending.rst b/docs/source/extending.rst new file mode 100644 index 00000000..f1bd5512 --- /dev/null +++ b/docs/source/extending.rst @@ -0,0 +1,66 @@ +Extending :mod:`sqlparse` +========================= + +.. module:: sqlparse + :synopsis: Extending parsing capability of sqlparse. + +The :mod:`sqlparse` module uses a sql grammar that was tuned through usage and numerous +PR to fit a broad range of SQL syntaxes, but it cannot cater to every given case since +some SQL dialects have adopted conflicting meanings of certain keywords. Sqlparse +therefore exposes a mechanism to configure the fundamental keywords and regular +expressions that parse the language as described below. + +If you find an adaptation that works for your specific use-case, please consider +contributing it back to the community by opening a PR on +`GitHub <https://github.com/andialbrecht/sqlparse>`_. + +Configuring the Lexer +--------------------- + +The lexer is a singleton class that breaks down the stream of characters into language +tokens. It does this by using a sequence of regular expressions and keywords that are +listed in the file ``sqlparse.keywords``. Instead of applying these fixed grammar +definitions directly, the lexer is default initialized in its method called +``default_initialization()``. 
As an API user, you can adapt the Lexer configuration by +applying your own configuration logic. To do so, start out by clearing previous +configurations with ``.clear()``, then apply the SQL list with +``.set_SQL_REGEX(SQL_REGEX)``, and apply keyword lists with ``.add_keywords(KEYWORDS)``. + +You can do so by re-using the expressions in ``sqlparse.keywords`` (see example below), +leaving parts out, or by making up your own master list. + +See the expected types of the arguments by inspecting their structure in +``sqlparse.keywords``. +(For compatibility with Python 3.5, this library does not use type-hints.) + +The following example adds support for the expression ``ZORDER BY``, and adds ``BAR`` as +a keyword to the lexer: + +.. code-block:: python + + import re + + import sqlparse + from sqlparse import keywords + from sqlparse.lexer import Lexer + + lex = Lexer() + lex.clear() + + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) + + # slice the default SQL_REGEX to inject the custom object + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + lex.add_keywords({'BAR', sqlparse.tokens.Keyword}) + + sqlparse.parse("select * from foo zorder by bar;") diff --git a/docs/source/index.rst b/docs/source/index.rst index cba33141..e18d2b3c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,6 +20,7 @@ Contents api analyzing ui + extending changes license indices diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 6bc7937a..f04f928e 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -5,96 +5,92 @@ # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause 
-import re - from sqlparse import tokens # object() only supports "is" and is useful as a marker +# use this marker to specify that the given regex in SQL_REGEX +# shall be processed further through a lookup in the KEYWORDS dictionaries PROCESS_AS_KEYWORD = object() -SQL_REGEX = { - 'root': [ - (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), - (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), - - (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), - (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), - - (r'(\r\n|\r|\n)', tokens.Newline), - (r'\s+?', tokens.Whitespace), - - (r':=', tokens.Assignment), - (r'::', tokens.Punctuation), - - (r'\*', tokens.Wildcard), - - (r"`(``|[^`])*`", tokens.Name), - (r"´(´´|[^´])*´", tokens.Name), - (r'((?=~!]+', tokens.Operator.Comparison), - (r'[+/@#%^&|^-]+', tokens.Operator), - ]} - -FLAGS = re.IGNORECASE | re.UNICODE -SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']] +SQL_REGEX = [ + (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), + (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), + + (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), + (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), + + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+?', tokens.Whitespace), + + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + + (r'\*', tokens.Wildcard), + + (r"`(``|[^`])*`", tokens.Name), + (r"´(´´|[^´])*´", tokens.Name), + (r'((?=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|^-]+', tokens.Operator), +] KEYWORDS = { 'ABORT': tokens.Keyword, diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index aafb55f2..50799df6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -6,7 +6,7 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause """SQL Lexer""" - +import re # This code is based on the SqlLexer in pygments. 
# http://pygments.org/ # It's separated from the rest of pygments to increase performance @@ -56,7 +56,8 @@ def clear(self): def set_SQL_REGEX(self, SQL_REGEX): """Set the list of regex that will parse the SQL.""" - self._SQL_REGEX = SQL_REGEX + FLAGS = re.IGNORECASE | re.UNICODE + self._SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX] def add_keywords(self, keywords): """Add keyword dictionaries. Keywords are looked up in the same order diff --git a/tests/test_parse.py b/tests/test_parse.py index 3ac65001..017f93ae 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,5 +1,4 @@ """Tests sqlparse.parse().""" -import re from io import StringIO import pytest @@ -538,10 +537,7 @@ def test_configurable_regex(): lex = Lexer() lex.clear() - my_regex = ( - re.compile(r"ZORDER\s+BY\b", keywords.FLAGS).match, - sqlparse.tokens.Keyword, - ) + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:]) lex.add_keywords(keywords.KEYWORDS_COMMON) From 4efdc036623e1586206d7132abf95696953deb9a Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Thu, 1 Dec 2022 10:42:44 +0000 Subject: [PATCH 14/88] flake8 --- sqlparse/lexer.py | 5 ++++- tests/test_keywords.py | 1 - tests/test_parse.py | 19 +++++++++++++++---- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 50799df6..657177cb 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -57,7 +57,10 @@ def clear(self): def set_SQL_REGEX(self, SQL_REGEX): """Set the list of regex that will parse the SQL.""" FLAGS = re.IGNORECASE | re.UNICODE - self._SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX] + self._SQL_REGEX = [ + (re.compile(rx, FLAGS).match, tt) + for rx, tt in SQL_REGEX + ] def add_keywords(self, keywords): """Add keyword dictionaries. 
Keywords are looked up in the same order diff --git a/tests/test_keywords.py b/tests/test_keywords.py index a3b1b385..2eddccce 100644 --- a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -1,7 +1,6 @@ import pytest from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX from sqlparse.lexer import Lexer diff --git a/tests/test_parse.py b/tests/test_parse.py index 017f93ae..33e8541f 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -491,12 +491,15 @@ def test_parenthesis(): T.Newline, T.Punctuation] + def test_configurable_keywords(): sql = """select * from foo BACON SPAM EGGS;""" tokens = sqlparse.parse(sql)[0] assert list( - (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace ) == [ (sqlparse.tokens.Keyword.DML, "select"), (sqlparse.tokens.Wildcard, "*"), @@ -520,7 +523,9 @@ def test_configurable_keywords(): Lexer().default_initialization() assert list( - (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace ) == [ (sqlparse.tokens.Keyword.DML, "select"), (sqlparse.tokens.Wildcard, "*"), @@ -539,7 +544,11 @@ def test_configurable_regex(): my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) - lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:]) + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) lex.add_keywords(keywords.KEYWORDS_COMMON) lex.add_keywords(keywords.KEYWORDS_ORACLE) lex.add_keywords(keywords.KEYWORDS_PLPGSQL) @@ -553,5 +562,7 @@ def test_configurable_regex(): Lexer().default_initialization() assert list( - (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace )[4] == (sqlparse.tokens.Keyword, "zorder by") From 
fbf9a576fe40ad8e4d51bb922bb454c317f73403 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Sun, 1 Jan 2023 14:20:52 +0000 Subject: [PATCH 15/88] additional documentation --- docs/source/extending.rst | 10 ++++++++++ sqlparse/lexer.py | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/extending.rst b/docs/source/extending.rst index f1bd5512..97b7d389 100644 --- a/docs/source/extending.rst +++ b/docs/source/extending.rst @@ -44,7 +44,12 @@ a keyword to the lexer: from sqlparse import keywords from sqlparse.lexer import Lexer + # get the lexer singleton object to configure it lex = Lexer() + + # Clear the default configurations. + # After this call, reg-exps and keyword dictionaries need to be loaded + # to make the lexer functional again. lex.clear() my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) @@ -55,12 +60,17 @@ a keyword to the lexer: + [my_regex] + keywords.SQL_REGEX[38:] ) + + # add the default keyword dictionaries lex.add_keywords(keywords.KEYWORDS_COMMON) lex.add_keywords(keywords.KEYWORDS_ORACLE) lex.add_keywords(keywords.KEYWORDS_PLPGSQL) lex.add_keywords(keywords.KEYWORDS_HQL) lex.add_keywords(keywords.KEYWORDS_MSACCESS) lex.add_keywords(keywords.KEYWORDS) + + # add a custom keyword dictionary lex.add_keywords({'BAR', sqlparse.tokens.Keyword}) + # no configuration is passed here. The lexer is used as a singleton. sqlparse.parse("select * from foo zorder by bar;") diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 657177cb..6e17fca2 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -50,7 +50,9 @@ def __init__(self): def clear(self): """Clear all syntax configurations. - Useful if you want to load a reduced set of syntax configurations.""" + Useful if you want to load a reduced set of syntax configurations. 
+ After this call, reg-exps and keyword dictionaries need to be loaded + to make the lexer functional again.""" self._SQL_REGEX = [] self._keywords = [] From 907fb496f90f2719095a1f01fe24db1e5c0e15a8 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Sun, 1 Jan 2023 20:59:40 +0000 Subject: [PATCH 16/88] change singleton behavior --- docs/source/extending.rst | 2 +- sqlparse/lexer.py | 52 +++++++++++++++++++++++++-------------- tests/test_keywords.py | 2 +- tests/test_parse.py | 8 +++--- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/docs/source/extending.rst b/docs/source/extending.rst index 97b7d389..0c10924b 100644 --- a/docs/source/extending.rst +++ b/docs/source/extending.rst @@ -45,7 +45,7 @@ a keyword to the lexer: from sqlparse.lexer import Lexer # get the lexer singleton object to configure it - lex = Lexer() + lex = Lexer.get_default_instance() # Clear the default configurations. # After this call, reg-exps and keyword dictionaries need to be loaded diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 6e17fca2..9d25c9e6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -7,6 +7,7 @@ """SQL Lexer""" import re + # This code is based on the SqlLexer in pygments. # http://pygments.org/ # It's separated from the rest of pygments to increase performance @@ -18,21 +19,39 @@ from sqlparse.utils import consume -class _LexerSingletonMetaclass(type): - _lexer_instance = None - - def __call__(cls, *args, **kwargs): - if _LexerSingletonMetaclass._lexer_instance is None: - _LexerSingletonMetaclass._lexer_instance = super( - _LexerSingletonMetaclass, cls - ).__call__(*args, **kwargs) - return _LexerSingletonMetaclass._lexer_instance - - -class Lexer(metaclass=_LexerSingletonMetaclass): +class Lexer: """The Lexer supports configurable syntax. 
To add support for additional keywords, use the `add_keywords` method.""" + _default_intance = None + + # Development notes: + # - This class is prepared to be able to support additional SQL dialects + # in the future by adding additional functions that take the place of + # the function default_initialization() + # - The lexer class uses an explicit singleton behavior with the + # instance-getter method get_default_instance(). This mechanism has + # the advantage that the call signature of the entry-points to the + # sqlparse library are not affected. Also, usage of sqlparse in third + # party code does not need to be adapted. On the other hand, singleton + # behavior is not thread safe, and the current implementation does not + # easily allow for multiple SQL dialects to be parsed in the same + # process. Such behavior can be supported in the future by passing a + # suitably initialized lexer object as an additional parameter to the + # entry-point functions (such as `parse`). Code will need to be written + # to pass down and utilize such an object. The current implementation + # is prepared to support this thread safe approach without the + # default_instance part needing to change interface. + + @classmethod + def get_default_instance(cls): + """Returns the lexer instance used internally + by the sqlparse core functions.""" + if cls._default_intance is None: + cls._default_intance = cls() + cls._default_intance.default_initialization() + return cls._default_intance + def default_initialization(self): """Initialize the lexer with default dictionaries. Useful if you need to revert custom syntax settings.""" @@ -45,13 +64,10 @@ def default_initialization(self): self.add_keywords(keywords.KEYWORDS_MSACCESS) self.add_keywords(keywords.KEYWORDS) - def __init__(self): - self.default_initialization() - def clear(self): """Clear all syntax configurations. Useful if you want to load a reduced set of syntax configurations. 
- After this call, reg-exps and keyword dictionaries need to be loaded + After this call, regexps and keyword dictionaries need to be loaded to make the lexer functional again.""" self._SQL_REGEX = [] self._keywords = [] @@ -73,7 +89,7 @@ def is_keyword(self, value): """Checks for a keyword. If the given value is in one of the KEYWORDS_* dictionary - it's considered a keyword. Otherwise tokens.Name is returned. + it's considered a keyword. Otherwise, tokens.Name is returned. """ val = value.upper() for kwdict in self._keywords: @@ -136,4 +152,4 @@ def tokenize(sql, encoding=None): Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. """ - return Lexer().get_tokens(sql, encoding) + return Lexer.get_default_instance().get_tokens(sql, encoding) diff --git a/tests/test_keywords.py b/tests/test_keywords.py index 2eddccce..b26e9b45 100644 --- a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -9,5 +9,5 @@ class TestSQLREGEX: '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype diff --git a/tests/test_parse.py b/tests/test_parse.py index 33e8541f..5feef5a7 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -509,7 +509,7 @@ def test_configurable_keywords(): (sqlparse.tokens.Punctuation, ";"), ] - Lexer().add_keywords( + Lexer.get_default_instance().add_keywords( { "BACON": sqlparse.tokens.Name.Builtin, "SPAM": sqlparse.tokens.Keyword, @@ -520,7 +520,7 @@ def test_configurable_keywords(): tokens = sqlparse.parse(sql)[0] # reset the syntax for later tests. 
- Lexer().default_initialization() + Lexer.get_default_instance().default_initialization() assert list( (t.ttype, t.value) @@ -539,7 +539,7 @@ def test_configurable_keywords(): def test_configurable_regex(): - lex = Lexer() + lex = Lexer.get_default_instance() lex.clear() my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) @@ -559,7 +559,7 @@ def test_configurable_regex(): tokens = sqlparse.parse("select * from foo zorder by bar;")[0] # reset the syntax for later tests. - Lexer().default_initialization() + Lexer.get_default_instance().default_initialization() assert list( (t.ttype, t.value) From dd9d5b91d7aa30e4a000d5370f09dc99378891dc Mon Sep 17 00:00:00 2001 From: Shikanime Deva Date: Mon, 19 Jul 2021 13:56:30 +0200 Subject: [PATCH 17/88] Fix get_type with comments between WITH keyword --- sqlparse/sql.py | 27 ++++++++++++++------------- tests/test_regressions.py | 9 +++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 586cd216..1ccfbdbe 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -413,27 +413,28 @@ def get_type(self): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) - if first_token is None: + token = self.token_first(skip_cm=True) + if token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. return 'UNKNOWN' - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized + elif token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return token.normalized - elif first_token.ttype == T.Keyword.CTE: + elif token.ttype == T.Keyword.CTE: # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. 
- fidx = self.token_index(first_token) - tidx, token = self.token_next(fidx, skip_ws=True) - if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_next(tidx, skip_ws=True) - - if dml_keyword is not None \ - and dml_keyword.ttype == T.Keyword.DML: - return dml_keyword.normalized + tidx = self.token_index(token) + while tidx is not None: + tidx, token = self.token_next(tidx, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + tidx, token = self.token_next(tidx, skip_ws=True) + + if token is not None \ + and token.ttype == T.Keyword.DML: + return token.normalized # Hmm, probably invalid syntax, so return unknown. return 'UNKNOWN' diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 4ffc69f3..bc8b7dd3 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -427,3 +427,12 @@ def test_splitting_at_and_backticks_issue588(): 'grant foo to user1@`myhost`; grant bar to user1@`myhost`;') assert len(splitted) == 2 assert splitted[-1] == 'grant bar to user1@`myhost`;' + + +def test_comment_between_cte_clauses_issue632(): + p, = sqlparse.parse(""" + WITH foo AS (), + -- A comment before baz subquery + baz AS () + SELECT * FROM baz;""") + assert p.get_type() == "SELECT" From fc76056fb8f0ec713a3f2a2b6206a3336932c382 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 20 Mar 2023 08:46:10 +0100 Subject: [PATCH 18/88] Cleanup regex for detecting keywords (fixes #709). --- sqlparse/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index f04f928e..f85d4688 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -86,7 +86,7 @@ (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), # Check for keywords, also returns tokens.Name if regex matches # but the match isn't a keyword. 
- (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD), + (r'\w[$#\w]*', PROCESS_AS_KEYWORD), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), From b949fdf9a1538f98b57612bef6306fc38f32aaf7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 14 Apr 2023 14:51:58 +0200 Subject: [PATCH 19/88] CI: Use codecov action. codecov module is deprecated and was removed from PyPI in favor of the github action. --- .github/workflows/python-app.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 906ca7e8..3033af97 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -30,10 +30,9 @@ jobs: run: | python -m pip install --upgrade pip flit flit install --deps=develop - pip install codecov - name: Lint with flake8 run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics - name: Test with pytest run: pytest --cov=sqlparse - name: Publish to codecov - run: codecov + uses: codecov/codecov-action@v3 From c457abd5f097dd13fb21543381e7cfafe7d31cfb Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 20 Mar 2023 08:33:46 +0100 Subject: [PATCH 20/88] Remove unnecessary parts in regex for bad escaping. The regex tried to deal with situations where escaping in the SQL to be parsed was suspicious. --- CHANGELOG | 10 ++++++++++ sqlparse/keywords.py | 4 ++-- tests/test_split.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 94864138..880a9ca9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,22 @@ Development Version ------------------- +Notable Changes + +* IMPORTANT: This release fixes a security vulnerability in the + parser where a regular expression vulnerable to ReDOS (Regular + Expression Denial of Service) was used. 
See the security advisory + for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2 + The vulnerability was discovered by @erik-krogh from GitHub + Security Lab (GHSL). Thanks for reporting! + Bug Fixes * Revert a change from 0.4.0 that changed IN to be a comparison (issue694). The primary expectation is that IN is treated as a keyword and not as a comparison operator. That also follows the definition of reserved keywords for the major SQL syntax definitions. +* Fix regular expressions for string parsing. Other diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index f85d4688..b45f3e0f 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -59,9 +59,9 @@ (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + (r"'(''|\\'|[^'])*'", tokens.String.Single), # not a real string literal in ANSI SQL: - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), + (r'"(""|\\"|[^"])*"', tokens.String.Symbol), (r'(""|".*?[^\\]")', tokens.String.Symbol), # sqlite names can be escaped with [square brackets]. 
left bracket # cannot be preceded by word character or a right bracket -- diff --git a/tests/test_split.py b/tests/test_split.py index a9d75765..e79750e8 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -18,8 +18,8 @@ def test_split_semicolon(): def test_split_backslash(): - stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';") - assert len(stmts) == 3 + stmts = sqlparse.parse("select '\'; select '\'';") + assert len(stmts) == 2 @pytest.mark.parametrize('fn', ['function.sql', From 64bb91f4880b46f73b4cc9207ae9ccc180d56d1b Mon Sep 17 00:00:00 2001 From: Kevin Stubbings Date: Wed, 22 Mar 2023 16:31:15 -0700 Subject: [PATCH 21/88] Testing branch --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test diff --git a/test b/test new file mode 100644 index 00000000..e69de29b From d9d69f47ed13a583c81473211f44ae320470a58b Mon Sep 17 00:00:00 2001 From: Kevin Stubbings Date: Wed, 22 Mar 2023 16:36:19 -0700 Subject: [PATCH 22/88] Removed test file --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test diff --git a/test b/test deleted file mode 100644 index e69de29b..00000000 From 58dae6fcd2a51209aeccd4fff3b923bf37714e19 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 18 Apr 2023 10:25:38 +0200 Subject: [PATCH 23/88] Bump version. --- sqlparse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index f901185a..122595b3 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.4.dev0' +__version__ = '0.4.4' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From 647d1457acf7d88614215841eb15d423df2a1895 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 18 Apr 2023 10:29:29 +0200 Subject: [PATCH 24/88] Update Changelog. 
--- CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 880a9ca9..a42577e1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development Version -------------------- +Release 0.4.4 (Apr 18, 2023) +---------------------------- Notable Changes From bd417b8c7d6b79d0b6af1b42c78b17d13b724411 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 18 Apr 2023 10:31:23 +0200 Subject: [PATCH 25/88] Switch back to development mode. --- CHANGELOG | 6 ++++++ sqlparse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index a42577e1..4f393b9c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Development Version +------------------- + +Nothing yet. + + Release 0.4.4 (Apr 18, 2023) ---------------------------- diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index 122595b3..db0d2fc9 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.4' +__version__ = '0.4.5.dev0' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From 34c7c4d3b5aa953c14f88b5980d037c1682df5c6 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 3 Jun 2023 16:09:11 +0200 Subject: [PATCH 26/88] Update python-app.yml: Try with 3.12-beta1 --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 3033af97..fdf04f85 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-beta.1"] steps: - uses: actions/checkout@v3 From 3c4b57c24dd3cc3e7937f24b14b8bd53a5499fcb Mon Sep 17 
00:00:00 2001 From: Andi Albrecht Date: Sat, 3 Jun 2023 16:13:56 +0200 Subject: [PATCH 27/88] Update python-app.yml: Revert to 3.12-dev and add check-latest --- .github/workflows/python-app.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index fdf04f85..43a918da 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-beta.1"] + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] steps: - uses: actions/checkout@v3 @@ -26,6 +26,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + check-latest: true - name: Install dependencies run: | python -m pip install --upgrade pip flit From 8157d16539b60ed625b004abeef9c2796eb09ba0 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 5 Jul 2023 21:44:26 +0200 Subject: [PATCH 28/88] Add classifier for Python 3.11 (fixes #726). --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 338a53ce..f6eadb89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Database", From 9765fce1c0466d3bd90e3925ed47d4f47fa3a131 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 23 Jul 2023 22:44:26 +0200 Subject: [PATCH 29/88] Remove outdated and unused editorconfig. 
--- .editorconfig | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index ca1e615a..00000000 --- a/.editorconfig +++ /dev/null @@ -1,20 +0,0 @@ -# http://editorconfig.org - -root = true - -[*] -indent_style = space -indent_size = 4 -end_of_line = lf -charset = utf-8 -insert_final_newline = true -trim_trailing_whitespace = true - -[*.{py,ini,yaml,yml,rst}] -indent_style = space -indent_size = 4 -continuation_indent_size = 4 -trim_trailing_whitespace = true - -[{Makefile,*.bat}] -indent_style = tab From 2bc8d9c2cdfcc4c857e62e682043f40dbb8c14f3 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 24 Jul 2023 09:01:12 +0200 Subject: [PATCH 30/88] Drop support for Python 3.5. --- .github/workflows/python-app.yml | 2 +- CHANGELOG | 4 +++- README.rst | 2 +- pyproject.toml | 3 +-- sqlparse/__init__.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 43a918da..91296f8c 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] steps: - uses: actions/checkout@v3 diff --git a/CHANGELOG b/CHANGELOG index 4f393b9c..0102cd0c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,9 @@ Development Version ------------------- -Nothing yet. +Notable Changes + +* Drop support for Python 3.5. Release 0.4.4 (Apr 18, 2023) diff --git a/README.rst b/README.rst index df4e7e36..67ddaf96 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ python-sqlparse - Parse SQL statements sqlparse is a non-validating SQL parser for Python. 
It provides support for parsing, splitting and formatting SQL statements. -The module is compatible with Python 3.5+ and released under the terms of the +The module is compatible with Python 3.6+ and released under the terms of the `New BSD license `_. Visit the project page at https://github.com/andialbrecht/sqlparse for diff --git a/pyproject.toml b/pyproject.toml index f6eadb89..4bdbe1b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", @@ -28,7 +27,7 @@ classifiers = [ "Topic :: Database", "Topic :: Software Development", ] -requires-python = ">=3.5" +requires-python = ">=3.6" [project.urls] Home = "https://github.com/andialbrecht/sqlparse" diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index db0d2fc9..cfd4e2fd 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.5.dev0' +__version__ = '0.5.0.dev0' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From be35807c83909b70be0e16fcd6408b7b32aef78a Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 24 Jul 2023 10:23:34 +0200 Subject: [PATCH 31/88] Get tox running again. We have to pin versions for tox and virtualenv because newer versions don't support Python 3.6 anymore. 
--- pyproject.toml | 2 ++ tox.ini | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4bdbe1b4..1b23a4bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,8 @@ sqlformat = "sqlparse.__main__:main" dev = [ "flake8", "build", + "virtualenv<20.22.0", # 20.22.0 dropped Python 3.6 support + "tox<4.5.0", # >=4.5.0 requires virtualenv>=20.22 ] test = [ "pytest", diff --git a/tox.ini b/tox.ini index 0087d50e..19d17327 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,12 @@ [tox] skip_missing_interpreters = True envlist = - py35 py36 py37 py38 - pypy3 + py39 + py310 + py311 flake8 [testenv] From 3eec63dafd3e7ff99560f66c1f7964f558307b98 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 24 Jul 2023 11:06:57 +0200 Subject: [PATCH 32/88] Don't pin virtualenv in dev section. The requirements in dev section are more general (and may require newer versions). --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1b23a4bc..10e96696 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,8 +43,6 @@ sqlformat = "sqlparse.__main__:main" dev = [ "flake8", "build", - "virtualenv<20.22.0", # 20.22.0 dropped Python 3.6 support - "tox<4.5.0", # >=4.5.0 requires virtualenv>=20.22 ] test = [ "pytest", @@ -53,6 +51,10 @@ test = [ doc = [ "sphinx", ] +tox = [ + "virtualenv<20.22.0", # 20.22.0 dropped Python 3.6 support + "tox<4.5.0", # >=4.5.0 requires virtualenv>=20.22 +] [tool.flit.sdist] include = [ From b90e422b06d268dfe588ad9d817009ec5fa01a72 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 11:07:44 +0200 Subject: [PATCH 33/88] Add reminder for github release (fixes #732). 
--- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 1657822e..b3db8e3d 100644 --- a/Makefile +++ b/Makefile @@ -24,3 +24,4 @@ release: @rm -rf dist/ python -m build twine upload --sign --identity E0B84F81 dist/* + @echo "Reminder: Add release on github https://github.com/andialbrecht/sqlparse/releases" From d69fadac82301e87ed4a7a12b19359f13d105e9e Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 11:36:35 +0200 Subject: [PATCH 34/88] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 38 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..dd84ea78 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..bbcbbe7d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. From efcdbf627a23f239be66a4e80824ef7d100ccb02 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 11:40:48 +0200 Subject: [PATCH 35/88] Update bug_report.md --- .github/ISSUE_TEMPLATE/bug_report.md | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dd84ea78..22844bfc 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -2,7 +2,7 @@ name: Bug report about: Create a report to help us improve title: '' -labels: '' +labels: 'bug,needs-triage' assignees: '' --- @@ -11,28 +11,16 @@ assignees: '' A clear and concise description of what the bug is. **To Reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. See error +Steps to reproduce the behavior. +Please give code examples or concete SQL statements. Take care of not posting any sensitive information when pasting SQL statements! +What's the concrete error / traceback. **Expected behavior** A clear and concise description of what you expected to happen. 
-**Screenshots** -If applicable, add screenshots to help explain your problem. - -**Desktop (please complete the following information):** - - OS: [e.g. iOS] - - Browser [e.g. chrome, safari] - - Version [e.g. 22] - -**Smartphone (please complete the following information):** - - Device: [e.g. iPhone6] - - OS: [e.g. iOS8.1] - - Browser [e.g. stock browser, safari] - - Version [e.g. 22] +**Versions (please complete the following information):** + - Python: [e.g. 3.11.2] + - sqlparse: [e.g. 0.4.1] **Additional context** Add any other context about the problem here. From bcfbe3749afdb64b7121ce7d1069fd9d62d40788 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 11:51:17 +0200 Subject: [PATCH 36/88] Add link to discussion when creating issues. --- .github/ISSUE_TEMPLATE/config.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..acccb059 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Discussions + url: https://github.com/andialbrecht/sqlparse/discussions + about: Please ask questions and start more general discussions here \ No newline at end of file From 8aa4715afd4edb97787f0310d0ae26639076403a Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 11:52:03 +0200 Subject: [PATCH 37/88] Update config.yml. --- .github/ISSUE_TEMPLATE/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index acccb059..03f62715 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: true contact_links: - - name: Discussions + - name: Discussions, Questions? 
url: https://github.com/andialbrecht/sqlparse/discussions about: Please ask questions and start more general discussions here \ No newline at end of file From 9a90474c6f346f0001739d32f5c2bd55a21bc247 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 12:12:32 +0200 Subject: [PATCH 38/88] Update test action. --- .github/workflows/python-app.yml | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 91296f8c..6fd253ab 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -13,19 +13,29 @@ on: - cron: '0 12 * * *' jobs: - build: - - runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available + test: + name: Run tests on ${{ matrix.py }} + runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.6 available strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] + py: + - "3.12.0-beta.4" + - "3.11" + - "3.10" + - "3.9" + - "3.8" + - "3.7" + - "3.6" + - "pypy-3.9" + - "pypy-3.8" + - "pypy-3.7" steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.py }} uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.py }} check-latest: true - name: Install dependencies run: | From 0623627674499302e7cf089a08903c40169a8ee3 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 12:33:02 +0200 Subject: [PATCH 39/88] Add Code of Conduct. 
--- .github/CODE_OF_CONDUCT.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .github/CODE_OF_CONDUCT.md diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..b1ee021c --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,7 @@ +# Be nice to each other + +Everyone participating in the _sqlparse_ project and especially in the +issue tracker, discussion forums, pull requests, is expected to treat +other people with respect and more generally to follow the guidelines +articulated in the +[Python Community Code of Conduct](https://www.python.org/psf/codeofconduct/). \ No newline at end of file From bb42969cca7c1e0bc49b970ea6512bf0184b97fe Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 12:45:45 +0200 Subject: [PATCH 40/88] Add contributing guide. --- CONTRIBUTING.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..3db0f110 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,11 @@ +# Contributing to `sqlparse` + +Thanks for your interest in contributing to the `sqlparse` project! + +All contributors are expected to follow the +[Python Community Code of Conduct](https://www.python.org/psf/codeofconduct/). + +Head over to the +[Discussions Page](https://github.com/andialbrecht/sqlparse/discussions) if +you have any questions. We're still working on a more elaborate +developer guide. 
\ No newline at end of file From c0ffe867a48148f9c9a07a5edc716199ccceb114 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 12:57:30 +0200 Subject: [PATCH 41/88] Create SECURITY.md --- SECURITY.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..81c01543 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,14 @@ +# Security Policy + +## Supported Versions + +For now `sqlparse` uses very defensive version numbers. There's no major version yet. +In turn there's only one supported version and this is the latest. + +## Reporting a Vulnerability + +To report a vulnerability head over to the [Security Advisories](https://github.com/andialbrecht/sqlparse/security/advisories) +page and click on "New draft security advisory". + +Feel free to contact me at albrecht.andi@gmail.com if you have any questions or want to discuss things +beforehand. From 881db0b29af79dd1c1898051e3a1f0fdd6e7e618 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 6 Aug 2023 13:31:51 +0200 Subject: [PATCH 42/88] Add Pull request template. --- .github/PULL_REQUEST_TEMPLATE.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..77b1fd6c --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,13 @@ +# Thanks for contributing! + +Before submitting your pull request please have a look at the +following checklist: + +- [ ] ran the tests (`pytest`) +- [ ] all style issues addressed (`flake8`) +- [ ] your changes are covered by tests +- [ ] your changes are documented, if needed + +In addition, please take care to provide a proper description +on what your change does, fixes or achieves when submitting the +pull request. 
\ No newline at end of file From 715feacbdef1b488a562a3f37d3d4afbbcea8410 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 7 Aug 2023 15:05:59 +0200 Subject: [PATCH 43/88] Update Python version in test action. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 6fd253ab..53f43c3e 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: py: - - "3.12.0-beta.4" + - "3.12.0-rc.1" - "3.11" - "3.10" - "3.9" From baf3a0a5b9514540580152b0983a03e257b047ae Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 7 Aug 2023 15:20:17 +0200 Subject: [PATCH 44/88] Updated too early... switching back to 3.12.0-beta.4. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 53f43c3e..6fd253ab 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: py: - - "3.12.0-rc.1" + - "3.12.0-beta.4" - "3.11" - "3.10" - "3.9" From 21f9fd57005401888abf5cd1444923f483842203 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 4 Sep 2023 08:31:03 +0200 Subject: [PATCH 45/88] CI: Try to fix importlib issue with flake8. 
See https://github.com/python/importlib_metadata/issues/406 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 10e96696..67a9c7c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ sqlformat = "sqlparse.__main__:main" [project.optional-dependencies] dev = [ + "importlib_metadata<5; python_version <= '3.7'", "flake8", "build", ] From c0a8ee6cc4ae7e5c0b9ef9e6d8e80beb90b5b00b Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 10 Sep 2023 00:05:00 +0300 Subject: [PATCH 46/88] Bump GitHub Actions --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/python-app.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 1cde398b..5acaa67d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -39,7 +39,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 6fd253ab..6accd816 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -31,7 +31,7 @@ jobs: - "pypy-3.7" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.py }} uses: actions/setup-python@v4 with: From 9bf512cb7c7bc03b06cfa0056ec53076e9c22d7c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 10 Sep 2023 00:05:54 +0300 Subject: [PATCH 47/88] Add support for Python 3.12 --- .github/workflows/python-app.yml | 3 ++- pyproject.toml | 1 + tox.ini | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 6accd816..ff7269bc 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: py: - - "3.12.0-beta.4" + - "3.12" - "3.11" - "3.10" - "3.9" @@ -36,6 +36,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.py }} + allow-prereleases: true check-latest: true - name: Install dependencies run: | diff --git a/pyproject.toml b/pyproject.toml index 67a9c7c5..d9a921f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Database", diff --git a/tox.ini b/tox.ini index 19d17327..40d84ad8 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,7 @@ envlist = py39 py310 py311 + py312 flake8 [testenv] From 8ce446ed3f945f697a166551447f203510f25f2d Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 19 Sep 2023 06:52:23 +0200 Subject: [PATCH 48/88] Update changelog and authors. 
--- AUTHORS | 1 + CHANGELOG | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 1717adff..4617b7d7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -31,6 +31,7 @@ Alphabetical list of contributors: * Florian Bauer * Fredy Wijaya * Gavin Wahl +* Hugo van Kemenade * hurcy * Ian Robertson * JacekPliszka diff --git a/CHANGELOG b/CHANGELOG index 0102cd0c..eabd6019 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,7 @@ Development Version Notable Changes * Drop support for Python 3.5. +* Python 3.12 is now supported (pr725, by hugovk). Release 0.4.4 (Apr 18, 2023) From fac38cd03bea712e096222f16199a7482a4837da Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 19 Sep 2023 21:41:57 +0200 Subject: [PATCH 49/88] Ignore attributes starting with dunder in _TokenType (fixes #672). This issue came up, when trying to deepcopy a parsed statement. deepcopy uses getattr(obj, '__deepcopy__', None) to get a method for copying an object. Before this change a new attribute '__deepcopy__' was created as a new instance of _TokenType (a tuple). --- CHANGELOG | 4 ++++ sqlparse/tokens.py | 3 +++ tests/test_regressions.py | 8 ++++++++ 3 files changed, 15 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index eabd6019..525918a2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,10 @@ Notable Changes * Drop support for Python 3.5. * Python 3.12 is now supported (pr725, by hugovk). +Bug Fixes + +* Ignore dunder attributes when creating Tokens (issue672). 
+ Release 0.4.4 (Apr 18, 2023) ---------------------------- diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py index d92bbdcf..143f66b4 100644 --- a/sqlparse/tokens.py +++ b/sqlparse/tokens.py @@ -19,6 +19,9 @@ def __contains__(self, item): return item is not None and (self is item or item[:len(self)] == self) def __getattr__(self, name): + # don't mess with dunder + if name.startswith('__'): + return super().__getattr__(self, name) new = _TokenType(self + (name,)) setattr(self, name, new) new.parent = self diff --git a/tests/test_regressions.py b/tests/test_regressions.py index bc8b7dd3..961adc17 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -1,3 +1,5 @@ +import copy + import pytest import sqlparse @@ -436,3 +438,9 @@ def test_comment_between_cte_clauses_issue632(): baz AS () SELECT * FROM baz;""") assert p.get_type() == "SELECT" + + +def test_copy_issue672(): + p = sqlparse.parse('select * from foo')[0] + copied = copy.deepcopy(p) + assert str(p) == str(copied) From 5c9435269bcb00c86164799a16621fcf5d41e917 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 19 Sep 2023 21:57:37 +0200 Subject: [PATCH 50/88] Simplify regex. --- sqlparse/filters/others.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 6905f2d6..9e617c37 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -25,7 +25,7 @@ def _get_insert_token(token): # Note: The actual value for a line break is replaced by \n # in SerializerUnicode which will be executed in the # postprocessing state. - m = re.search(r'((\r|\n)+) *$', token.value) + m = re.search(r'([\r\n]+) *$', token.value) if m is not None: return sql.Token(T.Whitespace.Newline, m.groups()[0]) else: From 3696d5388186a1fd51f657e0d6f4c6809b244143 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 19 Sep 2023 22:01:45 +0200 Subject: [PATCH 51/88] Code cleanup. 
--- sqlparse/engine/grouping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 86d8fc64..57d257e2 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -184,7 +184,7 @@ def match(token): return token.match(T.Assignment, ':=') def valid(token): - return token is not None and token.ttype not in (T.Keyword) + return token is not None and token.ttype not in (T.Keyword,) def post(tlist, pidx, tidx, nidx): m_semicolon = T.Punctuation, ';' From 6eca7aeb407235d7053508a49e2262a395d56b67 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 25 Sep 2023 21:23:42 +0200 Subject: [PATCH 52/88] Cleanup .gitignore. Removed any editor/IDE related and obsolete entries. --- .gitignore | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.gitignore b/.gitignore index e5953853..cc2ec16b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -# PyCharm -.idea/ - *.py[co] docs/build dist/ @@ -11,12 +8,5 @@ MANIFEST .cache/ *.egg-info/ htmlcov/ -extras/appengine/sqlparse -extras/appengine/lib/ -extras/py3k/sqlparse -extras/py3k/tests -extras/py3k/sqlparse.diff -extras/py3k/tests.diff coverage.xml -*.class .pytest_cache \ No newline at end of file From 115e208bd340f175b23964524670418fe6f72c31 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Thu, 12 Oct 2023 21:11:50 +0200 Subject: [PATCH 53/88] Add option to remove trailing semicolon when splitting (fixes #742). --- CHANGELOG | 5 +++++ sqlparse/__init__.py | 6 ++++-- sqlparse/engine/filter_stack.py | 5 ++++- sqlparse/filters/__init__.py | 2 ++ sqlparse/filters/others.py | 9 +++++++++ tests/test_split.py | 28 ++++++++++++++++++++++++++++ 6 files changed, 52 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 525918a2..0ede2800 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,11 @@ Notable Changes * Drop support for Python 3.5. * Python 3.12 is now supported (pr725, by hugovk). 
+Enhancements: + +* Splitting statements now allows to remove the semicolon at the end. + Some database backends love statements without semicolon (issue742). + Bug Fixes * Ignore dunder attributes when creating Tokens (issue672). diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index cfd4e2fd..b80b2d60 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -59,12 +59,14 @@ def format(sql, encoding=None, **options): return ''.join(stack.run(sql, encoding)) -def split(sql, encoding=None): +def split(sql, encoding=None, strip_semicolon=False): """Split *sql* into single statements. :param sql: A string containing one or more SQL statements. :param encoding: The encoding of the statement (optional). + :param strip_semicolon: If True, remove trainling semicolons + (default: False). :returns: A list of strings. """ - stack = engine.FilterStack() + stack = engine.FilterStack(strip_semicolon=strip_semicolon) return [str(stmt).strip() for stmt in stack.run(sql, encoding)] diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py index 9665a224..3feba377 100644 --- a/sqlparse/engine/filter_stack.py +++ b/sqlparse/engine/filter_stack.py @@ -10,14 +10,17 @@ from sqlparse import lexer from sqlparse.engine import grouping from sqlparse.engine.statement_splitter import StatementSplitter +from sqlparse.filters import StripTrailingSemicolonFilter class FilterStack: - def __init__(self): + def __init__(self, strip_semicolon=False): self.preprocess = [] self.stmtprocess = [] self.postprocess = [] self._grouping = False + if strip_semicolon: + self.stmtprocess.append(StripTrailingSemicolonFilter()) def enable_grouping(self): self._grouping = True diff --git a/sqlparse/filters/__init__.py b/sqlparse/filters/__init__.py index 5bd6b325..06169460 100644 --- a/sqlparse/filters/__init__.py +++ b/sqlparse/filters/__init__.py @@ -8,6 +8,7 @@ from sqlparse.filters.others import SerializerUnicode from sqlparse.filters.others import StripCommentsFilter from 
sqlparse.filters.others import StripWhitespaceFilter +from sqlparse.filters.others import StripTrailingSemicolonFilter from sqlparse.filters.others import SpacesAroundOperatorsFilter from sqlparse.filters.output import OutputPHPFilter @@ -25,6 +26,7 @@ 'SerializerUnicode', 'StripCommentsFilter', 'StripWhitespaceFilter', + 'StripTrailingSemicolonFilter', 'SpacesAroundOperatorsFilter', 'OutputPHPFilter', diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 9e617c37..da7c0e79 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -126,6 +126,15 @@ def process(self, stmt): return stmt +class StripTrailingSemicolonFilter: + + def process(self, stmt): + while stmt.tokens and (stmt.tokens[-1].is_whitespace + or stmt.tokens[-1].value == ';'): + stmt.tokens.pop() + return stmt + + # --------------------------- # postprocess diff --git a/tests/test_split.py b/tests/test_split.py index e79750e8..30a50c59 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -166,3 +166,31 @@ def test_split_mysql_handler_for(load_file): # see issue581 stmts = sqlparse.split(load_file('mysql_handler.sql')) assert len(stmts) == 2 + + +@pytest.mark.parametrize('sql, expected', [ + ('select * from foo;', ['select * from foo']), + ('select * from foo', ['select * from foo']), + ('select * from foo; select * from bar;', [ + 'select * from foo', + 'select * from bar', + ]), + (' select * from foo;\n\nselect * from bar;\n\n\n\n', [ + 'select * from foo', + 'select * from bar', + ]), + ('select * from foo\n\n; bar', ['select * from foo', 'bar']), +]) +def test_split_strip_semicolon(sql, expected): + stmts = sqlparse.split(sql, strip_semicolon=True) + assert len(stmts) == len(expected) + for idx, expectation in enumerate(expected): + assert stmts[idx] == expectation + + +def test_split_strip_semicolon_procedure(load_file): + stmts = sqlparse.split(load_file('mysql_handler.sql'), + strip_semicolon=True) + assert len(stmts) == 2 + assert 
stmts[0].endswith('end') + assert stmts[1].endswith('end') From f101546dafa921edfea5b3107731504665b758ea Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Thu, 12 Oct 2023 21:28:03 +0200 Subject: [PATCH 54/88] Add comment. --- sqlparse/engine/statement_splitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py index a991959a..9bde92c5 100644 --- a/sqlparse/engine/statement_splitter.py +++ b/sqlparse/engine/statement_splitter.py @@ -54,7 +54,7 @@ def _change_splitlevel(self, ttype, value): if unified == 'BEGIN': self._begin_depth += 1 if self._is_create: - # FIXME(andi): This makes no sense. + # FIXME(andi): This makes no sense. ## this comment neither return 1 return 0 From ab84201f0baf75fd20dd5458d65920e1a50a5be2 Mon Sep 17 00:00:00 2001 From: Georg Traar Date: Mon, 5 Feb 2024 07:37:17 +0100 Subject: [PATCH 55/88] allow operators to procede dollar quoted strings --- CHANGELOG | 1 + sqlparse/keywords.py | 2 +- tests/test_parse.py | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 0ede2800..0b48e9f3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -14,6 +14,7 @@ Enhancements: Bug Fixes * Ignore dunder attributes when creating Tokens (issue672). +* Allow operators to precede dollar-quoted strings (issue763). Release 0.4.4 (Apr 18, 2023) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index b45f3e0f..d3794fd3 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -30,7 +30,7 @@ (r"`(``|[^`])*`", tokens.Name), (r"´(´´|[^´])*´", tokens.Name), - (r'((? Date: Tue, 5 Mar 2024 07:23:48 +0100 Subject: [PATCH 56/88] Update authors. 
--- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4617b7d7..2427bfb3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -31,6 +31,7 @@ Alphabetical list of contributors: * Florian Bauer * Fredy Wijaya * Gavin Wahl +* Georg Traar * Hugo van Kemenade * hurcy * Ian Robertson From 8d34105d39521f980e8e591eadfc73025996dc82 Mon Sep 17 00:00:00 2001 From: Gregor Karetka Date: Wed, 6 Dec 2023 16:07:00 +0100 Subject: [PATCH 57/88] Update extending.rst Fix broken example --- docs/source/extending.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/extending.rst b/docs/source/extending.rst index 0c10924b..866303b7 100644 --- a/docs/source/extending.rst +++ b/docs/source/extending.rst @@ -70,7 +70,7 @@ a keyword to the lexer: lex.add_keywords(keywords.KEYWORDS) # add a custom keyword dictionary - lex.add_keywords({'BAR', sqlparse.tokens.Keyword}) + lex.add_keywords({'BAR': sqlparse.tokens.Keyword}) # no configuration is passed here. The lexer is used as a singleton. 
sqlparse.parse("select * from foo zorder by bar;") From dc2329d07df3b475f2190d3711396691d705fb9a Mon Sep 17 00:00:00 2001 From: Igor Khrol Date: Tue, 14 Nov 2023 17:59:16 +0200 Subject: [PATCH 58/88] Support TypedLiterals in get_parameters --- sqlparse/sql.py | 7 ++++--- tests/test_parse.py | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 1ccfbdbe..f93d7c36 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -619,12 +619,13 @@ class Function(NameAliasMixin, TokenList): def get_parameters(self): """Return a list of parameters.""" parenthesis = self.tokens[-1] + result = [] for token in parenthesis.tokens: if isinstance(token, IdentifierList): return token.get_identifiers() - elif imt(token, i=(Function, Identifier), t=T.Literal): - return [token, ] - return [] + elif imt(token, i=(Function, Identifier, TypedLiteral), t=T.Literal): + result.append(token) + return result class Begin(TokenList): diff --git a/tests/test_parse.py b/tests/test_parse.py index 6e4df7c6..be416ef2 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -133,6 +133,11 @@ def test_parse_nested_function(): assert type(t[0]) is sql.Function +def test_parse_casted_params(): + t = sqlparse.parse("foo(DATE '2023-11-14', TIMESTAMP '2023-11-15')")[0].tokens[0].get_parameters() + assert len(t) == 2 + + def test_parse_div_operator(): p = sqlparse.parse('col1 DIV 5 AS div_col1')[0].tokens assert p[0].tokens[0].tokens[2].ttype is T.Operator From b97387ceab38ea724cb715f8a43050b1693d1d36 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 5 Mar 2024 07:29:30 +0100 Subject: [PATCH 59/88] Update changelog and code cleanup. 
--- AUTHORS | 1 + CHANGELOG | 1 + sqlparse/sql.py | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 2427bfb3..934bbe33 100644 --- a/AUTHORS +++ b/AUTHORS @@ -35,6 +35,7 @@ Alphabetical list of contributors: * Hugo van Kemenade * hurcy * Ian Robertson +* Igor Khrol * JacekPliszka * JavierPan * Jean-Martin Archer diff --git a/CHANGELOG b/CHANGELOG index 0b48e9f3..cbfbcf25 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,7 @@ Enhancements: * Splitting statements now allows to remove the semicolon at the end. Some database backends love statements without semicolon (issue742). +* Support TypedLiterals in get_parameters (pr649, by Khrol). Bug Fixes diff --git a/sqlparse/sql.py b/sqlparse/sql.py index f93d7c36..41606dd8 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -623,7 +623,8 @@ def get_parameters(self): for token in parenthesis.tokens: if isinstance(token, IdentifierList): return token.get_identifiers() - elif imt(token, i=(Function, Identifier, TypedLiteral), t=T.Literal): + elif imt(token, i=(Function, Identifier, TypedLiteral), + t=T.Literal): result.append(token) return result From 39b5a02551de051b1e888135fe71759d5b49a134 Mon Sep 17 00:00:00 2001 From: John Bodley Date: Mon, 6 Nov 2023 20:59:16 -0800 Subject: [PATCH 60/88] Ensure nested ordered identifiers are grouped (fixes #745) --- sqlparse/engine/grouping.py | 1 + tests/test_grouping.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 57d257e2..c486318a 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -360,6 +360,7 @@ def group_functions(tlist): tidx, token = tlist.token_next_by(t=T.Name, idx=tidx) +@recurse(sql.Identifier) def group_order(tlist): """Group together Identifier and Asc/Desc token""" tidx, token = tlist.token_next_by(t=T.Keyword.Order) diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 03d16c5d..e90243b5 100644 --- 
a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -247,6 +247,14 @@ def test_grouping_identifier_list_with_order(): assert str(p.tokens[0].tokens[3]) == '2 desc' +def test_grouping_nested_identifier_with_order(): + # issue745 + p = sqlparse.parse('(a desc)')[0] + assert isinstance(p.tokens[0], sql.Parenthesis) + assert isinstance(p.tokens[0].tokens[1], sql.Identifier) + assert str(p.tokens[0].tokens[1]) == 'a desc' + + def test_grouping_where(): s = 'select * from foo where bar = 1 order by id desc' p = sqlparse.parse(s)[0] From 60486b91ca7b4183313b06f62f7b559f4920f099 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 5 Mar 2024 07:33:32 +0100 Subject: [PATCH 61/88] Update changelog. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index cbfbcf25..c76eab0f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,7 @@ Bug Fixes * Ignore dunder attributes when creating Tokens (issue672). * Allow operators to precede dollar-quoted strings (issue763). +* Fix parsing of nested order clauses (issue745, pr746 by john-bodley). Release 0.4.4 (Apr 18, 2023) From 7334ac99152d02bb09ab0abe79377174c2867f7c Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 15 Mar 2024 08:04:02 +0100 Subject: [PATCH 62/88] Improve splitting of Transact SQL when using GO keyword (fixes #762). --- CHANGELOG | 1 + sqlparse/engine/statement_splitter.py | 7 ++++++- sqlparse/keywords.py | 1 + tests/test_split.py | 9 +++++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index c76eab0f..efb3e95b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,7 @@ Enhancements: * Splitting statements now allows to remove the semicolon at the end. Some database backends love statements without semicolon (issue742). * Support TypedLiterals in get_parameters (pr649, by Khrol). +* Improve splitting of Transact SQL when using GO keyword (issue762). 
Bug Fixes diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py index 9bde92c5..5b3a0d9b 100644 --- a/sqlparse/engine/statement_splitter.py +++ b/sqlparse/engine/statement_splitter.py @@ -99,7 +99,12 @@ def process(self, stream): self.tokens.append(sql.Token(ttype, value)) # Check if we get the end of a statement - if self.level <= 0 and ttype is T.Punctuation and value == ';': + # Issue762: Allow GO (or "GO 2") as statement splitter. + # When implementing a language toggle, it's not only to add + # keywords it's also to change some rules, like this splitting + # rule. + if (self.level <= 0 and ttype is T.Punctuation and value == ';') \ + or (ttype is T.Keyword and value.split()[0] == 'GO'): self.consume_ws = True # Yield pending statement (if any) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index d3794fd3..8911c7a8 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -78,6 +78,7 @@ (r'GROUP\s+BY\b', tokens.Keyword), (r'ORDER\s+BY\b', tokens.Keyword), (r'HANDLER\s+FOR\b', tokens.Keyword), + (r'GO(\s\d+)\b', tokens.Keyword), (r'(LATERAL\s+VIEW\s+)' r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword), diff --git a/tests/test_split.py b/tests/test_split.py index 30a50c59..90d2eaff 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -194,3 +194,12 @@ def test_split_strip_semicolon_procedure(load_file): assert len(stmts) == 2 assert stmts[0].endswith('end') assert stmts[1].endswith('end') + +@pytest.mark.parametrize('sql, num', [ + ('USE foo;\nGO\nSELECT 1;\nGO', 4), + ('SELECT * FROM foo;\nGO', 2), + ('USE foo;\nGO 2\nSELECT 1;', 3) +]) +def test_split_go(sql, num): # issue762 + stmts = sqlparse.split(sql) + assert len(stmts) == num From 0cd062018fb1a1c296417435a10be1910a9ea657 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 15 Mar 2024 08:26:39 +0100 Subject: [PATCH 63/88] Drop support for Python < 3.8. 
Also update tox.ini and Github actions: - unpin some dependencies required for older Python versions - update action versions to latest version --- .github/workflows/codeql-analysis.yml | 6 +++--- .github/workflows/python-app.yml | 11 +++++------ CHANGELOG | 2 +- README.rst | 2 +- pyproject.toml | 9 +++------ tox.ini | 4 +--- 6 files changed, 14 insertions(+), 20 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 5acaa67d..b560fd65 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -43,7 +43,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -54,7 +54,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -68,4 +68,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index ff7269bc..96c76bca 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,25 +15,24 @@ on: jobs: test: name: Run tests on ${{ matrix.py }} - runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.6 available + runs-on: ubuntu-latest strategy: matrix: py: + - "3.13-dev" - "3.12" - "3.11" - "3.10" - "3.9" - "3.8" - - "3.7" - - "3.6" + - "pypy-3.10" - "pypy-3.9" - "pypy-3.8" - - "pypy-3.7" steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.py }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.py }} allow-prereleases: true @@ -47,4 +46,4 @@ jobs: - name: Test with pytest run: pytest --cov=sqlparse - name: Publish to codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 diff --git a/CHANGELOG b/CHANGELOG index efb3e95b..5db0a595 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,7 +3,7 @@ Development Version Notable Changes -* Drop support for Python 3.5. +* Drop support for Python 3.5, 3.6, and 3.7. * Python 3.12 is now supported (pr725, by hugovk). Enhancements: diff --git a/README.rst b/README.rst index 67ddaf96..3eaf0efb 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ python-sqlparse - Parse SQL statements sqlparse is a non-validating SQL parser for Python. It provides support for parsing, splitting and formatting SQL statements. -The module is compatible with Python 3.6+ and released under the terms of the +The module is compatible with Python 3.8+ and released under the terms of the `New BSD license `_. 
Visit the project page at https://github.com/andialbrecht/sqlparse for diff --git a/pyproject.toml b/pyproject.toml index d9a921f1..83cb93ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,6 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -28,7 +26,7 @@ classifiers = [ "Topic :: Database", "Topic :: Software Development", ] -requires-python = ">=3.6" +requires-python = ">=3.8" [project.urls] Home = "https://github.com/andialbrecht/sqlparse" @@ -42,7 +40,6 @@ sqlformat = "sqlparse.__main__:main" [project.optional-dependencies] dev = [ - "importlib_metadata<5; python_version <= '3.7'", "flake8", "build", ] @@ -54,8 +51,8 @@ doc = [ "sphinx", ] tox = [ - "virtualenv<20.22.0", # 20.22.0 dropped Python 3.6 support - "tox<4.5.0", # >=4.5.0 requires virtualenv>=20.22 + "virtualenv", + "tox", ] [tool.flit.sdist] diff --git a/tox.ini b/tox.ini index 40d84ad8..71a98fa2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,8 +1,6 @@ [tox] skip_missing_interpreters = True envlist = - py36 - py37 py38 py39 py310 @@ -22,4 +20,4 @@ commands = deps = flake8 commands = - flake8 sqlparse tests setup.py + flake8 sqlparse tests From c40f8000781633f2281c483c45be8d252bcba2e3 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 15 Mar 2024 08:32:35 +0100 Subject: [PATCH 64/88] Add .readthedocs.yaml. 
--- .readthedocs.yaml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..6dffd85a --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,35 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt \ No newline at end of file From 02819f620e599343d55df53225b9ea6ca46d980c Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 07:39:12 +0100 Subject: [PATCH 65/88] Correct spelling error. --- sqlparse/lexer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 9d25c9e6..99f70f71 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -23,7 +23,7 @@ class Lexer: """The Lexer supports configurable syntax. 
To add support for additional keywords, use the `add_keywords` method.""" - _default_intance = None + _default_instance = None # Development notes: # - This class is prepared to be able to support additional SQL dialects @@ -47,10 +47,10 @@ class Lexer: def get_default_instance(cls): """Returns the lexer instance used internally by the sqlparse core functions.""" - if cls._default_intance is None: - cls._default_intance = cls() - cls._default_intance.default_initialization() - return cls._default_intance + if cls._default_instance is None: + cls._default_instance = cls() + cls._default_instance.default_initialization() + return cls._default_instance def default_initialization(self): """Initialize the lexer with default dictionaries. From 5bb129d3fc8a4d031bd37fab8e5ee24a199a9b8c Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 07:45:04 +0100 Subject: [PATCH 66/88] Thread-safe initialization of Lexer class (fixes #730). --- CHANGELOG | 1 + sqlparse/lexer.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5db0a595..38d53187 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -18,6 +18,7 @@ Bug Fixes * Ignore dunder attributes when creating Tokens (issue672). * Allow operators to precede dollar-quoted strings (issue763). * Fix parsing of nested order clauses (issue745, pr746 by john-bodley). +* Thread-safe initialization of Lexer class (issue730). Release 0.4.4 (Apr 18, 2023) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 99f70f71..f800a52b 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -7,6 +7,7 @@ """SQL Lexer""" import re +from threading import Lock # This code is based on the SqlLexer in pygments. 
# http://pygments.org/ @@ -24,19 +25,20 @@ class Lexer: To add support for additional keywords, use the `add_keywords` method.""" _default_instance = None + _lock = Lock() # Development notes: # - This class is prepared to be able to support additional SQL dialects # in the future by adding additional functions that take the place of - # the function default_initialization() + # the function default_initialization(). # - The lexer class uses an explicit singleton behavior with the # instance-getter method get_default_instance(). This mechanism has # the advantage that the call signature of the entry-points to the # sqlparse library are not affected. Also, usage of sqlparse in third - # party code does not need to be adapted. On the other hand, singleton - # behavior is not thread safe, and the current implementation does not - # easily allow for multiple SQL dialects to be parsed in the same - # process. Such behavior can be supported in the future by passing a + # party code does not need to be adapted. On the other hand, the current + # implementation does not easily allow for multiple SQL dialects to be + # parsed in the same process. + # Such behavior can be supported in the future by passing a # suitably initialized lexer object as an additional parameter to the # entry-point functions (such as `parse`). Code will need to be written # to pass down and utilize such an object. 
The current implementation @@ -47,9 +49,10 @@ class Lexer: def get_default_instance(cls): """Returns the lexer instance used internally by the sqlparse core functions.""" - if cls._default_instance is None: - cls._default_instance = cls() - cls._default_instance.default_initialization() + with cls._lock: + if cls._default_instance is None: + cls._default_instance = cls() + cls._default_instance.default_initialization() return cls._default_instance def default_initialization(self): From 6b05583f119224a43f8047159120edd0228ebd76 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 09:25:09 +0100 Subject: [PATCH 67/88] Add support for some of the JSON operators (fixes #682). --- CHANGELOG | 1 + sqlparse/keywords.py | 2 ++ tests/test_parse.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 38d53187..c2e3a9bc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,7 @@ Enhancements: Some database backends love statements without semicolon (issue742). * Support TypedLiterals in get_parameters (pr649, by Khrol). * Improve splitting of Transact SQL when using GO keyword (issue762). +* Support for some JSON operators (issue682). Bug Fixes diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 8911c7a8..9b7f8153 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -89,6 +89,8 @@ # but the match isn't a keyword. 
(r'\w[$#\w]*', PROCESS_AS_KEYWORD), (r'[;:()\[\],\.]', tokens.Punctuation), + # JSON operators + (r'(\->>?|#>>?|@>|<@|\?\|?|\?&|\-|#\-)', tokens.Operator), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), ] diff --git a/tests/test_parse.py b/tests/test_parse.py index be416ef2..b49dcca3 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -579,3 +579,17 @@ def test_configurable_regex(): for t in tokens if t.ttype not in sqlparse.tokens.Whitespace )[4] == (sqlparse.tokens.Keyword, "zorder by") + + +@pytest.mark.parametrize('sql', [ + '->', '->>', '#>', '#>>', + '@>', '<@', + # leaving ? out for now, they're somehow ambiguous as placeholders + # '?', '?|', '?&', + '||', '-', '#-' +]) +def test_json_operators(sql): + p = sqlparse.parse(sql) + assert len(p) == 1 + assert len(p[0].tokens) == 1 + assert p[0].tokens[0].ttype == sqlparse.tokens.Operator From 8c24779e027e92a1ed379fc271e20f540b0f3d20 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 10:10:19 +0100 Subject: [PATCH 68/88] Improve formatting of statements with JSON operators (fixes #542). --- CHANGELOG | 1 + sqlparse/engine/grouping.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c2e3a9bc..c3387ee3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ Enhancements: * Support TypedLiterals in get_parameters (pr649, by Khrol). * Improve splitting of Transact SQL when using GO keyword (issue762). * Support for some JSON operators (issue682). +* Improve formatting of statements containing JSON operators (issue542). 
Bug Fixes diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index c486318a..9190797a 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -139,7 +139,12 @@ def post(tlist, pidx, tidx, nidx): def group_period(tlist): def match(token): - return token.match(T.Punctuation, '.') + for ttype, value in ((T.Punctuation, '.'), + (T.Operator, '->'), + (T.Operator, '->>')): + if token.match(ttype, value): + return True + return False def valid_prev(token): sqlcls = sql.SquareBrackets, sql.Identifier @@ -153,7 +158,7 @@ def valid_next(token): def post(tlist, pidx, tidx, nidx): # next_ validation is being performed here. issue261 sqlcls = sql.SquareBrackets, sql.Function - ttypes = T.Name, T.String.Symbol, T.Wildcard + ttypes = T.Name, T.String.Symbol, T.Wildcard, T.String.Single next_ = tlist[nidx] if nidx is not None else None valid_next = imt(next_, i=sqlcls, t=ttypes) From 6b10952dcab573783e69638c75ca366b09cbaa4f Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 10:16:29 +0100 Subject: [PATCH 69/88] Add new group for MySQL specific keywords. 
--- sqlparse/keywords.py | 6 +++++- sqlparse/lexer.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 9b7f8153..82e39ad3 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -486,7 +486,6 @@ 'ROUTINE_CATALOG': tokens.Keyword, 'ROUTINE_NAME': tokens.Keyword, 'ROUTINE_SCHEMA': tokens.Keyword, - 'ROW': tokens.Keyword, 'ROWS': tokens.Keyword, 'ROW_COUNT': tokens.Keyword, 'RULE': tokens.Keyword, @@ -829,6 +828,11 @@ 'UNLOCK': tokens.Keyword, } +# MySQL +KEYWORDS_MYSQL = { + 'ROW': tokens.Keyword, +} + # PostgreSQL Syntax KEYWORDS_PLPGSQL = { 'CONFLICT': tokens.Keyword, diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index f800a52b..9548bfe1 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -62,6 +62,7 @@ def default_initialization(self): self.set_SQL_REGEX(keywords.SQL_REGEX) self.add_keywords(keywords.KEYWORDS_COMMON) self.add_keywords(keywords.KEYWORDS_ORACLE) + self.add_keywords(keywords.KEYWORDS_MYSQL) self.add_keywords(keywords.KEYWORDS_PLPGSQL) self.add_keywords(keywords.KEYWORDS_HQL) self.add_keywords(keywords.KEYWORDS_MSACCESS) From ee550f11b95b8d38a1be1b86fa674d37ffcb1609 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 10:19:01 +0100 Subject: [PATCH 70/88] Add test case for #542. 
--- tests/test_format.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_format.py b/tests/test_format.py index 70bb8055..a616f360 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -722,3 +722,10 @@ def test_format_right_margin_invalid_option(right_margin): def test_format_right_margin(): # TODO: Needs better test, only raises exception right now sqlparse.format('foo', right_margin="79") + + +def test_format_json_ops(): # issue542 + formatted = sqlparse.format( + "select foo->'bar', foo->'bar';", reindent=True) + expected = "select foo->'bar',\n foo->'bar';" + assert formatted == expected From 326a316446c3e091a93950251e3e376ebf0d4127 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 12:17:56 +0100 Subject: [PATCH 71/88] Switch to hatch and replace tox. --- .flake8 | 8 ++++ .github/workflows/python-app.yml | 8 ++-- .gitignore | 2 - pyproject.toml | 75 ++++++++++++++++++++++---------- tests/test_cli.py | 4 +- 5 files changed, 66 insertions(+), 31 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..bd01afcf --- /dev/null +++ b/.flake8 @@ -0,0 +1,8 @@ +[flake8] +exclude = + tests, + docs, + dist +max-complexity = 10 +statistics = True +show-source = True \ No newline at end of file diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 96c76bca..31b900bd 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -39,11 +39,11 @@ jobs: check-latest: true - name: Install dependencies run: | - python -m pip install --upgrade pip flit + python -m pip install --upgrade pip hatch flit install --deps=develop - name: Lint with flake8 - run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics - - name: Test with pytest - run: pytest --cov=sqlparse + run: hatch run flake8 + - name: Test with pytest and coverage + run: hatch run cov - name: Publish to codecov uses: codecov/codecov-action@v4 diff 
--git a/.gitignore b/.gitignore index cc2ec16b..77479f17 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,7 @@ dist/ build/ MANIFEST .coverage -.tox/ .cache/ *.egg-info/ htmlcov/ -coverage.xml .pytest_cache \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 83cb93ed..c2d7fe4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["flit_core >=3.2,<4"] -build-backend = "flit_core.buildapi" +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "sqlparse" @@ -40,34 +40,63 @@ sqlformat = "sqlparse.__main__:main" [project.optional-dependencies] dev = [ - "flake8", + "hatch", "build", ] -test = [ - "pytest", - "pytest-cov", -] doc = [ "sphinx", ] -tox = [ - "virtualenv", - "tox", + +[tool.hatch.version] +path = "sqlparse/__init__.py" + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", + # switch to ruff, but fix problems first + # but check defaults! + # https://hatch.pypa.io/1.9/config/static-analysis/#default-settings + "flake8", +] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov = [ + "test-cov", + "cov-report", ] +check = "flake8 sqlparse/" -[tool.flit.sdist] -include = [ - "docs/source/", - "docs/sqlformat.1", - "docs/Makefile", - "tests/*.py", "tests/files/*.sql", - "LICENSE", - "TODO", - "AUTHORS", - "CHANGELOG", - "Makefile", - "tox.ini", +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11", "3.12"] + +[tool.hatch.envs.types] +dependencies = [ + "mypy>=1.0.0", ] +[tool.hatch.envs.types.scripts] +check = "mypy --install-types --non-interactive {args:sqlparse tests}" [tool.coverage.run] -omit = ["sqlparse/__main__.py"] +source_pkgs = ["sqlparse", "tests"] +branch = true +parallel = true +omit = [ + "sqlparse/__main__.py", +] + +[tool.coverage.paths] +sqlparse = ["sqlparse"] +tests = 
["tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] diff --git a/tests/test_cli.py b/tests/test_cli.py index b681a60b..a0c1f2b0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -69,8 +69,8 @@ def test_stdout(filepath, load_file, capsys): def test_script(): # Call with the --help option as a basic sanity check. - cmd = "{:s} -m sqlparse.cli --help".format(sys.executable) - assert subprocess.call(cmd.split()) == 0 + cmd = [sys.executable, '-m', 'sqlparse.cli', '--help'] + assert subprocess.call(cmd) == 0 @pytest.mark.parametrize('fpath, encoding', ( From be9dc7a31f2c2068ea069648029363735a751bfc Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 12:19:16 +0100 Subject: [PATCH 72/88] CI: Remove obsolte flit command. --- .github/workflows/python-app.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 31b900bd..4b1853e9 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -40,7 +40,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip hatch - flit install --deps=develop - name: Lint with flake8 run: hatch run flake8 - name: Test with pytest and coverage From 135bfadf9662031de9b27b13555a8e05ec0f4806 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 12:20:25 +0100 Subject: [PATCH 73/88] CI: Reset max-complexity to current default. --- .flake8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flake8 b/.flake8 index bd01afcf..b508dca8 100644 --- a/.flake8 +++ b/.flake8 @@ -3,6 +3,6 @@ exclude = tests, docs, dist -max-complexity = 10 +max-complexity = 31 statistics = True show-source = True \ No newline at end of file From 8871dd016c1eb332a751ea8b3dbb2e902a5b8ba6 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 12:25:43 +0100 Subject: [PATCH 74/88] CI: Disable 3.13-dev for now. 
--- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 4b1853e9..555e5dc2 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: py: - - "3.13-dev" + #- "3.13-dev" - "3.12" - "3.11" - "3.10" From f55b4e1b69ac2e4fc36151c46d5405ec80b89f58 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 13:27:42 +0100 Subject: [PATCH 75/88] Remove tox.ini. --- tox.ini | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 tox.ini diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 71a98fa2..00000000 --- a/tox.ini +++ /dev/null @@ -1,23 +0,0 @@ -[tox] -skip_missing_interpreters = True -envlist = - py38 - py39 - py310 - py311 - py312 - flake8 - -[testenv] -deps = - pytest - pytest-cov -commands = - sqlformat --version - pytest --cov=sqlparse {posargs} - -[testenv:flake8] -deps = - flake8 -commands = - flake8 sqlparse tests From d76e8a4425d82a6cd704b5e549a8cabefa931341 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 16:34:23 +0100 Subject: [PATCH 76/88] Identify TRUNCATE as DDL, REVOKE/GRANT as DCL keywords. See #719 as well. --- CHANGELOG | 2 ++ sqlparse/keywords.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c3387ee3..ca4d23aa 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -21,6 +21,8 @@ Bug Fixes * Allow operators to precede dollar-quoted strings (issue763). * Fix parsing of nested order clauses (issue745, pr746 by john-bodley). * Thread-safe initialization of Lexer class (issue730). +* Classify TRUNCATE as DDL and GRANT/REVOKE as DCL keywords (based on pr719 + by josuc1, thanks for bringing this up!) 
Release 0.4.4 (Apr 18, 2023) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 82e39ad3..d734bdff 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -288,7 +288,6 @@ 'GLOBAL': tokens.Keyword, 'GO': tokens.Keyword, 'GOTO': tokens.Keyword, - 'GRANT': tokens.Keyword, 'GRANTED': tokens.Keyword, 'GROUPING': tokens.Keyword, @@ -477,7 +476,6 @@ 'RETURNED_SQLSTATE': tokens.Keyword, 'RETURNING': tokens.Keyword, 'RETURNS': tokens.Keyword, - 'REVOKE': tokens.Keyword, 'RIGHT': tokens.Keyword, 'ROLE': tokens.Keyword, 'ROLLBACK': tokens.Keyword.DML, @@ -577,7 +575,6 @@ 'TRIGGER_SCHEMA': tokens.Keyword, 'TRIM': tokens.Keyword, 'TRUE': tokens.Keyword, - 'TRUNCATE': tokens.Keyword, 'TRUSTED': tokens.Keyword, 'TYPE': tokens.Keyword, @@ -684,6 +681,9 @@ 'DROP': tokens.Keyword.DDL, 'CREATE': tokens.Keyword.DDL, 'ALTER': tokens.Keyword.DDL, + 'TRUNCATE': tokens.Keyword.DDL, + 'GRANT': tokens.Keyword.DCL, + 'REVOKE': tokens.Keyword.DCL, 'WHERE': tokens.Keyword, 'FROM': tokens.Keyword, From db1ebe21a1a1c34b510b79fd52bf5130a99606bc Mon Sep 17 00:00:00 2001 From: griff <70294474+griffatrasgo@users.noreply.github.com> Date: Wed, 18 Jan 2023 10:12:59 -0500 Subject: [PATCH 77/88] add snowflake and bq keywords --- sqlparse/keywords.py | 32 ++++++++++++++++++++++++++++++++ sqlparse/lexer.py | 2 ++ 2 files changed, 34 insertions(+) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index d734bdff..130485d9 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -966,3 +966,35 @@ KEYWORDS_MSACCESS = { 'DISTINCTROW': tokens.Keyword, } + + +KEYWORDS_SNOWFLAKE = { + 'ACCOUNT': tokens.Keyword, + 'GSCLUSTER': tokens.Keyword, + 'ISSUE': tokens.Keyword, + 'ORGANIZATION': tokens.Keyword, + 'PIVOT': tokens.Keyword, + 'QUALIFY': tokens.Keyword, + 'REGEXP': tokens.Keyword, + 'RLIKE': tokens.Keyword, + 'SAMPLE': tokens.Keyword, + 'TRY_CAST': tokens.Keyword, + 'UNPIVOT': tokens.Keyword, + + 'VARIANT': tokens.Name.Builtin, +} + + +KEYWORDS_BIGQUERY = { + 
'ASSERT_ROWS_MODIFIED': tokens.Keyword, + 'DEFINE': tokens.Keyword, + 'ENUM': tokens.Keyword, + 'HASH': tokens.Keyword, + 'LOOKUP': tokens.Keyword, + 'PRECEDING': tokens.Keyword, + 'PROTO': tokens.Keyword, + 'RESPECT': tokens.Keyword, + 'TABLESAMPLE': tokens.Keyword, + + 'BIGNUMERIC': tokens.Name.Builtin, +} diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 9548bfe1..0ff99bf3 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -67,6 +67,8 @@ def default_initialization(self): self.add_keywords(keywords.KEYWORDS_HQL) self.add_keywords(keywords.KEYWORDS_MSACCESS) self.add_keywords(keywords.KEYWORDS) + self.add_keywords(keywords.KEYWORDS_SNOWFLAKE) + self.add_keywords(keywords.KEYWORDS_BIGQUERY) def clear(self): """Clear all syntax configurations. From 4ad66a7d750edf635c053d52ce183df7fa0afc4b Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 16:48:19 +0100 Subject: [PATCH 78/88] Update Changelog and authors. --- AUTHORS | 1 + CHANGELOG | 1 + sqlparse/lexer.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 934bbe33..90def42c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -32,6 +32,7 @@ Alphabetical list of contributors: * Fredy Wijaya * Gavin Wahl * Georg Traar +* griff <70294474+griffatrasgo@users.noreply.github.com> * Hugo van Kemenade * hurcy * Ian Robertson diff --git a/CHANGELOG b/CHANGELOG index ca4d23aa..6aa1e278 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -14,6 +14,7 @@ Enhancements: * Improve splitting of Transact SQL when using GO keyword (issue762). * Support for some JSON operators (issue682). * Improve formatting of statements containing JSON operators (issue542). +* Support for BigQuery and Snowflake keywords (pr699, by griffatrasgo). 
Bug Fixes diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 0ff99bf3..8f88d171 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -66,9 +66,9 @@ def default_initialization(self): self.add_keywords(keywords.KEYWORDS_PLPGSQL) self.add_keywords(keywords.KEYWORDS_HQL) self.add_keywords(keywords.KEYWORDS_MSACCESS) - self.add_keywords(keywords.KEYWORDS) self.add_keywords(keywords.KEYWORDS_SNOWFLAKE) self.add_keywords(keywords.KEYWORDS_BIGQUERY) + self.add_keywords(keywords.KEYWORDS) def clear(self): """Clear all syntax configurations. From fc4b0beab89c5598d556572cb6db0165affb017b Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 16:52:04 +0100 Subject: [PATCH 79/88] Code cleanup. --- sqlparse/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 130485d9..029d8bae 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -968,7 +968,7 @@ } -KEYWORDS_SNOWFLAKE = { +KEYWORDS_SNOWFLAKE = { 'ACCOUNT': tokens.Keyword, 'GSCLUSTER': tokens.Keyword, 'ISSUE': tokens.Keyword, From 46971e5a804b29e7dbd437155a8ceffab8ef1cd5 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 16 Mar 2024 17:03:23 +0100 Subject: [PATCH 80/88] Fix parsing of PRIMARY KEY (fixes #740). --- CHANGELOG | 3 ++- sqlparse/keywords.py | 1 + tests/test_regressions.py | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 6aa1e278..745328e6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,7 +23,8 @@ Bug Fixes * Fix parsing of nested order clauses (issue745, pr746 by john-bodley). * Thread-safe initialization of Lexer class (issue730). * Classify TRUNCATE as DDL and GRANT/REVOKE as DCL keywords (based on pr719 - by josuc1, thanks for bringing this up!) + by josuc1, thanks for bringing this up!). +* Fix parsing of PRIMARY KEY (issue740). 
Release 0.4.4 (Apr 18, 2023) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 029d8bae..3b963557 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -77,6 +77,7 @@ (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), (r'GROUP\s+BY\b', tokens.Keyword), (r'ORDER\s+BY\b', tokens.Keyword), + (r'PRIMARY\s+KEY\b', tokens.Keyword), (r'HANDLER\s+FOR\b', tokens.Keyword), (r'GO(\s\d+)\b', tokens.Keyword), (r'(LATERAL\s+VIEW\s+)' diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 961adc17..29cb502c 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -444,3 +444,9 @@ def test_copy_issue672(): p = sqlparse.parse('select * from foo')[0] copied = copy.deepcopy(p) assert str(p) == str(copied) + + +def test_primary_key_issue740(): + p = sqlparse.parse('PRIMARY KEY')[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype == T.Keyword \ No newline at end of file From 012c9f10c8ddfa47ccf17ead28122492155cf6fc Mon Sep 17 00:00:00 2001 From: Adam Johnson Date: Sat, 9 Mar 2024 20:22:21 +0000 Subject: [PATCH 81/88] Optimize sqlparse.utils.imt(). 
--- sqlparse/utils.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 512f0385..58c0245a 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -86,20 +86,23 @@ def imt(token, i=None, m=None, t=None): :param t: TokenType or Tuple/List of TokenTypes :return: bool """ - clss = i - types = [t, ] if t and not isinstance(t, list) else t - mpatterns = [m, ] if m and not isinstance(m, list) else m - if token is None: return False - elif clss and isinstance(token, clss): - return True - elif mpatterns and any(token.match(*pattern) for pattern in mpatterns): + if i and isinstance(token, i): return True - elif types and any(token.ttype in ttype for ttype in types): - return True - else: - return False + if m: + if isinstance(m, list): + if any(token.match(*pattern) for pattern in m): + return True + elif token.match(*m): + return True + if t: + if isinstance(t, list): + if any(token.ttype in ttype for ttype in t): + return True + elif token.ttype in t: + return True + return False def consume(iterator, n): From d8f81471cfc2c39ac43128e2a0c8cc67c313cc40 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 17 Mar 2024 19:19:16 +0100 Subject: [PATCH 82/88] Update AUHTORS and Changelog. --- AUTHORS | 1 + CHANGELOG | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/AUTHORS b/AUTHORS index 90def42c..476e9c1e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,6 +8,7 @@ project: https://bitbucket.org/gutworth/six. Alphabetical list of contributors: * Adam Greenhall +* Adam Johnson * Aki Ariga * Alexander Beedie * Alexey Malyshev diff --git a/CHANGELOG b/CHANGELOG index 745328e6..aeae1c6a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -26,6 +26,10 @@ Bug Fixes by josuc1, thanks for bringing this up!). * Fix parsing of PRIMARY KEY (issue740). +Other + +* Optimize performance of matching function (pr799, by admachainz). 
+ Release 0.4.4 (Apr 18, 2023) ---------------------------- From 617b8f6cd3c55bacf2c80130901508518753f7e1 Mon Sep 17 00:00:00 2001 From: Zi-Xuan Fu Date: Tue, 26 Mar 2024 21:31:51 +0800 Subject: [PATCH 83/88] Add OVER clause, and group it into Function (fixes #701) --- sqlparse/engine/grouping.py | 18 +++++++++++++++++- sqlparse/sql.py | 5 +++++ tests/test_grouping.py | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 9190797a..926a3c1b 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -235,6 +235,16 @@ def group_identifier(tlist): tidx, token = tlist.token_next_by(t=ttypes, idx=tidx) +@recurse(sql.Over) +def group_over(tlist): + tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN) + while token: + nidx, next_ = tlist.token_next(tidx) + if imt(next_, i=sql.Parenthesis, t=T.Name): + tlist.group_tokens(sql.Over, tidx, nidx) + tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN, idx=tidx) + + def group_arrays(tlist): sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function ttypes = T.Name, T.String.Symbol @@ -361,7 +371,12 @@ def group_functions(tlist): while token: nidx, next_ = tlist.token_next(tidx) if isinstance(next_, sql.Parenthesis): - tlist.group_tokens(sql.Function, tidx, nidx) + over_idx, over = tlist.token_next(nidx) + if over and isinstance(over, sql.Over): + eidx = over_idx + else: + eidx = nidx + tlist.group_tokens(sql.Function, tidx, eidx) tidx, token = tlist.token_next_by(t=T.Name, idx=tidx) @@ -412,6 +427,7 @@ def group(stmt): group_for, group_begin, + group_over, group_functions, group_where, group_period, diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 41606dd8..def06797 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -554,6 +554,11 @@ class Where(TokenList): 'HAVING', 'RETURNING', 'INTO') +class Over(TokenList): + """An OVER clause.""" + M_OPEN = T.Keyword, 'OVER' + + class Having(TokenList): """A HAVING clause.""" 
M_OPEN = T.Keyword, 'HAVING' diff --git a/tests/test_grouping.py b/tests/test_grouping.py index e90243b5..0bf10c38 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -185,6 +185,20 @@ def test_grouping_identifier_function(): assert isinstance(p.tokens[0], sql.Identifier) assert isinstance(p.tokens[0].tokens[0], sql.Operation) assert isinstance(p.tokens[0].tokens[0].tokens[0], sql.Function) + p = sqlparse.parse('foo(c1) over win1 as bar')[0] + assert isinstance(p.tokens[0], sql.Identifier) + assert isinstance(p.tokens[0].tokens[0], sql.Function) + assert len(p.tokens[0].tokens[0].tokens) == 4 + assert isinstance(p.tokens[0].tokens[0].tokens[3], sql.Over) + assert isinstance(p.tokens[0].tokens[0].tokens[3].tokens[2], + sql.Identifier) + p = sqlparse.parse('foo(c1) over (partition by c2 order by c3) as bar')[0] + assert isinstance(p.tokens[0], sql.Identifier) + assert isinstance(p.tokens[0].tokens[0], sql.Function) + assert len(p.tokens[0].tokens[0].tokens) == 4 + assert isinstance(p.tokens[0].tokens[0].tokens[3], sql.Over) + assert isinstance(p.tokens[0].tokens[0].tokens[3].tokens[2], + sql.Parenthesis) @pytest.mark.parametrize('s', ['foo+100', 'foo + 100', 'foo*100']) From e03b74e608b71dd06824c2cb42421c0d790248e3 Mon Sep 17 00:00:00 2001 From: Zi-Xuan Fu Date: Wed, 27 Mar 2024 11:00:32 +0800 Subject: [PATCH 84/88] Fix Function.get_parameters(), add Funtion.get_window() --- sqlparse/sql.py | 9 ++++++++- tests/test_grouping.py | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index def06797..05e17748 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -623,7 +623,7 @@ class Function(NameAliasMixin, TokenList): def get_parameters(self): """Return a list of parameters.""" - parenthesis = self.tokens[-1] + parenthesis = self.token_next_by(i=Parenthesis)[1] result = [] for token in parenthesis.tokens: if isinstance(token, IdentifierList): @@ -633,6 +633,13 @@ def get_parameters(self): 
result.append(token) return result + def get_window(self): + """Return the window if it exists.""" + over_clause = self.token_next_by(i=Over) + if not over_clause: + return None + return over_clause[1].tokens[-1] + class Begin(TokenList): """A BEGIN/END block.""" diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 0bf10c38..b39ff270 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -392,6 +392,14 @@ def test_grouping_function(): p = sqlparse.parse('foo(null, bar)')[0] assert isinstance(p.tokens[0], sql.Function) assert len(list(p.tokens[0].get_parameters())) == 2 + p = sqlparse.parse('foo(5) over win1')[0] + assert isinstance(p.tokens[0], sql.Function) + assert len(list(p.tokens[0].get_parameters())) == 1 + assert isinstance(p.tokens[0].get_window(), sql.Identifier) + p = sqlparse.parse('foo(5) over (PARTITION BY c1)')[0] + assert isinstance(p.tokens[0], sql.Function) + assert len(list(p.tokens[0].get_parameters())) == 1 + assert isinstance(p.tokens[0].get_window(), sql.Parenthesis) def test_grouping_function_not_in(): From f1bcf2f8a7ddf6854c99990c56ff5394f4981d58 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 13 Apr 2024 13:42:51 +0200 Subject: [PATCH 85/88] Update AUHTORS and Changelog. --- AUTHORS | 1 + CHANGELOG | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 476e9c1e..261b04df 100644 --- a/AUTHORS +++ b/AUTHORS @@ -82,3 +82,4 @@ Alphabetical list of contributors: * Will Jones * William Ivanski * Yago Riveiro +* Zi-Xuan Fu diff --git a/CHANGELOG b/CHANGELOG index aeae1c6a..4e98e7f6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,7 @@ Enhancements: * Support for some JSON operators (issue682). * Improve formatting of statements containing JSON operators (issue542). * Support for BigQuery and Snowflake keywords (pr699, by griffatrasgo). +* Support parsing of OVER clause (issue701, pr768 by r33s3n6). 
Bug Fixes From b4a39d9850969b4e1d6940d32094ee0b42a2cf03 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 13 Apr 2024 13:59:00 +0200 Subject: [PATCH 86/88] Raise SQLParseError instead of RecursionError. --- CHANGELOG | 5 +++++ sqlparse/sql.py | 14 +++++++++----- tests/test_regressions.py | 17 ++++++++++++++++- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 4e98e7f6..6c442c05 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,11 @@ Notable Changes * Drop support for Python 3.5, 3.6, and 3.7. * Python 3.12 is now supported (pr725, by hugovk). +* IMPORTANT: Fixes a potential denial of service attack (DOS) due to recursion + error for deeply nested statements. Instead of recursion error a generic + SQLParseError is raised. See the security advisory for details: + https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-2m57-hf25-phgg + The vulnerability was discovered by @uriyay-jfrog. Thanks for reporting! Enhancements: diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 05e17748..bd5f35b1 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -10,6 +10,7 @@ import re from sqlparse import tokens as T +from sqlparse.exceptions import SQLParseError from sqlparse.utils import imt, remove_quotes @@ -209,11 +210,14 @@ def flatten(self): This method is recursively called for all child tokens. 
""" - for token in self.tokens: - if token.is_group: - yield from token.flatten() - else: - yield token + try: + for token in self.tokens: + if token.is_group: + yield from token.flatten() + else: + yield token + except RecursionError as err: + raise SQLParseError('Maximum recursion depth exceeded') from err def get_sublists(self): for token in self.tokens: diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 29cb502c..1edd3da6 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -1,9 +1,11 @@ import copy +import sys import pytest import sqlparse from sqlparse import sql, tokens as T +from sqlparse.exceptions import SQLParseError def test_issue9(): @@ -449,4 +451,17 @@ def test_copy_issue672(): def test_primary_key_issue740(): p = sqlparse.parse('PRIMARY KEY')[0] assert len(p.tokens) == 1 - assert p.tokens[0].ttype == T.Keyword \ No newline at end of file + assert p.tokens[0].ttype == T.Keyword + + +@pytest.fixture +def limit_recursion(): + curr_limit = sys.getrecursionlimit() + sys.setrecursionlimit(70) + yield + sys.setrecursionlimit(curr_limit) + + +def test_max_recursion(limit_recursion): + with pytest.raises(SQLParseError): + sqlparse.parse('[' * 100 + ']' * 100) From 29f2e0a6609ddc1fa248faef1bc41616043c544e Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 13 Apr 2024 14:05:19 +0200 Subject: [PATCH 87/88] Raise recursion limit for tests. 
--- tests/test_regressions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 1edd3da6..b3288901 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -457,11 +457,11 @@ def test_primary_key_issue740(): @pytest.fixture def limit_recursion(): curr_limit = sys.getrecursionlimit() - sys.setrecursionlimit(70) + sys.setrecursionlimit(100) yield sys.setrecursionlimit(curr_limit) def test_max_recursion(limit_recursion): with pytest.raises(SQLParseError): - sqlparse.parse('[' * 100 + ']' * 100) + sqlparse.parse('[' * 1000 + ']' * 1000) From ddbd0ec3592545c914fe71e47118c04582d8bfb0 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 13 Apr 2024 14:33:09 +0200 Subject: [PATCH 88/88] Bump version. --- CHANGELOG | 4 ++-- sqlparse/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 6c442c05..da7b6178 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development Version -------------------- +Release 0.5.0 (Apr 13, 2024) +---------------------------- Notable Changes diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index b80b2d60..17b4b525 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.5.0.dev0' +__version__ = '0.5.0' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']