From 0905e7f30d68ca35d81d45ce46b22f99eb241e41 Mon Sep 17 00:00:00 2001 From: codejedi365 Date: Thu, 9 Jan 2025 14:13:39 -0700 Subject: [PATCH 1/4] feat(parser-conventional-monorepo): add new conventional-commits standard parser for monorepos Resolves: #614 --- .../commit_parser/conventional_monorepo.py | 468 ++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 src/semantic_release/commit_parser/conventional_monorepo.py diff --git a/src/semantic_release/commit_parser/conventional_monorepo.py b/src/semantic_release/commit_parser/conventional_monorepo.py new file mode 100644 index 000000000..c5c440e16 --- /dev/null +++ b/src/semantic_release/commit_parser/conventional_monorepo.py @@ -0,0 +1,468 @@ +from __future__ import annotations + +import logging +import os +import re +from fnmatch import fnmatch +from functools import reduce +from itertools import zip_longest +from pathlib import Path +from re import compile as regexp +from typing import TYPE_CHECKING, Any, Iterable + +from pydantic import Field, field_validator +from pydantic.dataclasses import dataclass + +# typing_extensions is for Python 3.8, 3.9, 3.10 compatibility +from typing_extensions import Annotated + +from semantic_release.commit_parser._base import CommitParser, ParserOptions +from semantic_release.commit_parser.angular import LONG_TYPE_NAMES +from semantic_release.commit_parser.token import ( + ParsedCommit, + ParsedMessageResult, + ParseError, + ParseResult, +) +from semantic_release.commit_parser.util import ( + breaking_re, + parse_paragraphs, + sort_numerically, +) +from semantic_release.enums import LevelBump +from semantic_release.errors import InvalidParserOptions + +if TYPE_CHECKING: # pragma: no cover + from git.objects.commit import Commit + + +logger = logging.getLogger(__name__) + + +def _logged_parse_error(commit: Commit, error: str) -> ParseError: + logger.debug(error) + return ParseError(commit, error=error) + + +@dataclass +class 
ConventionalMonorepoParserOptions(ParserOptions): + """Options dataclass for ConventionalCommitMonorepoParser.""" + + minor_tags: tuple[str, ...] = ("feat",) + """Commit-type prefixes that should result in a minor release bump.""" + + patch_tags: tuple[str, ...] = ("fix", "perf") + """Commit-type prefixes that should result in a patch release bump.""" + + other_allowed_tags: tuple[str, ...] = ( + "build", + "chore", + "ci", + "docs", + "style", + "refactor", + "test", + ) + """Commit-type prefixes that are allowed but do not result in a version bump.""" + + default_bump_level: LevelBump = LevelBump.NO_RELEASE + """The minimum bump level to apply to valid commit message.""" + + path_filters: Annotated[tuple[Path, ...], Field(validate_default=True)] = ( + Path("."), + ) + """ + A set of relative paths to filter commits by. Only commits with file changes that + match these file paths or its subdirectories will be considered valid commits. + + Syntax is similar to .gitignore with file path globs and inverse file match globs + via `!` prefix. Paths should be relative to the current working directory. + """ + + scope_prefix: str = "" + """ + A prefix that will be stripped from the scope when parsing commit messages. + + If set, it will cause unscoped commits to be ignored. Use this in tandem with + the `path_filters` option to filter commits by directory and scope. 
+ """ + + @field_validator("path_filters", mode="before") + @classmethod + def convert_strs_to_paths(cls, value: Any) -> tuple[Path]: + values = value if isinstance(value, Iterable) else [value] + results = [] + + for val in values: + if isinstance(val, (str, Path)): + results.append(Path(val)) + continue + + raise TypeError(f"Invalid type: {type(val)}, expected str or Path.") + + return tuple(results) + + @field_validator("path_filters", mode="after") + @classmethod + def resolve_path(cls, dir_paths: tuple[Path, ...]) -> tuple[Path, ...]: + return tuple( + ( + Path(f"!{Path(str_path[1:]).expanduser().absolute().resolve()}") + # maintains the negation prefix if it exists + if (str_path := str(path)).startswith("!") + # otherwise, resolve the path normally + else path.expanduser().absolute().resolve() + ) + for path in dir_paths + ) + + @property + def tag_to_level(self) -> dict[str, LevelBump]: + """A mapping of commit tags to the level bump they should result in.""" + return self._tag_to_level + + @property + def allowed_tags(self) -> tuple[str, ...]: + """ + All commit-type prefixes that are allowed. + + These are used to identify a valid commit message. If a commit message does not start with + one of these prefixes, it will not be considered a valid commit message. + + :return: A tuple of all allowed commit-type prefixes (ordered from most to least significant) + """ + return tuple(list(self.tag_to_level.keys())[::-1]) + + def __post_init__(self) -> None: + self._tag_to_level: dict[str, LevelBump] = { + str(tag): level + for tag, level in [ + # we have to do a type ignore as zip_longest provides a type that is not specific enough + # for our expected output. Due to the empty second array, we know the first is always longest + # and that means no values in the first entry of the tuples will ever be a LevelBump. We + # apply a str() to make mypy happy although it will never happen. 
+ *zip_longest( + self.other_allowed_tags, (), fillvalue=self.default_bump_level + ), + *zip_longest(self.patch_tags, (), fillvalue=LevelBump.PATCH), + *zip_longest(self.minor_tags, (), fillvalue=LevelBump.MINOR), + ] + if "|" not in str(tag) + } + + +class ConventionalCommitMonorepoParser( + CommitParser[ParseResult, ConventionalMonorepoParserOptions] +): + # TODO: Remove for v10 compatibility, get_default_options() will be called instead + parser_options = ConventionalMonorepoParserOptions + + def __init__( + self, options: ConventionalMonorepoParserOptions | None = None + ) -> None: + super().__init__(options) + self.file_selection_filters = [] + self.file_ignore_filters = [] + + for str_path in map(str, self.options.path_filters): + str_filter = str_path[1:] if str_path.startswith("!") else str_path + filter_list = ( + self.file_ignore_filters + if str_path.startswith("!") + else self.file_selection_filters + ) + + # Since fnmatch is not too flexible, we will expand the path filters to include the name and any subdirectories + # as this is how gitignore is interpreted. 
Possible scenarios: + # filter: "src" -> [ "src", "src/**"] + # filter: "src/" -> ["src/**"] + # filter: "src/*" -> "src/*" + # filter: "src/**" -> "src/**" + # This expansion will occur regardless of the negation prefix + filter_list.extend( + filter( + None, + [ + # It's more likely to be a file within a directory than a specific file, for speed do the directory first + ( + # Set the filter to the directory and all subdirectories if it is not already globbing + None + if str_path.endswith("*") + else f"{str_filter.rstrip(os.sep)}{os.sep}**" + ), + # Set the filter to the exact file unless it's a directory/ + None if str_path.endswith(os.sep) else str_filter, + ], + ) + ) + + try: + commit_type_pattern = regexp( + r"(?P<type>%s)" % str.join("|", self.options.allowed_tags) + ) + except re.error as err: + raise InvalidParserOptions( + str.join( + "\n", + [ + f"Invalid options for {self.__class__.__name__}", + "Unable to create regular expression from configured commit-types.", + "Please check the configured commit-types and remove or escape any regular expression characters.", + ], + ) + ) from err + + try: + commit_scope_pattern = regexp( + r"\(" + self.options.scope_prefix + r"(?P<scope>[^\n]+)\)", + ) + except re.error as err: + raise InvalidParserOptions( + str.join( + "\n", + [ + f"Invalid options for {self.__class__.__name__}", + "Unable to create regular expression from configured scope_prefix.", + "Please check the configured scope_prefix and remove or escape any regular expression characters.", + ], + ) + ) from err + + # This regular expression includes scope prefix into the pattern and forces a scope to be present + # PSR will match the full scope but we don't include it in the scope match, + # which implicitly strips it from being included in the returned scope. 
+ self.strict_scope_pattern = regexp( + str.join( + "", + [ + r"^" + commit_type_pattern.pattern, + commit_scope_pattern.pattern, + r"(?P<break>!)?:\s+", + r"(?P<subject>[^\n]+)", + r"(?:\n\n(?P<text>.+))?", # commit body + ], + ), + flags=re.DOTALL, + ) + + self.optional_scope_pattern = regexp( + str.join( + "", + [ + r"^" + commit_type_pattern.pattern, + r"(?:\((?P<scope>[^\n]+)\))?", + r"(?P<break>!)?:\s+", + r"(?P<subject>[^\n]+)", + r"(?:\n\n(?P<text>.+))?", # commit body + ], + ), + flags=re.DOTALL, + ) + + # GitHub & Gitea use (#123), GitLab uses (!123), and BitBucket uses (pull request #123) + self.mr_selector = regexp( + r"[\t ]+\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$" + ) + self.issue_selector = regexp( + str.join( + "", + [ + r"^(?:clos(?:e|es|ed|ing)|fix(?:es|ed|ing)?|resolv(?:e|es|ed|ing)|implement(?:s|ed|ing)?):", + r"[\t ]+(?P<issue_predicate>.+)[\t ]*$", + ], + ), + flags=re.MULTILINE | re.IGNORECASE, + ) + + @staticmethod + def get_default_options() -> ConventionalMonorepoParserOptions: + return ConventionalMonorepoParserOptions() + + def parse_message( + self, message: str, strict_scope: bool = False + ) -> ParsedMessageResult | None: + if not (parsed := self.strict_scope_pattern.match(message)) and strict_scope: + return None + + if not parsed and not (parsed := self.optional_scope_pattern.match(message)): + return None + + parsed_break = parsed.group("break") + parsed_scope = parsed.group("scope") + parsed_subject = parsed.group("subject") + parsed_text = parsed.group("text") + parsed_type = parsed.group("type") + + linked_merge_request = "" + if mr_match := self.mr_selector.search(parsed_subject): + linked_merge_request = mr_match.group("mr_number") + parsed_subject = self.mr_selector.sub("", parsed_subject).strip() + + body_components: dict[str, list[str]] = reduce( + self._commit_body_components_separator, + [ + # Insert the subject before the other paragraphs + parsed_subject, + *parse_paragraphs(parsed_text or ""), + ], + { + 
"breaking_descriptions": [], + "descriptions": [], + "linked_issues": [], + }, + 
) + + level_bump = ( + LevelBump.MAJOR + if body_components["breaking_descriptions"] or parsed_break + else self.options.tag_to_level.get( + parsed_type, self.options.default_bump_level + ) + ) + + return ParsedMessageResult( + bump=level_bump, + type=parsed_type, + category=LONG_TYPE_NAMES.get(parsed_type, parsed_type), + scope=parsed_scope, + descriptions=tuple(body_components["descriptions"]), + breaking_descriptions=tuple(body_components["breaking_descriptions"]), + linked_issues=tuple(body_components["linked_issues"]), + linked_merge_request=linked_merge_request, + ) + + def parse(self, commit: Commit) -> ParseResult: + """Attempt to parse the commit message with a regular expression into a ParseResult.""" + # Multiple scenarios to consider when parsing a commit message [Truth table]: + # ======================================================================================================= + # | || INPUTS || | + # | # ||------------------------+----------------+--------------|| Result | + # | || Example Commit Message | Relevant Files | Scope Prefix || | + # |----||------------------------+----------------+--------------||-------------------------------------| + # | 1 || type(prefix-cli): msg | yes | "prefix-" || ParsedCommit | + # | 2 || type(prefix-cli): msg | yes | "" || ParsedCommit | + # | 3 || type(prefix-cli): msg | no | "prefix-" || ParsedCommit | + # | 4 || type(prefix-cli): msg | no | "" || ParseError[No files] | + # | 5 || type(scope-cli): msg | yes | "prefix-" || ParsedCommit | + # | 6 || type(scope-cli): msg | yes | "" || ParsedCommit | + # | 7 || type(scope-cli): msg | no | "prefix-" || ParseError[No files & wrong scope] | + # | 8 || type(scope-cli): msg | no | "" || ParseError[No files] | + # | 9 || type(cli): msg | yes | "prefix-" || ParsedCommit | + # | 10 || type(cli): msg | yes | "" || ParsedCommit | + # | 11 || type(cli): msg | no | "prefix-" || ParseError[No files & wrong scope] | + # | 12 || type(cli): msg | no | "" || ParseError[No 
files] | + # | 13 || type: msg | yes | "prefix-" || ParsedCommit | + # | 14 || type: msg | yes | "" || ParsedCommit | + # | 15 || type: msg | no | "prefix-" || ParseError[No files & wrong scope] | + # | 16 || type: msg | no | "" || ParseError[No files] | + # | 17 || non-conventional msg | yes | "prefix-" || ParseError[Invalid Syntax] | + # | 18 || non-conventional msg | yes | "" || ParseError[Invalid Syntax] | + # | 19 || non-conventional msg | no | "prefix-" || ParseError[Invalid Syntax] | + # | 20 || non-conventional msg | no | "" || ParseError[Invalid Syntax] | + # ======================================================================================================= + + # Initial Logic Flow: + # [1] When there are no relevant files and a scope prefix is defined, we enforce a strict scope + # [2] When there are no relevant files and no scope prefix is defined, we parse scoped or unscoped commits + # [3] When there are relevant files, we parse scoped or unscoped commits regardless of any defined prefix + has_relevant_changed_files = self._has_relevant_changed_files(commit) + strict_scope = bool( + not has_relevant_changed_files and self.options.scope_prefix + ) + pmsg_result = self.parse_message( + message=str(commit.message), + strict_scope=strict_scope, + ) + + if pmsg_result and (has_relevant_changed_files or strict_scope): + logger.debug( + "commit %s introduces a %s level_bump", + commit.hexsha[:8], + pmsg_result.bump, + ) + + return ParsedCommit.from_parsed_message_result(commit, pmsg_result) + + if pmsg_result and not has_relevant_changed_files: + return _logged_parse_error( + commit, + f"Commit {commit.hexsha[:7]} has no changed files matching the path filter(s)", + ) + + if strict_scope and self.parse_message(str(commit.message), strict_scope=False): + return _logged_parse_error( + commit, + str.join( + " and ", + [ + f"Commit {commit.hexsha[:7]} has no changed files matching the path filter(s)", + f"the scope does not match scope prefix 
'{self.options.scope_prefix}'", + ], + ), + ) + + return _logged_parse_error( + commit, + f"Format Mismatch! Unable to parse commit message: {commit.message!r}", + ) + + def _has_relevant_changed_files(self, commit: Commit) -> bool: + # Extract git root from commit + git_root = ( + Path(commit.repo.working_tree_dir or commit.repo.working_dir) + .absolute() + .resolve() + ) + + # Check if the changed files of the commit that match the path filters + for full_path in iter( + str(git_root / rel_git_path) for rel_git_path in commit.stats.files + ): + # Check if the filepath matches any of the file selection filters + if not any( + fnmatch(full_path, select_filter) + for select_filter in self.file_selection_filters + ): + continue + + # Pass filter matches, so now evaluate if it is supposed to be ignored + if not any( + fnmatch(full_path, ignore_filter) + for ignore_filter in self.file_ignore_filters + ): + # No ignore filter matched, so it must be a relevant file + return True + + return False + + def _commit_body_components_separator( + self, accumulator: dict[str, list[str]], text: str + ) -> dict[str, list[str]]: + if match := breaking_re.match(text): + accumulator["breaking_descriptions"].append(match.group(1) or "") + return accumulator + + if match := self.issue_selector.search(text): + predicate = regexp(r",? 
and | *[,;/& ] *").sub( + ",", match.group("issue_predicate") or "" + ) + # Almost all issue trackers use a number to reference an issue so + # we use a simple regexp to validate the existence of a number which helps filter out + # any non-issue references that don't fit our expected format + has_number = regexp(r"\d+") + new_issue_refs: set[str] = set( + filter( + lambda issue_str, validator=has_number: validator.search(issue_str), # type: ignore[arg-type] + predicate.split(","), + ) + ) + accumulator["linked_issues"] = sort_numerically( + set(accumulator["linked_issues"]).union(new_issue_refs) + ) + return accumulator + + # Prevent appending duplicate descriptions + if text not in accumulator["descriptions"]: + accumulator["descriptions"].append(text) + + return accumulator From d37270a06fffd3fbd4243ea8b8b6fe2eaebae21f Mon Sep 17 00:00:00 2001 From: codejedi365 Date: Sat, 18 Jan 2025 16:27:55 -0700 Subject: [PATCH 2/4] feat(config): add `conventional-monorepo` as valid `commit_parser` type NOTICE: This release introduces a new built-in parser type that can be utilized for monorepo projects. The type value is `conventional-monorepo` and when specified it will apply the conventional commit parser to a monorepo environment. This parser has specialized options to help handle monorepo projects as well. For more information, please refer to the [Monorepo Docs](https://python-semantic-release.readthedocs.io/en/stable/monorepos.html). 
--- src/semantic_release/cli/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/semantic_release/cli/config.py b/src/semantic_release/cli/config.py index 59df69834..2623692d9 100644 --- a/src/semantic_release/cli/config.py +++ b/src/semantic_release/cli/config.py @@ -31,6 +31,7 @@ from typing_extensions import Annotated, Self from urllib3.util.url import parse_url +from semantic_release.commit_parser.conventional_monorepo import ConventionalCommitMonorepoParser import semantic_release.hvcs as hvcs from semantic_release.changelog.context import ChangelogMode from semantic_release.changelog.template import environment @@ -74,6 +75,7 @@ class HvcsClient(str, Enum): _known_commit_parsers: Dict[str, type[CommitParser]] = { "conventional": ConventionalCommitParser, "angular": AngularCommitParser, + "conventional-monorepo": ConventionalCommitMonorepoParser, "emoji": EmojiCommitParser, "scipy": ScipyCommitParser, "tag": TagCommitParser, From 6f73c08fcb4d6dcf9bb32b968996167bb00a8b85 Mon Sep 17 00:00:00 2001 From: Marc Hermann <76454594+marc-at-brightnight@users.noreply.github.com> Date: Fri, 17 Jan 2025 18:36:45 -0600 Subject: [PATCH 3/4] docs(configuration) add descriptions for new monorepo commit parser options Co-authored-by: marc --- docs/configuration/configuration.rst | 50 ++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/docs/configuration/configuration.rst b/docs/configuration/configuration.rst index 08368b337..2c26c8acf 100644 --- a/docs/configuration/configuration.rst +++ b/docs/configuration/configuration.rst @@ -813,6 +813,22 @@ For more information see :ref:`commit_parsing`. ``commit_parser_options`` """"""""""""""""""""""""" +This section defines configuration options that modify commit parser options. + +.. 
note:: + **pyproject.toml:** ``[tool.semantic_release.commit_parser_options]`` + + **releaserc.toml:** ``[semantic_release.commit_parser_options]`` + + **releaserc.json:** ``{ "semantic_release": { "commit_parser_options": {} } }`` + +---- + +.. _config-commit_parser_options-types: + +``allowed_types``/``minor_types``/``patch_types`` +************************************************* + **Type:** ``dict[str, Any]`` This set of options are passed directly to the commit parser class specified in @@ -826,6 +842,40 @@ For more information (to include defaults), see ---- +.. _config-commit_parser_options-path_filters: + +``path_filters`` +*************************** + +**Type:** ``list[str]`` + +A set of relative paths to filter commits by. Only commits with file changes that +match these file paths or its subdirectories will be considered valid commits. + +Syntax is similar to .gitignore with file path globs and inverse file match globs +via ``!`` prefix. Paths should be relative to the current working directory. + +**Default:** ``["."]`` + +---- + +.. _config-commit_parser_options-scope_prefix: + +``scope_prefix`` +*************************** + +**Type:** ``str`` + +A prefix that will be stripped from the scope when parsing commit messages. + +If set, it will cause unscoped commits to be ignored. Use this in tandem with +the :ref:`config-commit_parser_options-path_filters` option to filter commits by +directory and scope. + +**Default:** ``""`` + +---- + .. 
_config-logging_use_named_masks: ``logging_use_named_masks`` From d3856b567f8f85536ac839e60926689d41ed9303 Mon Sep 17 00:00:00 2001 From: codejedi365 Date: Tue, 18 Mar 2025 22:02:48 -0600 Subject: [PATCH 4/4] chore: working directory --- src/semantic_release/cli/config.py | 2 +- .../commit_parser/conventional/__init__.py | 11 + .../commit_parser/conventional/options.py | 71 ++++ .../conventional/options_monorepo.py | 68 ++++ .../parser.py} | 107 ++---- .../conventional/parser_monorepo.py | 315 ++++++++++++++++++ 6 files changed, 491 insertions(+), 83 deletions(-) create mode 100644 src/semantic_release/commit_parser/conventional/__init__.py create mode 100644 src/semantic_release/commit_parser/conventional/options.py create mode 100644 src/semantic_release/commit_parser/conventional/options_monorepo.py rename src/semantic_release/commit_parser/{conventional.py => conventional/parser.py} (83%) create mode 100644 src/semantic_release/commit_parser/conventional/parser_monorepo.py diff --git a/src/semantic_release/cli/config.py b/src/semantic_release/cli/config.py index 2623692d9..72535c4c9 100644 --- a/src/semantic_release/cli/config.py +++ b/src/semantic_release/cli/config.py @@ -73,8 +73,8 @@ class HvcsClient(str, Enum): _known_commit_parsers: Dict[str, type[CommitParser]] = { - "conventional": ConventionalCommitParser, "angular": AngularCommitParser, + "conventional": ConventionalCommitParser, "conventional-monorepo": ConventionalCommitMonorepoParser, "emoji": EmojiCommitParser, "scipy": ScipyCommitParser, diff --git a/src/semantic_release/commit_parser/conventional/__init__.py b/src/semantic_release/commit_parser/conventional/__init__.py new file mode 100644 index 000000000..69943a7c1 --- /dev/null +++ b/src/semantic_release/commit_parser/conventional/__init__.py @@ -0,0 +1,11 @@ +from .options import ConventionalCommitParserOptions +from .options_monorepo import ConventionalMonorepoParserOptions +from .parser import ConventionalCommitParser +from .parser_monorepo 
import ConventionalCommitMonorepoParser + +__all__ = [ + "ConventionalCommitParser", + "ConventionalCommitParserOptions", + "ConventionalCommitMonorepoParser", + "ConventionalMonorepoParserOptions", +] diff --git a/src/semantic_release/commit_parser/conventional/options.py b/src/semantic_release/commit_parser/conventional/options.py new file mode 100644 index 000000000..beda9ed48 --- /dev/null +++ b/src/semantic_release/commit_parser/conventional/options.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from itertools import zip_longest + +from pydantic.dataclasses import dataclass + +from semantic_release.commit_parser._base import ParserOptions +from semantic_release.enums import LevelBump + + +@dataclass +class ConventionalCommitParserOptions(ParserOptions): + """Options dataclass for the ConventionalCommitParser.""" + + minor_tags: tuple[str, ...] = ("feat",) + """Commit-type prefixes that should result in a minor release bump.""" + + patch_tags: tuple[str, ...] = ("fix", "perf") + """Commit-type prefixes that should result in a patch release bump.""" + + other_allowed_tags: tuple[str, ...] = ( + "build", + "chore", + "ci", + "docs", + "style", + "refactor", + "test", + ) + """Commit-type prefixes that are allowed but do not result in a version bump.""" + + allowed_tags: tuple[str, ...] = ( + *minor_tags, + *patch_tags, + *other_allowed_tags, + ) + """ + All commit-type prefixes that are allowed. + + These are used to identify a valid commit message. If a commit message does not start with + one of these prefixes, it will not be considered a valid commit message. 
+ """ + + default_bump_level: LevelBump = LevelBump.NO_RELEASE + """The minimum bump level to apply to valid commit message.""" + + parse_squash_commits: bool = True + """Toggle flag for whether or not to parse squash commits""" + + ignore_merge_commits: bool = True + """Toggle flag for whether or not to ignore merge commits""" + + @property + def tag_to_level(self) -> dict[str, LevelBump]: + """A mapping of commit tags to the level bump they should result in.""" + return self._tag_to_level + + def __post_init__(self) -> None: + self._tag_to_level: dict[str, LevelBump] = { + str(tag): level + for tag, level in [ + # we have to do a type ignore as zip_longest provides a type that is not specific enough + # for our expected output. Due to the empty second array, we know the first is always longest + # and that means no values in the first entry of the tuples will ever be a LevelBump. We + # apply a str() to make mypy happy although it will never happen. + *zip_longest(self.allowed_tags, (), fillvalue=self.default_bump_level), + *zip_longest(self.patch_tags, (), fillvalue=LevelBump.PATCH), + *zip_longest(self.minor_tags, (), fillvalue=LevelBump.MINOR), + ] + if "|" not in str(tag) + } diff --git a/src/semantic_release/commit_parser/conventional/options_monorepo.py b/src/semantic_release/commit_parser/conventional/options_monorepo.py new file mode 100644 index 000000000..5959c7958 --- /dev/null +++ b/src/semantic_release/commit_parser/conventional/options_monorepo.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Any, Iterable + +from pydantic import Field, field_validator +from pydantic.dataclasses import dataclass + +# typing_extensions is for Python 3.8, 3.9, 3.10 compatibility +from typing_extensions import Annotated + +from .options import ConventionalCommitParserOptions + +if TYPE_CHECKING: # pragma: no cover + pass + + +@dataclass +class 
ConventionalMonorepoParserOptions(ConventionalCommitParserOptions): + """Options dataclass for ConventionalCommitMonorepoParser.""" + + path_filters: Annotated[tuple[Path, ...], Field(validate_default=True)] = ( + Path("."), + ) + """ + A set of relative paths to filter commits by. Only commits with file changes that + match these file paths or its subdirectories will be considered valid commits. + + Syntax is similar to .gitignore with file path globs and inverse file match globs + via `!` prefix. Paths should be relative to the current working directory. + """ + + scope_prefix: str = "" + """ + A prefix that will be stripped from the scope when parsing commit messages. + + If set, it will cause unscoped commits to be ignored. Use this in tandem with + the `path_filters` option to filter commits by directory and scope. + """ + + @field_validator("path_filters", mode="before") + @classmethod + def convert_strs_to_paths(cls, value: Any) -> tuple[Path]: + values = value if isinstance(value, Iterable) else [value] + results = [] + + for val in values: + if isinstance(val, (str, Path)): + results.append(Path(val)) + continue + + raise TypeError(f"Invalid type: {type(val)}, expected str or Path.") + + return tuple(results) + + @field_validator("path_filters", mode="after") + @classmethod + def resolve_path(cls, dir_paths: tuple[Path, ...]) -> tuple[Path, ...]: + return tuple( + ( + Path(f"!{Path(str_path[1:]).expanduser().absolute().resolve()}") + # maintains the negation prefix if it exists + if (str_path := str(path)).startswith("!") + # otherwise, resolve the path normally + else path.expanduser().absolute().resolve() + ) + for path in dir_paths + ) diff --git a/src/semantic_release/commit_parser/conventional.py b/src/semantic_release/commit_parser/conventional/parser.py similarity index 83% rename from src/semantic_release/commit_parser/conventional.py rename to src/semantic_release/commit_parser/conventional/parser.py index 3cd50d9c7..54625a85e 100644 --- 
a/src/semantic_release/commit_parser/conventional.py +++ b/src/semantic_release/commit_parser/conventional/parser.py @@ -1,16 +1,22 @@ from __future__ import annotations -import re from functools import reduce -from itertools import zip_longest -from re import compile as regexp +from re import ( + DOTALL, + IGNORECASE, + MULTILINE, + compile as regexp, + error as RegexError, # noqa: N812 +) from textwrap import dedent -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING from git.objects.commit import Commit -from pydantic.dataclasses import dataclass -from semantic_release.commit_parser._base import CommitParser, ParserOptions +from semantic_release.commit_parser._base import CommitParser +from semantic_release.commit_parser.conventional.options import ( + ConventionalCommitParserOptions, +) from semantic_release.commit_parser.token import ( ParsedCommit, ParsedMessageResult, @@ -28,13 +34,8 @@ from semantic_release.globals import logger from semantic_release.helpers import sort_numerically, text_reducer -if TYPE_CHECKING: # pragma: no cover - from git.objects.commit import Commit - - -def _logged_parse_error(commit: Commit, error: str) -> ParseError: - logger.debug(error) - return ParseError(commit, error=error) +if TYPE_CHECKING: + pass # TODO: Remove from here, allow for user customization instead via options @@ -53,67 +54,9 @@ def _logged_parse_error(commit: Commit, error: str) -> ParseError: } -@dataclass -class ConventionalCommitParserOptions(ParserOptions): - """Options dataclass for the ConventionalCommitParser.""" - - minor_tags: Tuple[str, ...] = ("feat",) - """Commit-type prefixes that should result in a minor release bump.""" - - patch_tags: Tuple[str, ...] = ("fix", "perf") - """Commit-type prefixes that should result in a patch release bump.""" - - other_allowed_tags: Tuple[str, ...] 
= ( - "build", - "chore", - "ci", - "docs", - "style", - "refactor", - "test", - ) - """Commit-type prefixes that are allowed but do not result in a version bump.""" - - allowed_tags: Tuple[str, ...] = ( - *minor_tags, - *patch_tags, - *other_allowed_tags, - ) - """ - All commit-type prefixes that are allowed. - - These are used to identify a valid commit message. If a commit message does not start with - one of these prefixes, it will not be considered a valid commit message. - """ - - default_bump_level: LevelBump = LevelBump.NO_RELEASE - """The minimum bump level to apply to valid commit message.""" - - parse_squash_commits: bool = True - """Toggle flag for whether or not to parse squash commits""" - - ignore_merge_commits: bool = True - """Toggle flag for whether or not to ignore merge commits""" - - @property - def tag_to_level(self) -> dict[str, LevelBump]: - """A mapping of commit tags to the level bump they should result in.""" - return self._tag_to_level - - def __post_init__(self) -> None: - self._tag_to_level: dict[str, LevelBump] = { - str(tag): level - for tag, level in [ - # we have to do a type ignore as zip_longest provides a type that is not specific enough - # for our expected output. Due to the empty second array, we know the first is always longest - # and that means no values in the first entry of the tuples will ever be a LevelBump. We - # apply a str() to make mypy happy although it will never happen. 
- *zip_longest(self.allowed_tags, (), fillvalue=self.default_bump_level), - *zip_longest(self.patch_tags, (), fillvalue=LevelBump.PATCH), - *zip_longest(self.minor_tags, (), fillvalue=LevelBump.MINOR), - ] - if "|" not in str(tag) - } +def _logged_parse_error(commit: Commit, error: str) -> ParseError: + logger.debug(error) + return ParseError(commit, error=error) class ConventionalCommitParser( @@ -135,7 +78,7 @@ def __init__(self, options: ConventionalCommitParserOptions | None = None) -> No commit_type_pattern = regexp( r"(?P%s)" % str.join("|", self.options.allowed_tags) ) - except re.error as err: + except RegexError as err: raise InvalidParserOptions( str.join( "\n", @@ -167,7 +110,7 @@ def __init__(self, options: ConventionalCommitParserOptions | None = None) -> No r"(?:\n\n(?P.+))?", # commit body ], ), - flags=re.DOTALL, + flags=DOTALL, ) # GitHub & Gitea use (#123), GitLab uses (!123), and BitBucket uses (pull request #123) @@ -182,27 +125,27 @@ def __init__(self, options: ConventionalCommitParserOptions | None = None) -> No r"[\t ]+(?P.+)[\t ]*$", ], ), - flags=re.MULTILINE | re.IGNORECASE, + flags=MULTILINE | IGNORECASE, ) self.notice_selector = regexp(r"^NOTICE: (?P.+)$") self.filters = { "typo-extra-spaces": (regexp(r"(\S) +(\S)"), r"\1 \2"), "git-header-commit": ( - regexp(r"^[\t ]*commit [0-9a-f]+$\n?", flags=re.MULTILINE), + regexp(r"^[\t ]*commit [0-9a-f]+$\n?", flags=MULTILINE), "", ), "git-header-author": ( - regexp(r"^[\t ]*Author: .+$\n?", flags=re.MULTILINE), + regexp(r"^[\t ]*Author: .+$\n?", flags=MULTILINE), "", ), "git-header-date": ( - regexp(r"^[\t ]*Date: .+$\n?", flags=re.MULTILINE), + regexp(r"^[\t ]*Date: .+$\n?", flags=MULTILINE), "", ), "git-squash-heading": ( regexp( r"^[\t ]*Squashed commit of the following:.*$\n?", - flags=re.MULTILINE, + flags=MULTILINE, ), "", ), @@ -215,7 +158,7 @@ def __init__(self, options: ConventionalCommitParserOptions | None = None) -> No commit_type_pattern.pattern + r"\b", # prior to commit type ], ), 
- flags=re.MULTILINE, + flags=MULTILINE, ), # move commit type to the start of the line r"\1", diff --git a/src/semantic_release/commit_parser/conventional/parser_monorepo.py b/src/semantic_release/commit_parser/conventional/parser_monorepo.py new file mode 100644 index 000000000..100a3c05e --- /dev/null +++ b/src/semantic_release/commit_parser/conventional/parser_monorepo.py @@ -0,0 +1,315 @@ +from __future__ import annotations + +import logging +import os +import re +from fnmatch import fnmatch +from functools import reduce +from itertools import zip_longest +from pathlib import Path +from re import compile as regexp +from typing import TYPE_CHECKING, Any, Iterable + +# typing_extensions is for Python 3.8, 3.9, 3.10 compatibility +from typing_extensions import Annotated + +from semantic_release.commit_parser._base import CommitParser, ParserOptions +from semantic_release.commit_parser.angular import LONG_TYPE_NAMES +from semantic_release.commit_parser.conventional.options_monorepo import ConventionalMonorepoParserOptions +from semantic_release.commit_parser.conventional.parser import ConventionalCommitParser +from semantic_release.commit_parser.token import ( + ParsedCommit, + ParsedMessageResult, + ParseError, + ParseResult, +) +from semantic_release.commit_parser.util import ( + breaking_re, + force_str, + parse_paragraphs, + sort_numerically, +) +from semantic_release.enums import LevelBump +from semantic_release.errors import InvalidParserOptions + +if TYPE_CHECKING: # pragma: no cover + from git.objects.commit import Commit + + +logger = logging.getLogger(__name__) + + +def _logged_parse_error(commit: Commit, error: str) -> ParseError: + logger.debug(error) + return ParseError(commit, error=error) + + +class ConventionalCommitMonorepoParser(ConventionalCommitParser): + + # TODO: Remove for v10 compatibility, get_default_options() will be called instead + parser_options = ConventionalMonorepoParserOptions + + def __init__( + self, options: 
ConventionalMonorepoParserOptions | None = None + ) -> None: + super().__init__(options) + self.file_selection_filters = [] + self.file_ignore_filters = [] + + for str_path in map(str, self.options.path_filters): + str_filter = str_path[1:] if str_path.startswith("!") else str_path + filter_list = ( + self.file_ignore_filters + if str_path.startswith("!") + else self.file_selection_filters + ) + + # Since fnmatch is not too flexible, we will expand the path filters to include the name and any subdirectories + # as this is how gitignore is interpreted. Possible scenarios: + # filter: "src" -> [ "src", "src/**"] + # filter: "src/" -> ["src/**"] + # filter: "src/*" -> "src/*" + # filter: "src/**" -> "src/**" + # This expansion will occur regardless of the negation prefix + filter_list.extend( + filter( + None, + [ + # Its more likely to be a file within a directory than a specific file, for speed do the directory first + ( + # Set the filter to the directory and all subdirectories if it is not already globbing + None + if str_path.endswith("*") + else f"{str_filter.rstrip(os.sep)}{os.sep}**" + ), + # Set the filter to the exact file unless its a directory/ + None if str_path.endswith(os.sep) else str_filter, + ], + ) + ) + + try: + commit_scope_pattern = regexp( + r"\(" + self.options.scope_prefix + r"(?P[^\n]+)\)", + ) + except re.error as err: + raise InvalidParserOptions( + str.join( + "\n", + [ + f"Invalid options for {self.__class__.__name__}", + "Unable to create regular expression from configured scope_prefix.", + "Please check the configured scope_prefix and remove or escape any regular expression characters.", + ], + ) + ) from err + + try: + commit_type_pattern = regexp( + r"(?P%s)" % str.join("|", self.options.allowed_tags) + ) + except re.error as err: + raise InvalidParserOptions( + str.join( + "\n", + [ + f"Invalid options for {self.__class__.__name__}", + "Unable to create regular expression from configured commit-types.", + "Please check the configured 
commit-types and remove or escape any regular expression characters.", + ], + ) + ) from err + + # This regular expression includes scope prefix into the pattern and forces a scope to be present + # PSR will match the full scope but we don't include it in the scope match, + # which implicitly strips it from being included in the returned scope. + self.strict_scope_pattern = regexp( + str.join( + "", + [ + r"^" + commit_type_pattern.pattern, + commit_scope_pattern.pattern, + r"(?P!)?:\s+", + r"(?P[^\n]+)", + r"(?:\n\n(?P.+))?", # commit body + ], + ), + flags=re.DOTALL, + ) + + self.optional_scope_pattern = regexp( + str.join( + "", + [ + r"^" + commit_type_pattern.pattern, + r"(?:\((?P[^\n]+)\))?", + r"(?P!)?:\s+", + r"(?P[^\n]+)", + r"(?:\n\n(?P.+))?", # commit body + ], + ), + flags=re.DOTALL, + ) + + @staticmethod + def get_default_options() -> ConventionalMonorepoParserOptions: + return ConventionalMonorepoParserOptions() + + def parse_message( + self, message: str, strict_scope: bool = False + ) -> ParsedMessageResult | None: + if not (parsed := self.strict_scope_pattern.match(message)) and strict_scope: + return None + + if not parsed and not (parsed := self.optional_scope_pattern.match(message)): + return None + + parsed_break = parsed.group("break") + parsed_scope = parsed.group("scope") or "" + parsed_subject = parsed.group("subject") + parsed_text = parsed.group("text") + parsed_type = parsed.group("type") + + linked_merge_request = "" + if mr_match := self.mr_selector.search(parsed_subject): + linked_merge_request = mr_match.group("mr_number") + parsed_subject = self.mr_selector.sub("", parsed_subject).strip() + + body_components: dict[str, list[str]] = reduce( + self.commit_body_components_separator, + [ + # Insert the subject before the other paragraphs + parsed_subject, + *parse_paragraphs(parsed_text or ""), + ], + { + "breaking_descriptions": [], + "descriptions": [], + "notices": [], + "linked_issues": [], + }, + ) + + level_bump = ( + 
LevelBump.MAJOR + if body_components["breaking_descriptions"] or parsed_break + else self.options.tag_to_level.get( + parsed_type, self.options.default_bump_level + ) + ) + + return ParsedMessageResult( + bump=level_bump, + type=parsed_type, + category=LONG_TYPE_NAMES.get(parsed_type, parsed_type), + scope=parsed_scope, + descriptions=tuple(body_components["descriptions"]), + breaking_descriptions=tuple(body_components["breaking_descriptions"]), + release_notices=tuple(body_components["notices"]), + linked_issues=tuple(body_components["linked_issues"]), + linked_merge_request=linked_merge_request, + ) + + def parse_commit(self, commit: Commit) -> ParseResult: + """Attempt to parse the commit message with a regular expression into a ParseResult.""" + # Multiple scenarios to consider when parsing a commit message [Truth table]: + # ======================================================================================================= + # | || INPUTS || | + # | # ||------------------------+----------------+--------------|| Result | + # | || Example Commit Message | Relevant Files | Scope Prefix || | + # |----||------------------------+----------------+--------------||-------------------------------------| + # | 1 || type(prefix-cli): msg | yes | "prefix-" || ParsedCommit | + # | 2 || type(prefix-cli): msg | yes | "" || ParsedCommit | + # | 3 || type(prefix-cli): msg | no | "prefix-" || ParsedCommit | + # | 4 || type(prefix-cli): msg | no | "" || ParseError[No files] | + # | 5 || type(scope-cli): msg | yes | "prefix-" || ParsedCommit | + # | 6 || type(scope-cli): msg | yes | "" || ParsedCommit | + # | 7 || type(scope-cli): msg | no | "prefix-" || ParseError[No files & wrong scope] | + # | 8 || type(scope-cli): msg | no | "" || ParseError[No files] | + # | 9 || type(cli): msg | yes | "prefix-" || ParsedCommit | + # | 10 || type(cli): msg | yes | "" || ParsedCommit | + # | 11 || type(cli): msg | no | "prefix-" || ParseError[No files & wrong scope] | + # | 12 || 
type(cli): msg | no | "" || ParseError[No files] | + # | 13 || type: msg | yes | "prefix-" || ParsedCommit | + # | 14 || type: msg | yes | "" || ParsedCommit | + # | 15 || type: msg | no | "prefix-" || ParseError[No files & wrong scope] | + # | 16 || type: msg | no | "" || ParseError[No files] | + # | 17 || non-conventional msg | yes | "prefix-" || ParseError[Invalid Syntax] | + # | 18 || non-conventional msg | yes | "" || ParseError[Invalid Syntax] | + # | 19 || non-conventional msg | no | "prefix-" || ParseError[Invalid Syntax] | + # | 20 || non-conventional msg | no | "" || ParseError[Invalid Syntax] | + # ======================================================================================================= + + # Initial Logic Flow: + # [1] When there are no relevant files and a scope prefix is defined, we enforce a strict scope + # [2] When there are no relevant files and no scope prefix is defined, we parse scoped or unscoped commits + # [3] When there are relevant files, we parse scoped or unscoped commits regardless of any defined prefix + has_relevant_changed_files = self._has_relevant_changed_files(commit) + strict_scope = bool( + not has_relevant_changed_files and self.options.scope_prefix + ) + pmsg_result = self.parse_message( + message=force_str(commit.message), + strict_scope=strict_scope, + ) + + if pmsg_result and (has_relevant_changed_files or strict_scope): + logger.debug( + "commit %s introduces a %s level_bump", + commit.hexsha[:8], + pmsg_result.bump, + ) + + return ParsedCommit.from_parsed_message_result(commit, pmsg_result) + + if pmsg_result and not has_relevant_changed_files: + return _logged_parse_error( + commit, + f"Commit {commit.hexsha[:7]} has no changed files matching the path filter(s)", + ) + + if strict_scope and self.parse_message(str(commit.message), strict_scope=False): + return _logged_parse_error( + commit, + str.join( + " and ", + [ + f"Commit {commit.hexsha[:7]} has no changed files matching the path filter(s)", + f"the 
scope does not match scope prefix '{self.options.scope_prefix}'", + ], + ), + ) + + return _logged_parse_error( + commit, + f"Format Mismatch! Unable to parse commit message: {commit.message!r}", + ) + + def _has_relevant_changed_files(self, commit: Commit) -> bool: + # Extract git root from commit + git_root = ( + Path(commit.repo.working_tree_dir or commit.repo.working_dir) + .absolute() + .resolve() + ) + + # Check if the changed files of the commit that match the path filters + for full_path in iter( + str(git_root / rel_git_path) for rel_git_path in commit.stats.files + ): + # Check if the filepath matches any of the file selection filters + if not any( + fnmatch(full_path, select_filter) + for select_filter in self.file_selection_filters + ): + continue + + # Pass filter matches, so now evaluate if it is supposed to be ignored + if not any( + fnmatch(full_path, ignore_filter) + for ignore_filter in self.file_ignore_filters + ): + # No ignore filter matched, so it must be a relevant file + return True + + return False