-
-
Notifications
You must be signed in to change notification settings - Fork 32.4k
Convert change detection to a Python script #129627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
819da3b
4c0a7a4
4169b46
49f02be
d65e5be
b40113e
1ee4e0e
e183b15
e03d295
05253e3
f45c009
e265fa5
06ef117
f52c88f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,122 +52,59 @@ jobs: | |
timeout-minutes: 10 | ||
outputs: | ||
config-hash: ${{ steps.config-hash.outputs.hash }} | ||
run-cifuzz: ${{ steps.check.outputs.run-cifuzz }} | ||
run-docs: ${{ steps.docs-changes.outputs.run-docs }} | ||
run-hypothesis: ${{ steps.check.outputs.run-hypothesis }} | ||
run-tests: ${{ steps.check.outputs.run-tests }} | ||
run-win-msi: ${{ steps.win-msi-changes.outputs.run-win-msi }} | ||
run-cifuzz: ${{ steps.changes.outputs.run-cifuzz }} | ||
run-docs: ${{ steps.changes.outputs.run-docs }} | ||
run-hypothesis: ${{ steps.changes.outputs.run-hypothesis }} | ||
run-tests: ${{ steps.changes.outputs.run-tests }} | ||
run-win-msi: ${{ steps.changes.outputs.run-win-msi }} | ||
steps: | ||
- uses: actions/setup-python@v5 | ||
with: | ||
python-version: "3" | ||
|
||
- run: >- | ||
echo '${{ github.event_name }}' | ||
|
||
- uses: actions/checkout@v4 | ||
with: | ||
persist-credentials: false | ||
- name: Check for source changes | ||
id: check | ||
ref: >- | ||
${{ | ||
github.event_name == 'pull_request' | ||
&& github.event.pull_request.head.sha | ||
|| '' | ||
}} | ||
|
||
# Adapted from https://github.com/actions/checkout/issues/520#issuecomment-1167205721 | ||
- name: Fetch commits to get branch diff | ||
if: github.event_name == 'pull_request' | ||
run: | | ||
if [ -z "$GITHUB_BASE_REF" ]; then | ||
echo "run-tests=true" >> "$GITHUB_OUTPUT" | ||
else | ||
git fetch origin "$GITHUB_BASE_REF" --depth=1 | ||
# git diff "origin/$GITHUB_BASE_REF..." (3 dots) may be more | ||
# reliable than git diff "origin/$GITHUB_BASE_REF.." (2 dots), | ||
# but it requires to download more commits (this job uses | ||
# "git fetch --depth=1"). | ||
# | ||
# git diff "origin/$GITHUB_BASE_REF..." (3 dots) works with Git | ||
# 2.26, but Git 2.28 is stricter and fails with "no merge base". | ||
# | ||
# git diff "origin/$GITHUB_BASE_REF.." (2 dots) should be enough on | ||
# GitHub, since GitHub starts by merging origin/$GITHUB_BASE_REF | ||
# into the PR branch anyway. | ||
# | ||
# https://github.com/python/core-workflow/issues/373 | ||
grep_ignore_args=( | ||
# file extensions | ||
-e '\.md$' | ||
-e '\.rst$' | ||
# top-level folders | ||
-e '^Doc/' | ||
-e '^Misc/' | ||
# configuration files | ||
-e '^\.github/CODEOWNERS$' | ||
-e '^\.pre-commit-config\.yaml$' | ||
-e '\.ruff\.toml$' | ||
-e 'mypy\.ini$' | ||
) | ||
git diff --name-only "origin/$GITHUB_BASE_REF.." \ | ||
| grep -qvE "${grep_ignore_args[@]}" \ | ||
&& echo "run-tests=true" >> "$GITHUB_OUTPUT" || true | ||
fi | ||
set -eux | ||
|
||
# Fetch enough history to find a common ancestor commit (aka merge-base): | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
git fetch origin "${refspec_pr}" --depth=$(( commits + 1 )) \ | ||
--no-tags --prune --no-recurse-submodules | ||
|
||
# This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from): | ||
COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 "${branch_pr}" ) | ||
DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" ) | ||
|
||
# Check if we should run hypothesis tests | ||
GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}} | ||
echo "$GIT_BRANCH" | ||
if $(echo "$GIT_BRANCH" | grep -q -w '3\.\(8\|9\|10\|11\)'); then | ||
echo "Branch too old for hypothesis tests" | ||
echo "run-hypothesis=false" >> "$GITHUB_OUTPUT" | ||
else | ||
echo "Run hypothesis tests" | ||
echo "run-hypothesis=true" >> "$GITHUB_OUTPUT" | ||
fi | ||
# Get all commits since that commit date from the base branch (eg: master or main): | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
git fetch origin "${refspec_base}" --shallow-since="${DATE}" \ | ||
--no-tags --prune --no-recurse-submodules | ||
env: | ||
branch_pr: 'origin/${{ github.event.pull_request.head.ref }}' | ||
commits: ${{ github.event.pull_request.commits }} | ||
refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}' | ||
refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}' | ||
|
||
# We only want to run tests on PRs when related files are changed, | ||
# or when someone triggers a manual workflow run. | ||
- name: Compute changed files | ||
id: changes | ||
run: python Tools/build/compute-changes.py | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# oss-fuzz maintains a configuration for fuzzing the main branch of | ||
# CPython, so CIFuzz should be run only for code that is likely to be | ||
# merged into the main branch; compatibility with older branches may | ||
# be broken. | ||
FUZZ_RELEVANT_FILES='(\.c$|\.h$|\.cpp$|^configure$|^\.github/workflows/build\.yml$|^Modules/_xxtestfuzz)' | ||
if [ "$GITHUB_BASE_REF" = "main" ] && [ "$(git diff --name-only "origin/$GITHUB_BASE_REF.." | grep -qE $FUZZ_RELEVANT_FILES; echo $?)" -eq 0 ]; then | ||
# The tests are pretty slow so they are executed only for PRs | ||
# changing relevant files. | ||
echo "Run CIFuzz tests" | ||
echo "run-cifuzz=true" >> "$GITHUB_OUTPUT" | ||
else | ||
echo "Branch too old for CIFuzz tests; or no C files were changed" | ||
echo "run-cifuzz=false" >> "$GITHUB_OUTPUT" | ||
fi | ||
- name: Compute hash for config cache key | ||
id: config-hash | ||
run: | | ||
echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build.yml') }}" >> "$GITHUB_OUTPUT" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if this would belong in the same script… In the past I'd just use sha512 to compute hashes in python: https://github.com/ansible/awx-plugins/blob/0d569b5/.github/workflows/ci-cd.yml#L222C16-L222C52. If not, I'd question if this reusable workflow should even be called “change detection”. I think I tend to call the computation job “pre-setup” or something, since the changes isn't the only thing being detected… There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
"reusable-choose-workflows"?
We could move it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
"reusable-build-settings"? "reusable-settings"? "reusable-workflow-run-context"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Totally, although, it doesn't really matter if it's the same given the context where it's being used. It just has to be unique and predictable. Hashing is only used because you can't put entire file contents into cache keys 🤷♂️ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we change the key, all current caches will be invalidated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but they won't be picked up after this PR anyway because they depend on the contents of |
||
- name: Get a list of the changed documentation-related files | ||
if: github.event_name == 'pull_request' | ||
id: changed-docs-files | ||
uses: Ana06/get-changed-files@v2.3.0 | ||
with: | ||
filter: | | ||
Doc/** | ||
Misc/** | ||
.github/workflows/reusable-docs.yml | ||
format: csv # works for paths with spaces | ||
- name: Check for docs changes | ||
# We only want to run this on PRs when related files are changed, | ||
# or when user triggers manual workflow run. | ||
if: >- | ||
( | ||
github.event_name == 'pull_request' | ||
&& steps.changed-docs-files.outputs.added_modified_renamed != '' | ||
) || github.event_name == 'workflow_dispatch' | ||
id: docs-changes | ||
run: | | ||
echo "run-docs=true" >> "${GITHUB_OUTPUT}" | ||
- name: Get a list of the MSI installer-related files | ||
if: github.event_name == 'pull_request' | ||
id: changed-win-msi-files | ||
uses: Ana06/get-changed-files@v2.3.0 | ||
with: | ||
filter: | | ||
Tools/msi/** | ||
.github/workflows/reusable-windows-msi.yml | ||
format: csv # works for paths with spaces | ||
- name: Check for changes in MSI installer-related files | ||
# We only want to run this on PRs when related files are changed, | ||
# or when user triggers manual workflow run. | ||
if: >- | ||
( | ||
github.event_name == 'pull_request' | ||
&& steps.changed-win-msi-files.outputs.added_modified_renamed != '' | ||
) || github.event_name == 'workflow_dispatch' | ||
id: win-msi-changes | ||
run: | | ||
echo "run-win-msi=true" >> "${GITHUB_OUTPUT}" |
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
"""Determine which GitHub Actions workflows to run. | ||
|
||
Called by ``.github/workflows/reusable-change-detection.yml``. | ||
We only want to run tests on PRs when related files are changed, | ||
or when someone triggers a manual workflow run. | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
This improves developer experience by not doing (slow) | ||
unnecessary work in GHA, and saves CI resources. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import os | ||
import subprocess | ||
from dataclasses import dataclass | ||
from pathlib import Path | ||
|
||
TYPE_CHECKING = False | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if TYPE_CHECKING: | ||
from collections.abc import Set | ||
|
||
GITHUB_CODEOWNERS_PATH = Path(".github/CODEOWNERS") | ||
GITHUB_WORKFLOWS_PATH = Path(".github/workflows") | ||
CONFIGURATION_FILE_NAMES = frozenset({ | ||
".pre-commit-config.yaml", | ||
".ruff.toml", | ||
"mypy.ini", | ||
}) | ||
SUFFIXES_DOCUMENTATION = frozenset({".rst", ".md"}) | ||
SUFFIXES_C_OR_CPP = frozenset({".c", ".h", ".cpp"}) | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
@dataclass(kw_only=True, slots=True) | ||
class Outputs: | ||
run_ci_fuzz: bool = False | ||
run_docs: bool = False | ||
run_hypothesis: bool = False | ||
run_tests: bool = False | ||
run_windows_msi: bool = False | ||
< F438 /td> | ||
|
||
def compute_changes(): | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
target_branch, head_branch = git_branches() | ||
if target_branch and head_branch: | ||
# Getting changed files only makes sense on a pull request | ||
files = get_changed_files( | ||
f"origin/{target_branch}", f"origin/{head_branch}" | ||
) | ||
outputs = process_changed_files(files) | ||
else: | ||
# Otherwise, just run the tests | ||
outputs = Outputs(run_tests=True) | ||
outputs = process_target_branch(outputs, target_branch) | ||
|
||
if outputs.run_tests: | ||
print("Run tests") | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if outputs.run_hypothesis: | ||
print("Run hypothesis tests") | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if outputs.run_ci_fuzz: | ||
print("Run CIFuzz tests") | ||
else: | ||
print("Branch too old for CIFuzz tests; or no C files were changed") | ||
|
||
if outputs.run_docs: | ||
print("Build documentation") | ||
|
||
if outputs.run_windows_msi: | ||
print("Build Windows MSI") | ||
|
||
print(outputs) | ||
|
||
write_github_output(outputs) | ||
|
||
|
||
def git_branches() -> tuple[str, str]: | ||
target_branch = os.environ.get("GITHUB_BASE_REF", "") | ||
target_branch = target_branch.removeprefix("refs/heads/") | ||
print(f"target branch: {target_branch!r}") | ||
|
||
head_branch = os.environ.get("GITHUB_HEAD_REF", "") | ||
head_branch = head_branch.removeprefix("refs/heads/") | ||
print(f"head branch: {head_branch!r}") | ||
return target_branch, head_branch | ||
|
||
|
||
def get_changed_files(ref_a: str = "main", ref_b: str = "HEAD") -> Set[Path]: | ||
"""List the files changed between two Git refs, filtered by change type.""" | ||
args = ("git", "diff", "--name-only", f"{ref_a}...{ref_b}", "--") | ||
print(*args) | ||
changed_files_result = subprocess.run( | ||
args, stdout=subprocess.PIPE, check=True, encoding="utf-8" | ||
) | ||
changed_files = changed_files_result.stdout.strip().splitlines() | ||
return frozenset(map(Path, filter(None, map(str.strip, changed_files)))) | ||
|
||
|
||
def process_changed_files(changed_files: Set[Path]) -> Outputs: | ||
run_tests = False | ||
run_ci_fuzz = False | ||
run_docs = False | ||
run_windows_msi = False | ||
|
||
for file in changed_files: | ||
file_name = file.name | ||
file_suffix = file.suffix | ||
file_parts = file.parts | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# Documentation files | ||
doc_or_misc = file_parts[0] in {"Doc", "Misc"} | ||
doc_file = file_suffix in SUFFIXES_DOCUMENTATION or doc_or_misc | ||
|
||
if file.parent == GITHUB_WORKFLOWS_PATH: | ||
if file_name == "build.yml": | ||
run_tests = run_ci_fuzz = True | ||
if file_name == "reusable-docs.yml": | ||
run_docs = True | ||
if file_name == "reusable-windows-msi.yml": | ||
run_windows_msi = True | ||
|
||
if not ( | ||
doc_file | ||
or file == GITHUB_CODEOWNERS_PATH | ||
or file_name in CONFIGURATION_FILE_NAMES | ||
): | ||
run_tests = True | ||
|
||
# The fuzz tests are pretty slow so they are executed only for PRs | ||
# changing relevant files. | ||
if file_suffix in SUFFIXES_C_OR_CPP: | ||
run_ci_fuzz = True | ||
if file_parts[:2] in { | ||
("configure",), | ||
("Modules", "_xxtestfuzz"), | ||
}: | ||
run_ci_fuzz = True | ||
|
||
# Check for changed documentation-related files | ||
if doc_file: | ||
run_docs = True | ||
|
||
# Check for changed MSI installer-related files | ||
if file_parts[:2] == ("Tools", "msi"): | ||
run_windows_msi = True | ||
|
||
return Outputs( | ||
run_ci_fuzz=run_ci_fuzz, | ||
run_docs=run_docs, | ||
run_tests=run_tests, | ||
run_windows_msi=run_windows_msi, | ||
) | ||
|
||
|
||
def process_target_branch(outputs: Outputs, git_branch: str) -> Outputs: | ||
if not git_branch: | ||
outputs.run_tests = True | ||
|
||
# Check if we should run the hypothesis tests | ||
if git_branch in {"3.8", "3.9", "3.10", "3.11"}: | ||
print("Branch too old for hypothesis tests") | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
outputs.run_hypothesis = False | ||
else: | ||
outputs.run_hypothesis = outputs.run_tests | ||
|
||
# oss-fuzz maintains a configuration for fuzzing the main branch of | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# CPython, so CIFuzz should be run only for code that is likely to be | ||
# merged into the main branch; compatibility with older branches may | ||
# be broken. | ||
if git_branch != "main": | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
outputs.run_ci_fuzz = False | ||
|
||
if os.environ.get("GITHUB_EVENT_NAME", "").lower() == "workflow_dispatch": | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
outputs.run_docs = True | ||
outputs.run_windows_msi = True | ||
|
||
return outputs | ||
|
||
|
||
def write_github_output(outputs: Outputs) -> None: | ||
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables | ||
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-output-parameter | ||
if "GITHUB_OUTPUT" not in os.environ: | ||
print("GITHUB_OUTPUT not defined!") | ||
return | ||
|
||
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f: | ||
f.write(f"run-cifuzz={bool_lower(outputs.run_ci_fuzz)}\n") | ||
f.write(f"run-docs={bool_lower(outputs.run_docs)}\n") | ||
f.write(f"run-hypothesis={bool_lower(outputs.run_hypothesis)}\n") | ||
f.write(f"run-tests={bool_lower(outputs.run_tests)}\n") | ||
f.write(f"run-win-msi={bool_lower(outputs.run_windows_msi)}\n") | ||
|
||
|
||
def bool_lower(value: bool, /) -> str: | ||
return "true" if value else "false" | ||
|
||
|
||
if __name__ == "__main__": | ||
compute_changes() |
Uh oh!
There was an error while loading. Please reload this page.