-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathcheck_md_code_blocks.py
More file actions
157 lines (131 loc) · 7.09 KB
/
check_md_code_blocks.py
File metadata and controls
157 lines (131 loc) · 7.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Copyright (c) Microsoft. All rights reserved.
"""Check code blocks in Markdown files for syntax errors."""
import argparse
from enum import Enum
import glob
import logging
import os
import tempfile
import subprocess # nosec
from pygments import highlight # type: ignore
from pygments.formatters import TerminalFormatter
from pygments.lexers import PythonLexer
# Module-level logger used for all progress and result output of this script.
# A StreamHandler is attached (it writes to stderr by default) and the level is
# set to INFO so that per-block status lines are actually emitted.
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)
class Colors(str, Enum):
    """ANSI escape sequences used to colorize the script's terminal output."""

    # Resets every attribute back to the terminal default.
    CEND = "\33[0m"

    # Red: foreground, then background.
    CRED = "\33[31m"
    CREDBG = "\33[41m"

    # Green: foreground, then background.
    CGREEN = "\33[32m"
    CGREENBG = "\33[42m"

    # Accent colors for file locations and separators.
    CVIOLET = "\33[35m"
    CGREY = "\33[90m"
def with_color(text: str, color: Colors) -> str:
    """Return ``text`` wrapped in the escape code of ``color`` and a trailing reset code.

    Nothing is printed; the caller decides where the colored string goes.
    """
    start_code = color.value
    reset_code = Colors.CEND.value
    return "{}{}{}".format(start_code, text, reset_code)
def expand_file_patterns(patterns: list[str], skip_glob: bool = False) -> list[str]:
    """Resolve command-line file arguments to a sorted, de-duplicated list of paths.

    Args:
        patterns: File arguments; glob patterns unless ``skip_glob`` is set.
        skip_glob: When True, every entry is treated as a literal path. Only
            entries ending in ``.md`` that name an existing file are kept.

    Returns:
        The matching paths, sorted, with duplicates removed.
    """
    found: set[str] = set()
    for pattern in patterns:
        if skip_glob:
            # Literal mode must not go through glob: characters such as "[" and "]"
            # in a real file name would be interpreted as a character class and
            # the file would silently be dropped.
            if pattern.endswith(".md") and os.path.isfile(pattern):
                found.add(pattern)
        else:
            # Glob mode handles relative and absolute patterns, including "**".
            found.update(glob.glob(pattern, recursive=True))
    return sorted(found)
def extract_python_code_blocks(markdown_file_path: str) -> list[tuple[str, int]]:
    """Extract the fenced ``python`` code blocks from a Markdown file.

    Args:
        markdown_file_path: Path of the Markdown file, read as UTF-8.

    Returns:
        One ``(code, line_no)`` tuple per closed Python block, in file order.
        ``code`` is the block's text exactly as written in the file and
        ``line_no`` is the 1-based line of the block's first code line.
        A Python block that is never closed is not returned.
    """
    with open(markdown_file_path, encoding="utf-8") as file:
        lines = file.readlines()

    code_blocks: list[tuple[str, int]] = []
    in_code_block = False
    current_block: list[str] = []
    start_line = 0
    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith("```python"):
            in_code_block = True
            current_block = []
            # ``index`` is 0-based and points at the fence; code starts on the next line.
            start_line = index + 2
        elif stripped.startswith("```"):
            # Only a fence that closes an open Python block produces a result.
            # Fences of other languages (and their closers) are ignored so they
            # neither emit empty blocks nor re-emit the previous Python block.
            if in_code_block:
                # Lines still carry their own newline, so join without a separator
                # to avoid inserting a blank line after every code line.
                code_blocks.append(("".join(current_block), start_line))
            in_code_block = False
        elif in_code_block:
            current_block.append(line)
    return code_blocks
def check_code_blocks(markdown_file_paths: list[str], exclude_patterns: list[str] | None = None) -> None:
    """Run pyright over the Python snippets embedded in the given Markdown files.

    Files whose path contains any entry of ``exclude_patterns`` as a substring are
    skipped. Snippets that do not import ``agent_framework``, or that mention
    ``agent_framework.lab``, are logged as ignored. Every remaining snippet is
    written to a temporary directory and checked with ``uv run pyright``.

    Args:
        markdown_file_paths: Markdown files to inspect.
        exclude_patterns: Optional substrings; a matching file path is skipped.

    Raises:
        RuntimeError: If at least one file contains a snippet with relevant
            pyright findings. The message lists the affected files.
    """
    skip_substrings = exclude_patterns or []
    files_with_errors: list[str] = []

    for markdown_file_path in markdown_file_paths:
        # Skip files that match any exclude pattern.
        excluded = False
        for pattern in skip_substrings:
            if pattern in markdown_file_path:
                excluded = True
                break
        if excluded:
            logger.info(f"Skipping {markdown_file_path} (matches exclude pattern)")
            continue

        had_errors = False
        for snippet, line_no in extract_python_code_blocks(markdown_file_path):
            location = f"{markdown_file_path}:{line_no}"
            logger.info("Checking a code block in %s...", location)

            # Only snippets that import agent_framework are checked, and snippets
            # touching the lab modules are left alone.
            imports_framework = "import agent_framework" in snippet or "from agent_framework" in snippet
            if not imports_framework or "agent_framework.lab" in snippet:
                logger.info(f' {with_color("OK[ignored]", Colors.CGREENBG)}')
                continue

            with tempfile.TemporaryDirectory() as tmp_dir:
                # Use the same rules as pyrightconfig.samples.json:
                # typeCheckingMode=off, only reportMissingImports and
                # reportAttributeAccessIssue enabled.
                config_path = os.path.join(tmp_dir, "pyrightconfig.json")
                with open(config_path, "w") as cfg:
                    cfg.write(
                        '{"include":["."],"typeCheckingMode":"off",'
                        '"reportMissingImports":"error","reportAttributeAccessIssue":"error"}'
                    )
                snippet_path = os.path.join(tmp_dir, "snippet.py")
                with open(snippet_path, "w", encoding="utf-8") as f:
                    f.write(snippet)
                result = subprocess.run(  # nosec
                    ["uv", "run", "pyright", "-p", tmp_dir],
                    capture_output=True,
                    text=True,
                    cwd=".",
                )

            # Keep only findings from the configured rules; syntax-level errors
            # (top-level await, etc.) are expected in README documentation snippets.
            # reportMissingImports counts only for agent_framework modules, not for
            # third-party packages.
            relevant_errors: list[str] = []
            for output_line in result.stdout.splitlines():
                missing_framework_import = "reportMissingImports" in output_line and "agent_framework" in output_line
                if missing_framework_import or "reportAttributeAccessIssue" in output_line:
                    relevant_errors.append(output_line)

            if not relevant_errors:
                logger.info(f" {with_color('OK', Colors.CGREENBG)}")
                continue

            highlighted_code = highlight(snippet, PythonLexer(), TerminalFormatter())  # type: ignore
            logger.info(
                f" {with_color('FAIL', Colors.CREDBG)}\n"
                f"{with_color('========================================================', Colors.CGREY)}\n"
                f"{with_color('Error', Colors.CRED)}: Pyright found issues in {with_color(location, Colors.CVIOLET)}:\n"
                f"{with_color('--------------------------------------------------------', Colors.CGREY)}\n"
                f"{highlighted_code}\n"
                f"{with_color('--------------------------------------------------------', Colors.CGREY)}\n"
                "\n"
                f"{with_color('pyright output:', Colors.CVIOLET)}\n"
                f"{with_color(result.stdout, Colors.CRED)}"
                f"{with_color('========================================================', Colors.CGREY)}\n"
            )
            had_errors = True

        if had_errors:
            files_with_errors.append(markdown_file_path)

    if files_with_errors:
        raise RuntimeError("Syntax errors found in the following files:\n" + "\n".join(files_with_errors))
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, resolve them to Markdown
    # files, then check every Python snippet found in those files.
    cli = argparse.ArgumentParser(description="Check code blocks in Markdown files for syntax errors.")
    # Positional arguments are Markdown files, optionally given as glob patterns.
    cli.add_argument("markdown_files", nargs="+", help="Markdown files to check (supports glob patterns).")
    cli.add_argument("--exclude", action="append", help="Exclude files containing this pattern.")
    cli.add_argument("--no-glob", action="store_true", help="Treat file arguments as literal paths (no glob expansion).")
    options = cli.parse_args()

    # With --no-glob the arguments are taken as literal paths instead of patterns.
    check_code_blocks(
        expand_file_patterns(options.markdown_files, skip_glob=options.no_glob),
        options.exclude,
    )