agent-framework/python/scripts/check_md_code_blocks.py at main · microsoft/agent-framework

History

157 lines (131 loc) · 7.09 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

"""Check code blocks in Markdown files for syntax errors."""

import argparse

from enum import Enum

import glob

import logging

import os

import tempfile

import subprocess # nosec

from pygments import highlight # type: ignore

from pygments.formatters import TerminalFormatter

from pygments.lexers import PythonLexer

# Module-level logger used for all progress and result reporting in this script.
# The level is set to INFO so per-block status lines are emitted, and a stream
# handler is attached so the output goes to the console.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

class Colors(str, Enum):
    """ANSI terminal escape sequences used to colorize the script's log output.

    Each member's value is the raw escape string; because the enum also derives
    from ``str``, members are themselves string instances.
    """

    # Reset: ends any previously started color/attribute.
    CEND = "\33[0m"

    # Red foreground / red background.
    CRED = "\33[31m"
    CREDBG = "\33[41m"

    # Green foreground / green background.
    CGREEN = "\33[32m"
    CGREENBG = "\33[42m"

    # Violet (magenta) foreground.
    CVIOLET = "\33[35m"

    # Grey (bright black) foreground.
    CGREY = "\33[90m"

def with_color(text: str, color: Colors) -> str:
    """Return ``text`` wrapped in the escape sequence of ``color``.

    The result is ``color``'s escape sequence, followed by ``text``, followed by
    the reset sequence (``Colors.CEND``). Nothing is printed; the decorated
    string is handed back to the caller.

    Args:
        text: The text to colorize.
        color: The ``Colors`` member whose escape sequence is prepended.

    Returns:
        The colorized string.
    """
    start_sequence = color.value
    reset_sequence = Colors.CEND.value
    return "{}{}{}".format(start_sequence, text, reset_sequence)

def expand_file_patterns(patterns: list[str], skip_glob: bool = False) -> list[str]:
    """Resolve the given patterns to a sorted, de-duplicated list of existing paths.

    Args:
        patterns: Glob patterns, or literal file paths when ``skip_glob`` is set.
        skip_glob: When ``True``, every entry is treated as a literal path: glob
            metacharacters (``*``, ``?``, ``[``) in it are matched verbatim, and
            only entries ending in ``.md`` are considered. When ``False``, each
            entry is expanded with recursive globbing (``**`` is supported).

    Returns:
        The matching paths, without duplicates, in sorted order. Entries that
        match nothing on disk contribute no paths.
    """
    matched: set[str] = set()

    for pattern in patterns:
        if skip_glob:
            # Literal mode: only markdown files are of interest.
            if not pattern.endswith(".md"):
                continue
            # Escape glob metacharacters so a path such as "notes[1].md" refers
            # to that exact file instead of being interpreted as a character
            # class. glob.glob still filters out paths that do not exist.
            matched.update(glob.glob(glob.escape(pattern), recursive=False))
        else:
            # Pattern mode: relative and absolute patterns, with "**" recursion.
            matched.update(glob.glob(pattern, recursive=True))

    return sorted(matched)

def extract_python_code_blocks(markdown_file_path: str) -> list[tuple[str, int]]:
    """Extract the fenced Python code blocks from a Markdown file.

    A block starts at a line whose stripped text begins with ```` ```python ````
    and ends at the next line consisting only of backticks. Fenced blocks in
    other languages (or with no language) are tracked so that their opening and
    closing fences are not mistaken for the end of a Python block, but their
    content is not returned.

    Args:
        markdown_file_path: Path of the UTF-8 encoded Markdown file to read.

    Returns:
        One ``(code, line_no)`` tuple per terminated Python block, in file
        order. ``code`` is the block's text exactly as it appears in the file
        (each line keeps its own newline) and ``line_no`` is the 1-based line
        number of the first line after the opening fence. A Python block that
        is still open at end of file is not returned.
    """
    with open(markdown_file_path, encoding="utf-8") as file:
        lines = file.readlines()

    code_blocks: list[tuple[str, int]] = []
    in_fence = False  # inside any fenced block, Python or not
    capturing = False  # inside a ```python fence whose lines we collect
    start_line = 0
    current_block: list[str] = []

    for i, line in enumerate(lines):
        stripped = line.strip()
        is_fence = stripped.startswith("```")

        if not in_fence:
            if is_fence:
                in_fence = True
                capturing = stripped.startswith("```python")
                current_block = []
                # i is the 0-based index of the fence; the code starts on the
                # following line, expressed 1-based.
                start_line = i + 2
            continue

        # Inside a fence: only a bare run of backticks closes it. A fence line
        # carrying an info string (e.g. "```python") is ordinary content here.
        if is_fence and not stripped.strip("`"):
            if capturing:
                # Lines already end with "\n", so plain concatenation
                # reproduces the snippet without doubling the line breaks.
                code_blocks.append(("".join(current_block), start_line))
            in_fence = False
            capturing = False
        elif capturing:
            current_block.append(line)

    return code_blocks

def _should_check_block(code_block: str) -> bool:
    """Return True when the snippet imports agent_framework and does not mention agent_framework.lab."""
    imports_framework = "import agent_framework" in code_block or "from agent_framework" in code_block
    return imports_framework and "agent_framework.lab" not in code_block


def _run_pyright(code_block: str) -> "subprocess.CompletedProcess[str]":
    """Write the snippet and a minimal pyright config to a temp dir and run ``uv run pyright`` on it."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Use the same rules as pyrightconfig.samples.json:
        # typeCheckingMode=off, only reportMissingImports and reportAttributeAccessIssue enabled.
        pyright_cfg = os.path.join(tmp_dir, "pyrightconfig.json")
        with open(pyright_cfg, "w", encoding="utf-8") as cfg:
            cfg.write(
                '{"include":["."],"typeCheckingMode":"off",'
                '"reportMissingImports":"error","reportAttributeAccessIssue":"error"}'
            )

        tmp_file = os.path.join(tmp_dir, "snippet.py")
        with open(tmp_file, "w", encoding="utf-8") as f:
            f.write(code_block)

        # Must run while the temporary directory still exists.
        return subprocess.run(  # nosec
            ["uv", "run", "pyright", "-p", tmp_dir], capture_output=True, text=True, cwd="."
        )


def check_code_blocks(markdown_file_paths: list[str], exclude_patterns: list[str] | None = None) -> None:
    """Run pyright over the Python code blocks of the given Markdown files.

    Only blocks that import ``agent_framework`` (and do not mention
    ``agent_framework.lab``) are checked; all others are logged as ignored.
    A block fails when pyright reports ``reportAttributeAccessIssue``, or
    ``reportMissingImports`` on a line mentioning ``agent_framework``. Other
    diagnostics (e.g. top-level ``await`` in README snippets) are tolerated.
    A block also fails when pyright itself could not complete (exit code other
    than 0 or 1), so a broken tool run is never reported as a pass.

    Args:
        markdown_file_paths: Paths of the Markdown files to check.
        exclude_patterns: Substrings; a file whose path contains any of them
            is skipped. ``None`` means no exclusions.

    Raises:
        RuntimeError: If at least one file contains a failing block. The
            message lists the affected files.
    """
    files_with_errors: list[str] = []
    exclude_patterns = exclude_patterns or []

    heavy_rule = with_color("========================================================", Colors.CGREY)
    light_rule = with_color("--------------------------------------------------------", Colors.CGREY)

    for markdown_file_path in markdown_file_paths:
        # Skip files that match any exclude pattern (plain substring match).
        if any(pattern in markdown_file_path for pattern in exclude_patterns):
            logger.info("Skipping %s (matches exclude pattern)", markdown_file_path)
            continue

        had_errors = False
        for code_block, line_no in extract_python_code_blocks(markdown_file_path):
            markdown_file_path_with_line_no = f"{markdown_file_path}:{line_no}"
            logger.info("Checking a code block in %s...", markdown_file_path_with_line_no)

            # Skip blocks that don't import agent_framework modules or import lab modules.
            if not _should_check_block(code_block):
                logger.info(" %s", with_color("OK[ignored]", Colors.CGREENBG))
                continue

            result = _run_pyright(code_block)

            # Filter to only errors from our config rules; syntax-level errors
            # (top-level await, etc.) are expected in README documentation snippets.
            # Only flag reportMissingImports for agent_framework modules, not third-party packages.
            relevant_errors = [
                line
                for line in result.stdout.splitlines()
                if ("reportMissingImports" in line and "agent_framework" in line)
                or "reportAttributeAccessIssue" in line
            ]

            # pyright exits with 0 (no errors) or 1 (errors reported) after a
            # completed analysis; anything else means the check did not really
            # run and must not be mistaken for a clean result.
            tool_failed = result.returncode not in (0, 1)

            if not relevant_errors and not tool_failed:
                logger.info(" %s", with_color("OK", Colors.CGREENBG))
                continue

            if relevant_errors:
                headline = "Pyright found issues in"
                tool_output = result.stdout
            else:
                headline = f"Pyright could not be run (exit code {result.returncode}) for"
                tool_output = result.stdout + result.stderr

            highlighted_code = highlight(code_block, PythonLexer(), TerminalFormatter())  # type: ignore
            report = (
                f" {with_color('FAIL', Colors.CREDBG)}\n"
                f"{heavy_rule}\n"
                f"{with_color('Error', Colors.CRED)}: {headline} "
                f"{with_color(markdown_file_path_with_line_no, Colors.CVIOLET)}:\n"
                f"{light_rule}\n"
                f"{highlighted_code}\n"
                f"{light_rule}\n"
                "\n"
                f"{with_color('pyright output:', Colors.CVIOLET)}\n"
                f"{with_color(tool_output, Colors.CRED)}"
                f"{heavy_rule}\n"
            )
            logger.info("%s", report)
            had_errors = True

        if had_errors:
            files_with_errors.append(markdown_file_path)

    if files_with_errors:
        raise RuntimeError("Syntax errors found in the following files:\n" + "\n".join(files_with_errors))

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, resolve the file list and
    # run the check. check_code_blocks raises if any file has failing blocks.
    parser = argparse.ArgumentParser(description="Check code blocks in Markdown files for syntax errors.")

    # Positional: one or more markdown files, each of which may be a glob pattern.
    parser.add_argument(
        "markdown_files",
        nargs="+",
        help="Markdown files to check (supports glob patterns).",
    )
    # Repeatable option: each occurrence adds one exclusion substring.
    parser.add_argument(
        "--exclude",
        action="append",
        help="Exclude files containing this pattern.",
    )
    # Flag: disable glob expansion of the positional arguments.
    parser.add_argument(
        "--no-glob",
        action="store_true",
        help="Treat file arguments as literal paths (no glob expansion).",
    )

    args = parser.parse_args()

    # Expand glob patterns to actual file paths (or take them literally with --no-glob),
    # then check every resulting file.
    check_code_blocks(
        expand_file_patterns(args.markdown_files, skip_glob=args.no_glob),
        args.exclude,
    )

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

check_md_code_blocks.py

Latest commit

History

check_md_code_blocks.py

File metadata and controls