8000 GH-135904: Optimize the JIT's assembly control flow by brandtbucher · Pull Request #135905 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-135904: Optimize the JIT's assembly control flow #135905

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 27, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add comments, noise -> noninstructions, jrxz -> jrcxz, predecessor ->…
… pre
  • Loading branch information
brandtbucher committed Jun 25, 2025
commit 6ddeaaf55b94bf4706be2ad5d4328b855f7fedfa
44 changes: 21 additions & 23 deletions Tools/jit/_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# Dictionary mapping branch instructions to their inverted branch instructions.
# If a branch cannot be inverted, the value is None:
_X86_BRANCHES = {
# https://www.felixcloutier.com/x86/jcc
"ja": "jna",
"jae": "jnae",
"jb": "jnb",
Expand All @@ -25,9 +26,10 @@
"jo": "jno",
"jp": "jnp",
"jpe": "jpo",
"jrxz": None,
"jrcxz": None,
"js": "jns",
"jz": "jnz",
# https://www.felixcloutier.com/x86/loop:loopcc
"loop": None,
"loope": None,
"loopne": None,
Expand All @@ -42,7 +44,7 @@
class _Block:
label: str | None = None
# Non-instruction lines like labels, directives, and comments:
noise: list[str] = dataclasses.field(default_factory=list)
noninstructions: list[str] = dataclasses.field(default_factory=list)
# Instruction lines:
instructions: list[str] = dataclasses.field(default_factory=list)
# If this block ends in a jump, where to?
Expand Down Expand Up @@ -74,7 +76,9 @@ class Optimizer:
_root: _Block = dataclasses.field(init=False, default_factory=_Block)
_labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
# No groups:
_re_noise: typing.ClassVar[re.Pattern[str]] = re.compile(r"\s*(?:\.|#|//|$)")
_re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile(
r"\s*(?:\.|#|//|$)"
)
# One group (label):
_re_label: typing.ClassVar[re.Pattern[str]] = re.compile(
r'\s*(?P<label>[\w."$?@]+):'
Expand All @@ -91,23 +95,23 @@ class Optimizer:

def __post_init__(self) -> None:
# Split the code into a linked list of basic blocks. A basic block is an
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I'm trying to reason about what happens if we miss something, say a branch instruction that we did not include in our branch table?

Or is everything in the x64 spec already included in the table above?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we miss it, it won't be optimised and I don't think it will break the logic anyway.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything is already included (but one was misspelled, thanks for making me double-check).

If we miss one, we will accidentally create a superblock instead of a basic block, and will miss one outgoing edge. This could cause _invert_hot_branches to miscount the number of predecessors for the jump after the branch, and perform an invalid optimization. It could also make our hot-cold splitting incorrect.

So bad things could happen, but those would just be bugs that need fixing.

(Not detecting any branches is a special case, since _invert_hot_branches will never run, so everything is fine. That's why AArch64 works fine now, even though we haven't taught its optimizer about branches yet.)

# optional label, followed by zero or more non-instruction ("noise")
# lines, followed by zero or more instruction lines (only the last of
# which may be a branch, jump, or return):
# optional label, followed by zero or more non-instruction lines,
# followed by zero or more instruction lines (only the last of which may
# be a branch, jump, or return):
text = self._preprocess(self.path.read_text())
block = self._root
for line in text.splitlines():
# See if we need to start a new block:
if match := self._re_label.match(line):
# Label. New block:
block.link = block = self._lookup_label(match["label"])
block.noise.append(line)
block.noninstructions.append(line)
continue
if self._re_noise.match(line):
if self._re_noninstructions.match(line):
if block.instructions:
# Noise lines. New block:
# Non-instruction lines. New block:
block.link = block = _Block()
block.noise.append(line)
block.noninstructions.append(line)
continue
if block.target or not block.fallthrough:
# Current block ends with a branch, jump, or return. New block:
Expand Down Expand Up @@ -174,17 +178,15 @@ def _body(self) -> str:
hot = block.hot
# Make it easy to tell at a glance where cold code is:
lines.append(f"# JIT: {'HOT' if hot else 'COLD'} ".ljust(80, "#"))
lines.extend(block.noise)
lines.extend(block.noninstructions)
lines.extend(block.instructions)
return "\n".join(lines)

def _predecessors(self, block: _Block) -> typing.Generator[_Block, None, None]:
# This is inefficient, but it's never wrong:
for predecessor in self._blocks():
if predecessor.target is block or (
predecessor.fallthrough and predecessor.link is block
):
yield predecessor
for pre in self._blocks():
if pre.target is block or pre.fallthrough and pre.link is block:
yield pre

def _insert_continue_label(self) -> None:
# Find the block with the last instruction:
Expand All @@ -199,10 +201,10 @@ def _insert_continue_label(self) -> None:
# _JIT_CONTINUE:
# This lets the assembler encode _JIT_CONTINUE jumps at build time!
align = _Block()
align.noise.append(f"\t.balign\t{self._alignment}")
align.noninstructions.append(f"\t.balign\t{self._alignment}")
continuation = self._lookup_label(f"{self.prefix}_JIT_CONTINUE")
assert continuation.label
continuation.noise.append(f"{continuation.label}:")
continuation.noninstructions.append(f"{continuation.label}:")
end.link, align.link, continuation.link = align, continuation, end.link

def _mark_hot_blocks(self) -> None:
Expand All @@ -212,11 +214,7 @@ def _mark_hot_blocks(self) -> None:
while todo:
block = todo.pop()
block.hot = True
todo.extend(
predecessor
for predecessor in self._predecessors(block)
if not predecessor.hot
)
todo.extend(pre for pre in self._predecessors(block) if not pre.hot)

def _invert_hot_branches(self) -> None:
for branch in self._blocks():
Expand Down
0