8000 [mypyc] Inline increfs and decrefs in commonly executed blocks by JukkaL · Pull Request #11540 · python/mypy · GitHub
[go: up one dir, main page]

Skip to content

[mypyc] Inline increfs and decrefs in commonly executed blocks #11540

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[mypyc] Inline increfs and decrefs in commonly executed blocks
These operations are performance-critical, but inlining everywhere
can slow down compilation a lot, so we only inline them outside error
handlers (and other rarely executed code paths).

This still can slow compilation by 10-15%, but I think that we just need
to live with it, since the performance gains are impressive. We can perhaps
claw back some of the loss by optimizing away redundant increfs/decrefs.
Also parallel compilation would make this much less significant.

This can speed up the richards benchmark by 65% (!).

With this change:

```
running richards
...........
interpreted: 0.181880s (avg of 6 iterations; stdev 0.91%)
compiled:    0.005314s (avg of 6 iterations; stdev 1.2%)

compiled is 34.229x faster
```

Using master:

```
running richards
...........
interpreted: 0.182124s (avg of 6 iterations; stdev 2.1%)
compiled:    0.008794s (avg of 6 iterations; stdev 1.9%)

compiled is 20.710x faster
```

Also, this makes the int_list microbenchmark up to 80% faster.
Compiled mypy was also around 3% faster.
  • Loading branch information
JukkaL committed Nov 13, 2021
commit 6f166e07d1b46f8bf0bfe0a1dfa0db27757c9cd3
32 changes: 32 additions & 0 deletions mypyc/analysis/blockfreq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Find basic blocks that are likely to be executed commonly.

For example, this would not include blocks that have exception handlers.

We can use different optimization heuristics for common and rare code. For
example, we can make IR fast to compile instead of fast to execute for rare
code.
"""

from typing import Set

from mypyc.ir.ops import BasicBlock, Goto, Branch


def commonly_executed_blocks(entry_point: BasicBlock) -> Set[BasicBlock]:
result: Set[BasicBlock] = set()
worklist = [entry_point]
while worklist:
block = worklist.pop()
if block in result:
continue
result.add(block)
t = block.terminator
if isinstance(t, Goto):
worklist.append(t.label)
elif isinstance(t, Branch):
if t.rare or t.traceback_entry is not None:
worklist.append(t.false)
else:
worklist.append(t.true)
worklist.append(t.false)
return result
31 changes: 25 additions & 6 deletions mypyc/codegen/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,35 +348,54 @@ def declare_tuple_struct(self, tuple_type: RTuple) -> None:
is_type=True,
)

def emit_inc_ref(self, dest: str, rtype: RType) -> None:
def emit_inc_ref(self, dest: str, rtype: RType, *, rare: bool = False) -> None:
"""Increment reference count of C expression `dest`.

For composite unboxed structures (e.g. tuples) recursively
increment reference counts for each component.

If rare is True, optimize for code size and compilation speed.
"""
if is_int_rprimitive(rtype):
self.emit_line('CPyTagged_IncRef(%s);' % dest)
if rare:
self.emit_line('CPyTagged_IncRef(%s);' % dest)
else:
self.emit_line('CPyTagged_INCREF(%s);' % dest)
elif isinstance(rtype, RTuple):
for i, item_type in enumerate(rtype.types):
self.emit_inc_ref('{}.f{}'.format(dest, i), item_type)
elif not rtype.is_unboxed:
self.emit_line('CPy_INCREF(%s);' % dest)
# Otherwise assume it's an unboxed, pointerless value and do nothing.

def emit_dec_ref(self, dest: str, rtype: RType, is_xdec: bool = False) -> None:
def emit_dec_ref(self,
dest: str,
rtype: RType,
*,
is_xdec: bool = False,
rare: bool = False) -> None:
"""Decrement reference count of C expression `dest`.

For composite unboxed structures (e.g. tuples) recursively
decrement reference counts for each component.

If rare is True, optimize for code size and compilation speed.
"""
x = 'X' if is_xdec else ''
if is_int_rprimitive(rtype):
self.emit_line('CPyTagged_%sDecRef(%s);' % (x, dest))
if rare:
self.emit_line('CPyTagged_%sDecRef(%s);' % (x, dest))
else:
self.emit_line('CPyTagged_%sDECREF(%s);' % (x, dest))
elif isinstance(rtype, RTuple):
for i, item_type in enumerate(rtype.types):
self.emit_dec_ref('{}.f{}'.format(dest, i), item_type, is_xdec)
10000 self.emit_dec_ref('{}.f{}'.format(dest, i), item_type, is_xdec=is_xdec, rare=rare)
elif not rtype.is_unboxed:
self.emit_line('CPy_%sDecRef(%s);' % (x, dest))
if rare:
self.emit_line('CPy_%sDecRef(%s);' % (x, dest))
else:
# Inlined
self.emit_line('CPy_%sDECREF(%s);' % (x, dest))
# Otherwise assume it's an unboxed, pointerless value and do nothing.

def pretty_name(self, typ: RType) -> str:
Expand Down
9 changes: 7 additions & 2 deletions mypyc/codegen/emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from mypyc.ir.func_ir import FuncIR, FuncDecl, FUNC_STATICMETHOD, FUNC_CLASSMETHOD, all_values
from mypyc.ir.class_ir import ClassIR
from mypyc.ir.pprint import generate_names_for_ir
from mypyc.analysis.blockfreq import commonly_executed_blocks

# Whether to insert debug asserts for all error handling, to quickly
# catch errors propagating without exceptions set.
Expand Down Expand Up @@ -77,8 +78,11 @@ def generate_native_function(fn: FuncIR,
for i, block in enumerate(blocks):
block.label = i

common = commonly_executed_blocks(fn.blocks[0])

for i in range(len(blocks)):
block = blocks[i]
visitor.rare = block not in common
next_block = None
if i + 1 < len(blocks):
next_block = blocks[i + 1]
Expand All @@ -105,6 +109,7 @@ def __init__(self,
self.source_path = source_path
self.module_name = module_name
self.literals = emitter.context.literals
self.rare = False
self.next_block: Optional[BasicBlock] = None

def temp_name(self) -> str:
Expand Down Expand Up @@ -416,7 +421,7 @@ def visit_inc_ref(self, op: IncRef) -> None:

def visit_dec_ref(self, op: DecRef) -> None:
src = self.reg(op.src)
self.emit_dec_ref(src, op.src.type, op.is_xdec)
self.emit_dec_ref(src, op.src.type, is_xdec=op.is_xdec)

def visit_box(self, op: Box) -> None:
self.emitter.emit_box(self.reg(op.src), self.reg(op), op.src.type, can_borrow=True)
Expand Down Expand Up @@ -577,7 +582,7 @@ def emit_inc_ref(self, dest: str, rtype: RType) -> None:
self.emitter.emit_inc_ref(dest, rtype)

def emit_dec_ref(self, dest: str, rtype: RType, is_xdec: bool) -> None:
self.emitter.emit_dec_ref(dest, rtype, is_xdec)
self.emitter.emit_dec_ref(dest, rtype, is_xdec=is_xdec, rare=self.rare)

def emit_declaration(self, line: str) -> None:
self.declarations.emit_line(line)
Expand Down
3 changes: 2 additions & 1 deletion mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ def __init__(self,
self.negated = False
# If not None, the true label should generate a traceback entry (func name, line number)
self.traceback_entry: Optional[Tuple[str, int]] = None
# If True, the condition is expected to be usually False (for optimization purposes)
# If True, we expect to usually take the false branch (for optimization purposes);
# this is implicitly treated as true if there is a traceback entry
self.rare = rare

def targets(self) -> Sequence[BasicBlock]:
Expand Down
18 changes: 18 additions & 0 deletions mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,24 @@ static inline int CPyTagged_CheckShort(CPyTagged x) {
return !CPyTagged_CheckLong(x);
}

static inline void CPyTagged_INCREF(CPyTagged x) {
if (unlikely(CPyTagged_CheckLong(x))) {
CPyTagged_IncRef(x);
}
}

static inline void CPyTagged_DECREF(CPyTagged x) {
if (unlikely(CPyTagged_CheckLong(x))) {
CPyTagged_DecRef(x);
}
}

static inline void CPyTagged_XDECREF(CPyTagged x) {
if (unlikely(CPyTagged_CheckLong(x))) {
CPyTagged_XDecRef(x);
}
}

static inline Py_ssize_t CPyTagged_ShortAsSsize_t(CPyTagged x) {
// NOTE: Assume that we sign extend.
return (Py_ssize_t)x >> 1;
Expand Down
0