Inline Class#new. by tenderlove · Pull Request #13080 · ruby/ruby · GitHub
[go: up one dir, main page]

Skip to content

Inline Class#new. #13080

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Inline Class#new.
This commit inlines instructions for Class#new.  To make this work, we
added a new YARV instruction, `opt_new`.  `opt_new` checks whether or
not the `new` method is the default allocator method.  If it is, it
allocates the object, and pushes the instance on the stack.  If not, the
instruction jumps to the "slow path" method call instructions.

Old instructions:

```
> ruby --dump=insns -e'Object.new'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,10)>
0000 opt_getconstant_path                   <ic:0 Object>             (   1)[Li]
0002 opt_send_without_block                 <calldata!mid:new, argc:0, ARGS_SIMPLE>
0004 leave
```

New instructions:

```
> ./miniruby --dump=insns -e'Object.new'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,10)>
0000 opt_getconstant_path                   <ic:0 Object>             (   1)[Li]
0002 putnil
0003 swap
0004 opt_new                                <calldata!mid:new, argc:0, ARGS_SIMPLE>, 11
0007 opt_send_without_block                 <calldata!mid:initialize, argc:0, FCALL|ARGS_SIMPLE>
0009 jump                                   14
0011 opt_send_without_block                 <calldata!mid:new, argc:0, ARGS_SIMPLE>
0013 swap
0014 pop
0015 leave
```

This commit speeds up basic object allocation (`Foo.new`) by 60%, but
classes that take keyword parameters see an even bigger benefit because
no hash is allocated when instantiating the object (3x to 6x faster).

Here is an example that uses `Hash.new(capacity: 0)`:

```
> hyperfine "ruby --disable-gems -e'i = 0; while i < 10_000_000; Hash.new(capacity: 0); i += 1; end'" "./ruby --disable-gems -e'i = 0; while i < 10_000_000; Hash.new(capacity: 0); i += 1; end'"
Benchmark 1: ruby --disable-gems -e'i = 0; while i < 10_000_000; Hash.new(capacity: 0); i += 1; end'
  Time (mean ± σ):      1.082 s ±  0.004 s    [User: 1.074 s, System: 0.008 s]
  Range (min … max):    1.076 s …  1.088 s    10 runs

Benchmark 2: ./ruby --disable-gems -e'i = 0; while i < 10_000_000; Hash.new(capacity: 0); i += 1; end'
  Time (mean ± σ):     627.9 ms ±   3.5 ms    [User: 622.7 ms, System: 4.8 ms]
  Range (min … max):   622.7 ms … 633.2 ms    10 runs

Summary
  ./ruby --disable-gems -e'i = 0; while i < 10_000_000; Hash.new(capacity: 0); i += 1; end' ran
    1.72 ± 0.01 times faster than ruby --disable-gems -e'i = 0; while i < 10_000_000; Hash.new(capacity: 0); i += 1; end'
```

This commit changes the backtrace for `initialize`:

```
aaron@tc ~/g/ruby (inline-new)> cat test.rb
class Foo
  def initialize
    puts caller
  end
end

def hello
  Foo.new
end

hello
aaron@tc ~/g/ruby (inline-new)> ruby -v test.rb
ruby 3.4.2 (2025-02-15 revision d2930f8) +PRISM [arm64-darwin24]
test.rb:8:in 'Class#new'
test.rb:8:in 'Object#hello'
test.rb:11:in '<main>'
aaron@tc ~/g/ruby (inline-new)> ./miniruby -v test.rb
ruby 3.5.0dev (2025-03-28T23:59:40Z inline-new c4157884e4) +PRISM [arm64-darwin24]
test.rb:8:in 'Object#hello'
test.rb:11:in '<main>'
```

It also increases memory usage for calls to `new` by 112 bytes:

```
aaron@tc ~/g/ruby (inline-new)> cat test.rb
require "objspace"

class Foo
  def initialize
    puts caller
  end
end

def hello
  Foo.new
end

puts ObjectSpace.memsize_of(RubyVM::InstructionSequence.of(method(:hello)))
aaron@tc ~/g/ruby (inline-new)> make runruby
RUBY_ON_BUG='gdb -x ./.gdbinit -p' ./miniruby -I./lib -I. -I.ext/common  ./tool/runruby.rb --extout=.ext  -- --disable-gems  ./test.rb
656
aaron@tc ~/g/ruby (inline-new)> ruby -v test.rb
ruby 3.4.2 (2025-02-15 revision d2930f8) +PRISM [arm64-darwin24]
544
```

Thanks to @ko1 for coming up with this idea!

Co-Authored-By: John Hawthorn <john@hawthorn.email>
  • Loading branch information
tenderlove and jhawthorn committed Apr 25, 2025
commit e97034c38d60fa34a99ab48dbb8f935d37e8160b
3 changes: 3 additions & 0 deletions debug_counter.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@ RB_DEBUG_COUNTER(obj_imemo_callinfo)
RB_DEBUG_COUNTER(obj_imemo_callcache)
RB_DEBUG_COUNTER(obj_imemo_constcache)

RB_DEBUG_COUNTER(opt_new_hit)
RB_DEBUG_COUNTER(opt_new_miss)

/* ar_table */
RB_DEBUG_COUNTER(artable_hint_hit)
RB_DEBUG_COUNTER(artable_hint_miss)
Expand Down
24 changes: 24 additions & 0 deletions insns.def
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,30 @@ opt_send_without_block
}
}

/* Fast path for `recv.new(...)`.
 *
 * If `new` on the receiver is the default allocator method
 * (rb_class_new_instance_pass_kw), allocate the instance inline and fall
 * through to the next instruction. Otherwise -- i.e. the "new" method has
 * been defined by user -- jump to `dst`.
 *
 * Operands:
 *   cd  - call data for the `new` call site (supplies argc and the method check)
 *   dst - offset jumped to when the fast path cannot be used
 */
DEFINE_INSN
opt_new
(CALL_DATA cd, OFFSET dst)
()
()
// attr bool leaf = false;
{
VALUE argc = vm_ci_argc(cd->ci);
/* The value `argc` slots down from the top of the stack; it is the object
 * whose `new` method is checked and which is passed to rb_obj_alloc. */
VALUE val = TOPN(argc);

if (vm_method_cfunc_is(GET_ISEQ(), cd, val, rb_class_new_instance_pass_kw)) {
RB_DEBUG_COUNTER_INC(opt_new_hit);
val = rb_obj_alloc(val);
/* Overwrite the receiver slot with the freshly allocated object. */
TOPN(argc) = val;
/* The slot just below is expected to still hold nil; store the new object
 * there as well. NOTE(review): presumably this is the placeholder the
 * compiler emits ahead of the receiver, kept so the instance survives
 * after the following call's result is popped -- confirm against the
 * compiler. */
RUBY_ASSERT(TOPN(argc + 1) == Qnil);
TOPN(argc + 1) = val;
}
else {
/* `new` is not the default allocator: take the branch to `dst`. */
RB_DEBUG_COUNTER_INC(opt_new_miss);
JUMP(dst);
}
}

/* Convert object to string using to_s or equivalent. */
DEFINE_INSN
objtostring
Expand Down
12 changes: 11 additions & 1 deletion lib/erb/compiler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,17 @@ def detect_magic_comment(s, enc = nil)
return enc, frozen
end

# :stopdoc:
# Difference in backtrace depth between the inside of +initialize+ and the
# site that called +new+: +caller(0)+ is captured at the call site and passed
# in, and +initialize+ subtracts its length from the length of its own
# +caller+. The result is used as the uplevel adjustment when warning.
# NOTE(review): presumably this evaluates to 1 when +new+ contributes its own
# backtrace frame and 0 when it does not -- confirm on the target Ruby.
WARNING_UPLEVEL = Class.new {
attr_reader :c
def initialize from
@c = caller.length - from.length
end
}.new(caller(0)).c
private_constant :WARNING_UPLEVEL
# :startdoc:

def warn_invalid_trim_mode(mode, uplevel:)
warn "Invalid ERB trim mode: #{mode.inspect} (trim_mode: nil, 0, 1, 2, or String composed of '%' and/or '-', '>', '<>')", uplevel: uplevel + 1
warn "Invalid ERB trim mode: #{mode.inspect} (trim_mode: nil, 0, 1, 2, or String composed of '%' and/or '-', '>', '<>')", uplevel: uplevel + WARNING_UPLEVEL
end
end
47 changes: 46 additions & 1 deletion prism_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -3620,6 +3620,9 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c
if (message_loc->start == NULL) message_loc = &call_node->base.location;

const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, call_node->base.node_id);

LINK_ELEMENT *opt_new_prelude = LAST_ELEMENT(ret);

LABEL *else_label = NEW_LABEL(location.line);
LABEL *end_label = NEW_LABEL(location.line);
LABEL *retry_end_l = NEW_LABEL(location.line);
Expand Down Expand Up @@ -3714,7 +3717,49 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c
PUSH_INSN(ret, location, splatkw);
}

PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg);
LABEL *not_basic_new = NEW_LABEL(location.line);
LABEL *not_basic_new_finish = NEW_LABEL(location.line);

bool inline_new = ISEQ_COMPILE_DATA(iseq)->option->specialized_instruction &&
method_id == rb_intern("new") &&
call_node->block == NULL;

if (inline_new) {
if (LAST_ELEMENT(ret) == opt_new_prelude) {
PUSH_INSN(ret, location, putnil);
PUSH_INSN(ret, location, swap);
}
else {
ELEM_INSERT_NEXT(opt_new_prelude, &new_insn_body(iseq, location.line, location.node_id, BIN(swap), 0)->link);
ELEM_INSERT_NEXT(opt_new_prelude, &new_insn_body(iseq, location.line, location.node_id, BIN(putnil), 0)->link);
}

// Jump unless the receiver uses the "basic" implementation of "new"
VALUE ci;
if (flags & VM_CALL_FORWARDING) {
ci = (VALUE)new_callinfo(iseq, method_id, orig_argc + 1, flags, kw_arg, 0);
}
else {
ci = (VALUE)new_callinfo(iseq, method_id, orig_argc, flags, kw_arg, 0);
}

PUSH_INSN2(ret, location, opt_new, ci, not_basic_new);
LABEL_REF(not_basic_new);
// optimized path
PUSH_SEND_R(ret, location, rb_intern("initialize"), INT2FIX(orig_argc), block_iseq, INT2FIX(flags | VM_CALL_FCALL), kw_arg);
PUSH_INSNL(ret, location, jump, not_basic_new_finish);

PUSH_LABEL(ret, not_basic_new);
// Fall back to normal send
PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg);
PUSH_INSN(ret, location, swap);

PUSH_LABEL(ret, not_basic_new_finish);
PUSH_INSN(ret, location, pop);
}
else {
PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg);
}

if (block_iseq && ISEQ_BODY(block_iseq)->catch_table) {
pm_compile_retry_end_label(iseq, ret, retry_end_l);
Expand Down
6 changes: 6 additions & 0 deletions test/erb/test_erb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,12 @@ def test_explicit_trim_line_with_carriage_return
assert_equal("line\r\n" * 3, erb.result)
end

# Checks that the warning raised by calling +new+ with an extra positional
# argument reports this file and the line of the +@erb.new+ call below
# (+__LINE__ + 1+ relative to the +assert_warning+ line).
# NOTE(review): the second argument is presumably the legacy safe_level
# parameter, going by the test name -- confirm against ERB#initialize.
def test_safe_level_warning
assert_warning(/#{__FILE__}:#{__LINE__ + 1}/) do
@erb.new("", 1)
end
end

def test_invalid_trim_mode
pend if RUBY_ENGINE == 'truffleruby'

Expand Down
4 changes: 2 additions & 2 deletions test/objspace/test_objspace.rb
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ def test_trace_object_allocations
assert_equal(line1, ObjectSpace.allocation_sourceline(o1))
assert_equal(__FILE__, ObjectSpace.allocation_sourcefile(o1))
assert_equal(c1, ObjectSpace.allocation_generation(o1))
assert_equal(Class.name, ObjectSpace.allocation_class_path(o1))
assert_equal(:new, ObjectSpace.allocation_method_id(o1))
assert_equal(self.class.name, ObjectSpace.allocation_class_path(o1))
assert_equal(__method__, ObjectSpace.allocation_method_id(o1))

assert_equal(__FILE__, ObjectSpace.allocation_sourcefile(o2))
assert_equal(line2, ObjectSpace.allocation_sourceline(o2))
Expand Down
2 changes: 1 addition & 1 deletion test/ruby/test_settracefunc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1999,7 +1999,7 @@ def m
TracePoint.new(:c_call, &capture_events).enable{
c.new
}
assert_equal [:c_call, :itself, :initialize], events[1]
assert_equal [:c_call, :itself, :initialize], events[0]
events.clear

o = Class.new{
Expand Down
2 changes: 2 additions & 0 deletions yjit/bindgen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,8 @@ fn main() {
// From internal/object.h
.allowlist_function("rb_class_allocate_instance")
.allowlist_function("rb_obj_equal")
.allowlist_function("rb_class_new_instance_pass_kw")
.allowlist_function("rb_obj_alloc")

// From gc.h and internal/gc.h
.allowlist_function("rb_obj_info")
Expand Down
65 changes: 65 additions & 0 deletions yjit/src/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4873,6 +4873,70 @@ fn gen_throw(
Some(EndBlock)
}

/// Generate code for the `opt_new` instruction.
///
/// Operand 0 is the call data of the `new` call site and operand 1 is the
/// jump offset used when the fast path does not apply.
///
/// When `new` on the receiver's class is `rb_class_new_instance_pass_kw`, the
/// generated code calls `rb_obj_alloc` and writes the new object into both
/// the receiver slot and the slot below it, then continues with the next
/// instruction. Otherwise the block ends with a jump to the branch target.
fn gen_opt_new(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
let cd = jit.get_arg(0).as_ptr();
let jump_offset = jit.get_arg(1).as_i32();

// The code below peeks at the stack, so defer until this instruction is the
// compile target.
if !jit.at_compile_target() {
return jit.defer_compilation(asm);
}

let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
let mid = unsafe { vm_ci_mid(ci) };
let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();

// The receiver sits `argc` slots down from the top of the stack.
let recv_idx = argc;
let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize);

// Class of the receiver as observed at compile time.
// NOTE(review): the original comment says this is a singleton class, which
// presumably holds when the receiver is itself a class -- confirm.
let comptime_recv_klass = comptime_recv.class_of();

let recv = asm.stack_opnd(recv_idx);

// Guard on the receiver's class so the code below can rely on it.
perf_call!("opt_new: ", jit_guard_known_klass(
jit,
asm,
comptime_recv_klass,
recv,
recv.into(),
comptime_recv,
SEND_MAX_DEPTH,
Counter::guard_send_klass_megamorphic,
));

// We now know that it's always comptime_recv_klass
if jit.assume_expected_cfunc(asm, comptime_recv_klass, mid, rb_class_new_instance_pass_kw as _) {
// Fast path
// call rb_obj_alloc to actually allocate
jit_prepare_non_leaf_call(jit, asm);
let obj = asm.ccall(rb_obj_alloc as _, vec![comptime_recv.into()]);

// Get a reference to the stack location where we need to save the
// return instance.
let result = asm.stack_opnd(recv_idx + 1);
let recv = asm.stack_opnd(recv_idx);

// Replace the receiver for the upcoming initialize call
asm.ctx.set_opnd_mapping(recv.into(), TempMapping::MapToStack(Type::UnknownHeap));
asm.mov(recv, obj);

// Save the allocated object for return
asm.ctx.set_opnd_mapping(result.into(), TempMapping::MapToStack(Type::UnknownHeap));
asm.mov(result, obj);

jump_to_next_insn(jit, asm)
} else {
// general case

// Get the branch target instruction offsets
// (the offset operand is applied relative to the next instruction).
// NOTE(review): `jump_idx as u16` truncates silently if the index does
// not fit -- presumably instruction indexes are always in range; confirm.
let jump_idx = jit.next_insn_idx() as i32 + jump_offset;
return end_block_with_jump(jit, asm, jump_idx as u16);
}
}

fn gen_jump(
jit: &mut JITState,
asm: &mut Assembler,
Expand Down Expand Up @@ -10699,6 +10763,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_branchnil => Some(gen_branchnil),
YARVINSN_throw => Some(gen_throw),
YARVINSN_jump => Some(gen_jump),
YARVINSN_opt_new => Some(gen_opt_new),

YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy),
YARVINSN_getblockparam => Some(gen_getblockparam),
Expand Down
Loading
0