8000 Add ability to trace exit locations in yjit · ruby/ruby@82be00d · GitHub
[go: up one dir, main page]

Skip to content

Commit 82be00d

Browse files
Add ability to trace exit locations in yjit
When running with `--yjit-stats` turned on, yjit can inform the user what the most common exits are. While this is useful information it doesn't tell you the source location of the code that exited or what the code that exited looks like. This change intends to fix that. To use the feature, run yjit with the `--yjit-trace-exits` option, which will record the backtrace for every exit that occurs. This functionality requires the stats feature to be turned on. Calling `--yjit-trace-exits` will automatically set the `--yjit-stats` option. Users must call `RubyVM::YJIT.dump_exit_locations(filename)` which will Marshal dump the contents of `RubyVM::YJIT.exit_locations` into a file based on the passed filename. *Example usage:* Given the following script, we write to a file called `concat_array.dump` the results of `RubyVM::YJIT.exit_locations`. ```ruby def concat_array ["t", "r", *x = "u", "e"].join end 1000.times do concat_array end RubyVM::YJIT.dump_exit_locations("concat_array.dump") ``` When we run the file with this branch and the appropriate flags the stacktrace will be recorded. Note Stackprof needs to be installed or you need to point to the library directly. ``` ./ruby --yjit --yjit-call-threshold=1 --yjit-trace-exits -I/Users/eileencodes/open_source/stackprof/lib test.rb ``` We can then read the dump file with Stackprof: ``` ./ruby -I/Users/eileencodes/open_source/stackprof/lib/ /Users/eileencodes/open_source/stackprof/bin/stackprof --text concat_array.dump ``` Results will look similar to the following: ``` ================================== Mode: () Samples: 1817 (0.00% miss rate) GC: 0 (0.00%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 1001 (55.1%) 1001 (55.1%) concatarray 335 (18.4%) 335 (18.4%) invokeblock 178 (9.8%) 178 (9.8%) send 140 (7.7%) 140 (7.7%) opt_getinlinecache ...etc... ``` Simply inspecting the `concatarray` method will give `SOURCE UNAVAILABLE` because the source is insns.def. ``` ./ruby -I/Users/eileencodes/open_source/stackprof/lib/ /Users/eileencodes/open_source/stackprof/bin/stackprof --text concat_array.dump --method concatarray ``` Result: ``` concatarray (nonexistent.def:1) samples: 1001 self (55.1%) / 1001 total (55.1%) callers: 1000 ( 99.9%) Object#concat_array 1 ( 0.1%) Gem.suffixes callees (0 total): code: SOURCE UNAVAILABLE ``` However if we go deeper to the callee we can see the exact source of the `concatarray` exit. ``` ./ruby -I/Users/eileencodes/open_source/stackprof/lib/ /Users/eileencodes/open_source/stackprof/bin/stackprof --text concat_array.dump --method Object#concat_array ``` ``` Object#concat_array (/Users/eileencodes/open_source/rust_ruby/test.rb:1) samples: 0 self (0.0%) / 1000 total (55.0%) callers: 1000 ( 100.0%) block in <main> callees (1000 total): 1000 ( 100.0%) concatarray code: | 1 | def concat_array 1000 (55.0%) | 2 | ["t", "r", *x = "u", "e"].join | 3 | end ``` The `--walk` option is recommended for this feature as it make it easier to traverse the tree of exits. *Goals of this feature:* This feature is meant to give more information when working on YJIT. The idea is that if we know what code is exiting we can decide what areas to prioritize when fixing exits. In some cases this means adding prioritizing avoiding certain exits in yjit. In more complex cases it might mean changing the Ruby code to be more performant when run with yjit. Ultimately the more information we have about what code is exiting AND why, the better we can make yjit. *Known limitations:* * Due to tracing exits, running this on large codebases like Rails can be quite slow. * On complex methods it can still be difficult to pinpoint the exact cause of an exit. * Stackprof is a requirement to to view the backtrace information from the dump file Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
1 parent ec35422 commit 82be00d

File tree

10 files changed

+554
-0
lines changed

10 files changed

+554
-0
lines changed

test/ruby/test_yjit_exit_locations.rb

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# frozen_string_literal: true
2+
#
3+
# This set of tests can be run with:
4+
# make test-all TESTS='test/ruby/test_yjit.rb' RUN_OPTS="--yjit-call-threshold=1"
5+
6+
require 'test/unit'
7+
require 'envutil'
8+
require 'tmpdir'
9+
require_relative '../lib/jit_support'
10+
11+
return unless defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? && RubyVM::YJIT.trace_exit_locations_enabled?
12+
13+
# Tests for YJIT with assertions on tracing exits
14+
# insipired by the MJIT tests in test/ruby/test_yjit.rb
15+
class TestYJITExitLocations < Test::Unit::TestCase
16+
def test_yjit_trace_exits_and_v_no_error
17+
_stdout, stderr, _status = EnvUtil.invoke_ruby(%w(-v --yjit-trace-exits), '', true, true)
18+
refute_includes(stderr, "NoMethodError")
19+
end
20+
21+
def test_trace_exits_setclassvariable
22+
script = 'class Foo; def self.foo; @@foo = 1; end; end; Foo.foo'
23+
assert_exit_locations(script)
24+
end
25+
26+
def test_trace_exits_putobject
27+
assert_exit_locations('true')
28+
assert_exit_locations('123')
29+
assert_exit_locations(':foo')
30+
end
31+
32+
def test_trace_exits_opt_not
33+
assert_exit_locations('!false')
34+
assert_exit_locations('!nil')
35+
assert_exit_locations('!true')
36+
assert_exit_locations('![]')
37+
end
38+
39+
private
40+
41+
def assert_exit_locations(test_script)
42+
write_results = <<~RUBY
43+
IO.open(3).write Marshal.dump(RubyVM::YJIT.exit_locations)
44+
RUBY
45+
46+
script = <<~RUBY
47+
_test_proc = -> {
48+
#{test_script}
49+
}
50+
result = _test_proc.call
51+
#{write_results}
52+
RUBY
53+
54+
exit_locations = eval_with_jit(script)
55+
56+
assert exit_locations.key?(:raw)
57+
assert exit_locations.key?(:frames)
58+
assert exit_locations.key?(:lines)
59+
assert exit_locations.key?(:samples)
60+
assert exit_locations.key?(:missed_samples)
61+
assert exit_locations.key?(:gc_samples)
62+
63+
assert_equal 0, exit_locations[:missed_samples]
64+
assert_equal 0, exit_locations[:gc_samples]
65+
66+
assert_not_empty exit_locations[:raw]
67+
assert_not_empty exit_locations[:frames]
68+
assert_not_empty exit_locations[:lines]
69+
70+
exit_locations[:frames].each do |frame_id, frame|
71+
assert frame.key?(:name)
72+
assert frame.key?(:file)
73+
assert frame.key?(:samples)
74+
assert frame.key?(:total_samples)
75+
assert frame.key?(:edges)
76+
end
77+
end
78+
79+
def eval_with_jit(script)
80+
args = [
81+
"--disable-gems",
82+
"--yjit-call-threshold=1",
83+
"--yjit-trace-exits"
84+
]
85+
args << "-e" << script_shell_encode(script)
86+
stats_r, stats_w = IO.pipe
87+
out, err, status = EnvUtil.invoke_ruby(args,
88+
'', true, true, timeout: 1000, ios: { 3 => stats_w }
89+
)
90+
stats_w.close
91+
stats = stats_r.read
92+
stats = Marshal.load(stats) if !stats.empty?
93+
stats_r.close
94+
stats
95+
end
96+
97+
def script_shell_encode(s)
98+
# We can't pass utf-8-encoded characters directly in a shell arg. But we can use Ruby \u constants.
99+
s.chars.map { |c| c.ascii_only? ? c : "\\u%x" % c.codepoints[0] }.join
100+
end
101+
end

yjit.c

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "probes.h"
2626
#include "probes_helper.h"
2727
#include "iseq.h"
28+
#include "ruby/debug.h"
2829

2930
// For mmapp(), sysconf()
3031
#ifndef _WIN32
@@ -83,6 +84,94 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
8384
}
8485
}
8586

87+
# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
88+
89+
// For a given raw_sample index, set a frame_info hash with the caller's
90+
// name, file, and line number. Return the frames hash with collected frame_info.
91+
static void
92+
rb_yjit_add_frame(VALUE hash, VALUE frame)
93+
{
94+
VALUE frame_id = PTR2NUM(frame);
95+
96+
if (RTEST(rb_hash_aref(hash, frame_id))) {
97+
return;
98+
} else {
99+
VALUE frame_info = rb_hash_new();
100+
// Full label for the frame
101+
VALUE name = rb_profile_frame_full_label(frame);
102+
// Absolute path of the frame from rb_iseq_realpath
103+
VALUE file = rb_profile_frame_absolute_path(frame);
104+
// Line number of the frame
105+
VALUE line = rb_profile_frame_first_lineno(frame);
106+
107+
// If absolute path isn't available use the rb_iseq_path
108+
if (NIL_P(file)) {
109+
file = rb_profile_frame_path(frame);
110+
}
111+
112+
rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name);
113+
rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file);
114+
115+
if (line != INT2FIX(0)) {
116+
rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line);
117+
}
118+
119+
rb_hash_aset(hash, frame_id, frame_info);
120+
}
121+
}
122+
123+
// Parses the YjitExitLocations.raw_samples and YjitExitLocations.line_samples collected
124+
// by rb_yjit_record_exit_stack and turns them into 3 hashes (raw, lines, and frames) to
125+
// be used by RubyVM::YJIT.exit_locations. yjit_raw_samples represents the raw frames information
126+
// (without name, file, and line), and yjit_line_samples represents the line information
127+
// of the iseq caller.
128+
VALUE
129+
rb_yjit_exit_locations_dict(VALUE *yjit_raw_samples, int *yjit_line_samples, int samples_len)
130+
{
131+
VALUE result = rb_hash_new();
132+
VALUE raw_samples = rb_ary_new_capa(samples_len);
133+
VALUE line_samples = rb_ary_new_capa(samples_len);
134+
VALUE frames = rb_hash_new();
135+
int idx = 0;
136+
137+
// While the index is less than samples_len, parse yjit_raw_samples and
138+
// yjit_line_samples, then add casted values to raw_samples and line_samples array.
139+
while (idx < samples_len) {
140+
int num = (int)yjit_raw_samples[idx];
141+
int line_num = (int)yjit_line_samples[idx];
142+
idx++;
143+
144+
rb_ary_push(raw_samples, SIZET2NUM(num));
145+
rb_ary_push(line_samples, INT2NUM(line_num));
146+
147+
// Loop through the length of samples_len and add data to the
148+
// frames hash. Also push the current value onto the raw_samples
149+
// and line_samples arrary respectively.
150+
for (int o = 0; o < num; o++) {
151+
rb_yjit_add_frame(frames, yjit_raw_samples[idx]);
152+
rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx]));
153+
rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx]));
154+
idx++;
155+
}
156+
157+
rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx]));
158+
rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx]));
159+
idx++;
160+
161+
rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx]));
162+
rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx]));
163+
idx++;
164+
}
165+
166+
// Set add the raw_samples, line_samples, and frames to the results
167+
// hash.
168+
rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples);
169+
rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples);
170+
rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames);
171+
172+
return result;
173+
}
174+
86175
uint32_t
87176
rb_yjit_get_page_size(void)
88177
{
@@ -860,12 +949,14 @@ rb_yjit_invalidate_all_method_lookup_assumptions(void)
860949

861950
// Primitives used by yjit.rb
862951
VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
952+
VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self);
863953
VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self);
864954
VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
865955
VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq);
866956
VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq);
867957
VALUE rb_yjit_simulate_oom_bang(rb_execution_context_t *ec, VALUE self);
868958
VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self);
959+
VALUE rb_yjit_get_exit_locations(rb_execution_context_t *ec, VALUE self);
869960

870961
// Preprocessed yjit.rb generated during build
871962
#include "yjit.rbinc"

yjit.rb

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,107 @@ def self.stats_enabled?
1818
Primitive.rb_yjit_stats_enabled_p
1919
end
2020

21+
# Check if rb_yjit_trace_exit_locations_enabled_p is enabled.
22+
def self.trace_exit_locations_enabled?
23+
Primitive.rb_yjit_trace_exit_locations_enabled_p
24+
end
25+
2126
# Discard statistics collected for --yjit-stats.
2227
def self.reset_stats!
2328
Primitive.rb_yjit_reset_stats_bang
2429
end
2530

31+
# If --yjit-trace-exits is enabled parse the hashes from
32+
# Primitive.rb_yjit_get_exit_locations into a format readable
33+
# by StackProf. This will allow us to find the exact location of a
34+
# side exit in YJIT based on the instruction that is exiting.
35+
#
36+
# Usage:
37+
#
38+
# In a script call:
39+
#
40+
# File.write("my_file.dump", Marshal.dump(RubyVM::YJIT.exit_locations))
41+
#
42+
# Then run the file with the following options:
43+
#
44+
# ruby --yjit --yjit-stats --yjit-trace-exits test.rb
45+
#
46+
# Once the code is done running, use StackProf to read the dump file.
47+
# See StackProf documentation for options.
48+
def self.exit_locations
49+
return unless trace_exit_locations_enabled?
50+
51+
results = Primitive.rb_yjit_get_exit_locations
52+
raw_samples = results[:raw].dup
53+
line_samples = results[:lines].dup
54+
frames = results[:frames].dup
55+
samples_count = 0
56+
57+
frames.each do |frame_id, frame|
58+
frame[:samples] = 0
59+
frame[:edges] = {}
60+
end
61+
62+
# Loop through the instructions and set the frame hash with the data.
63+
# We use nonexistent.def for the file name, otherwise insns.def will be displayed
64+
# and that information isn't useful in this context.
65+
RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id|
66+
frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil }
67+
results[:frames][frame_id] = frame_hash
68+
frames[frame_id] = frame_hash
69+
end
70+
71+
# Loop through the raw_samples and build the hashes for StackProf.
72+
# The loop is based off an example in the StackProf documentation and therefore
73+
# this functionality can only work with that library.
74+
while raw_samples.length > 0
75+
stack_trace = raw_samples.shift(raw_samples.shift + 1)
76+
lines = line_samples.shift(line_samples.shift + 1)
77+
prev_frame_id = nil
78+
79+
stack_trace.each_with_index do |frame_id, idx|
80+
if prev_frame_id
81+
prev_frame = frames[prev_frame_id]
82+
prev_frame[:edges][frame_id] ||= 0
83+
prev_frame[:edges][frame_id] += 1
84+
end
85+
86+
frame_info = frames[frame_id]
87+
frame_info[:total_samples] ||= 0
88+
frame_info[:total_samples] += 1
89+
90+
frame_info[:lines] ||= {}
91+
frame_info[:lines][lines[idx]] ||= [0, 0]
92+
frame_info[:lines][lines[idx]][0] += 1
93+
94+
prev_frame_id = frame_id
95+
end
96+
97+
top_frame_id = stack_trace.last
98+
top_frame_line = 1
99+
100+
frames[top_frame_id][:samples] += 1
101+
frames[top_frame_id][:lines] ||= {}
102+
frames[top_frame_id][:lines][top_frame_line] ||= [0, 0]
103+
frames[top_frame_id][:lines][top_frame_line][1] += 1
104+
105+
samples_count += raw_samples.shift
106+
line_samples.shift
107+
end
108+
109+
results[:samples] = samples_count
110+
# Set missed_samples and gc_samples to 0 as their values
111+
# don't matter to us in this context.
112+
results[:missed_samples] = 0
113+
results[:gc_samples] = 0 F438
114+
results
115+
end
116+
117+
# Marshal dumps exit locations to the given filename.
118+
def self.dump_exit_locations(filename)
119+
File.write(filename, Marshal.dump(RubyVM::YJIT.exit_locations))
120+
end
121+
26122
# Return a hash for statistics generated for the --yjit-stats command line option.
27123
# Return nil when option is not passed or unavailable.
28124
def self.runtime_stats

yjit/bindgen/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ fn main() {
258258
.allowlist_function("rb_yjit_obj_written")
259259
.allowlist_function("rb_yjit_str_simple_append")
260260
.allowlist_function("rb_ENCODING_GET")
261+
.allowlist_function("rb_yjit_exit_locations_dict")
261262

262263
// from vm_sync.h
263264
.allowlist_function("rb_vm_barrier")
@@ -292,6 +293,9 @@ fn main() {
292293
.allowlist_function("rb_class_allocate_instance")
293294
.allowlist_function("rb_obj_info")
294295

296+
// From include/ruby/debug.h
297+
.allowlist_function("rb_profile_frames")
298+
295299
// We define VALUE manually, don't import it
296300
.blocklist_type("VALUE")
297301

yjit/src/codegen.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,13 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr {
424424
if get_option!(gen_stats) {
425425
mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8));
426426
call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8);
427+
428+
// If --yjit-trace-exits option is enabled, record the exit stack
429+
// while recording the side exits.
430+
if get_option!(gen_trace_exits) {
431+
mov(cb, C_ARG_REGS[0], const_ptr_opnd(exit_pc as *const u8));
432+
call_ptr(cb, REG0, rb_yjit_record_exit_stack as *const u8);
433+
}
427434
}
428435

429436
pop(cb, REG_SP);

yjit/src/cruby_bindings.inc.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,12 +964,27 @@ extern "C" {
964964
extern "C" {
965965
pub fn rb_vm_barrier();
966966
}
967+
extern "C" {
968+
pub fn rb_profile_frames(
969+
start: ::std::os::raw::c_int,
970+
limit: ::std::os::raw::c_int,
971+
buff: *mut VALUE,
972+
lines: *mut ::std::os::raw::c_int,
973+
) -> ::std::os::raw::c_int;
974+
}
967975
extern "C" {
968976
pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
969977
}
970978
extern "C" {
971979
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
972980
}
981+
extern "C" {
982+
pub fn rb_yjit_exit_locations_dict(
983+
yjit_raw_samples: *mut VALUE,
984+
yjit_line_samples: *mut ::std::os::raw::c_int,
985+
samples_len: ::std::os::raw::c_int,
986+
) -> VALUE;
987+
}
973988
extern "C" {
974989
pub fn rb_yjit_get_page_size() -> u32;
975990
}

yjit/src/invariants.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ pub extern "C" fn rb_yjit_constant_state_changed(id: ID) {
360360
/// See `struct yjijt_root_struct` in C.
361361
#[no_mangle]
362362
pub extern "C" fn rb_yjit_root_mark() {
363+
// Call rb_gc_mark on exit location's raw_samples to
364+
// wrap frames in a GC allocated object. This needs to be called
365+
// at the same time as root mark.
366+
YjitExitLocations::gc_mark_raw_samples();
367+
363368
// Comment from C YJIT:
364369
//
365370
// Why not let the GC move the cme keys in this table?

0 commit comments

Comments
 (0)
0