YJIT: Interleave inline and outlined code blocks (#6460) · Shopify/ruby@c3ec150 · GitHub
Commit c3ec150

k0kubun authored and committed
YJIT: Interleave inline and outlined code blocks (ruby#6460)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
1 parent d2cb48e commit c3ec150

9 files changed: +378 −161 lines changed

yjit/src/asm/mod.rs

Lines changed: 201 additions & 39 deletions
@@ -1,9 +1,20 @@
+use std::cell::RefCell;
+use std::cmp;
 use std::fmt;
 use std::mem;
+use std::rc::Rc;
+#[cfg(target_arch = "x86_64")]
+use crate::backend::x86_64::JMP_PTR_BYTES;
+#[cfg(target_arch = "aarch64")]
+use crate::backend::arm64::JMP_PTR_BYTES;
+use crate::backend::ir::Assembler;
+use crate::backend::ir::Target;
+use crate::virtualmem::WriteError;
 
 #[cfg(feature = "asm_comments")]
 use std::collections::BTreeMap;
 
+use crate::codegen::CodegenGlobals;
 use crate::virtualmem::{VirtualMem, CodePtr};
 
 // Lots of manual vertical alignment in there that rustfmt doesn't handle well.
@@ -17,7 +28,8 @@ pub mod arm64;
 //
 
 /// Reference to an ASM label
-struct LabelRef {
+#[derive(Clone)]
+pub struct LabelRef {
     // Position in the code block where the label reference exists
     pos: usize,
 
@@ -36,14 +48,20 @@ struct LabelRef {
 /// Block of memory into which instructions can be assembled
 pub struct CodeBlock {
     // Memory for storing the encoded instructions
-    mem_block: VirtualMem,
+    mem_block: Rc<RefCell<VirtualMem>>,
 
     // Memory block size
     mem_size: usize,
 
     // Current writing position
     write_pos: usize,
 
+    // Size of a code page (inlined + outlined)
+    page_size: usize,
+
+    // Size reserved for writing a jump to the next page
+    page_end_reserve: usize,
+
     // Table of registered label addresses
     label_addrs: Vec<usize>,
 
@@ -58,7 +76,6 @@ pub struct CodeBlock {
     asm_comments: BTreeMap<usize, Vec<String>>,
 
     // True for OutlinedCb
-    #[cfg(feature = "disasm")]
     pub outlined: bool,
 
     // Set if the CodeBlock is unable to output some instructions,
@@ -67,27 +84,158 @@
     dropped_bytes: bool,
 }
 
+/// Set of CodeBlock label states. Used for recovering the previous state.
+pub struct LabelState {
+    label_addrs: Vec<usize>,
+    label_names: Vec<String>,
+    label_refs: Vec<LabelRef>,
+}
+
 impl CodeBlock {
     /// Make a new CodeBlock
-    pub fn new(mem_block: VirtualMem, outlined: bool) -> Self {
-        Self {
-            mem_size: mem_block.virtual_region_size(),
+    pub fn new(mem_block: Rc<RefCell<VirtualMem>>, page_size: usize, outlined: bool) -> Self {
+        let mem_size = mem_block.borrow().virtual_region_size();
+        let mut cb = Self {
             mem_block,
+            mem_size,
             write_pos: 0,
+            page_size,
+            page_end_reserve: JMP_PTR_BYTES,
             label_addrs: Vec::new(),
             label_names: Vec::new(),
             label_refs: Vec::new(),
             #[cfg(feature = "asm_comments")]
             asm_comments: BTreeMap::new(),
-            #[cfg(feature = "disasm")]
             outlined,
             dropped_bytes: false,
+        };
+        cb.write_pos = cb.page_start();
+        cb
+    }
+
+    /// Move the CodeBlock to the next page. If it's on the furthest page,
+    /// move the other CodeBlock to the next page as well.
+    pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
+        let old_write_ptr = self.get_write_ptr();
+        self.set_write_ptr(base_ptr);
+        self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
+
+        // Move self to the next page
+        let next_page_idx = self.write_pos / self.page_size + 1;
+        if !self.set_page(next_page_idx, &jmp_ptr) {
+            self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
+            return false;
+        }
+
+        // Move the other CodeBlock to the same page if it's on the furthest page
+        self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
+
+        return !self.dropped_bytes;
+    }
+
+    /// Move the CodeBlock to page_idx only if it's not going backwards.
+    fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
+        // Do not move the CodeBlock if page_idx points to an old position so that this
+        // CodeBlock will not overwrite existing code.
+        //
+        // Let's say this is the current situation:
+        // cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
+        //
+        // When cb needs to patch page1, this will be temporarily changed to:
+        // cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
+        //
+        // While patching page1, cb may need to jump to page2. What set_page currently does is:
+        // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
+        // instead of:
+        // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
+        // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
+        // write_pos point to existing code in page2, which might let ocb overwrite it later.
+        //
+        // We could remember the last write_pos in page2 and let set_page use that position,
+        // but you need to waste some space for keeping write_pos for every single page.
+        // It doesn't seem necessary for performance either. So we're currently not doing it.
+        let mut dst_pos = self.page_size * page_idx + self.page_start();
+        if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
+            // Reset dropped_bytes
+            self.dropped_bytes = false;
+
+            // Convert dst_pos to dst_ptr
+            let src_pos = self.write_pos;
+            self.write_pos = dst_pos;
+            let dst_ptr = self.get_write_ptr();
+            self.write_pos = src_pos;
+
+            // Generate jmp_ptr from src_pos to dst_pos
+            self.without_page_end_reserve(|cb| {
+                cb.add_comment("jump to next page");
+                jmp_ptr(cb, dst_ptr);
+                assert!(!cb.has_dropped_bytes());
+            });
+
+            // Start the next code from dst_pos
+            self.write_pos = dst_pos;
         }
+        !self.dropped_bytes
+    }
+
+    /// write_pos of the current page start
+    pub fn page_start_pos(&self) -> usize {
+        self.get_write_pos() / self.page_size * self.page_size + self.page_start()
+    }
+
+    /// Offset of each page where CodeBlock should start writing
+    pub fn page_start(&self) -> usize {
+        let mut start = if self.inline() {
+            0
+        } else {
+            self.page_size / 2
+        };
+        if cfg!(debug_assertions) && !cfg!(test) {
+            // Leave illegal instructions at the beginning of each page to assert
+            // we're not accidentally crossing page boundaries.
+            start += JMP_PTR_BYTES;
+        }
+        start
+    }
+
+    /// Offset of each page where CodeBlock should stop writing (exclusive)
+    pub fn page_end(&self) -> usize {
+        let page_end = if self.inline() {
+            self.page_size / 2
+        } else {
+            self.page_size
+        };
+        page_end - self.page_end_reserve // reserve space to jump to the next page
+    }
+
+    /// Call a given function with page_end_reserve = 0
+    pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
+        let old_page_end_reserve = self.page_end_reserve;
+        self.page_end_reserve = 0;
+        block(self);
+        self.page_end_reserve = old_page_end_reserve;
+    }
+
+    /// Return the address ranges of a given address range that this CodeBlock can write.
+    pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
+        let mut addrs = vec![];
+        let mut start = start_ptr.raw_ptr() as usize;
+        let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize;
+        let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end);
+        while start < end {
+            let current_page = start / self.page_size * self.page_size;
+            let page_end = std::cmp::min(end, current_page + self.page_end()) as usize;
+            addrs.push((start, page_end));
+            start = current_page + self.page_size + self.page_start();
+        }
+        addrs
     }
 
     /// Check if this code block has sufficient remaining capacity
     pub fn has_capacity(&self, num_bytes: usize) -> bool {
-        self.write_pos + num_bytes < self.mem_size
+        let page_offset = self.write_pos % self.page_size;
+        let capacity = self.page_end().saturating_sub(page_offset);
+        num_bytes <= capacity
     }
 
     /// Add an assembly comment if the feature is on.
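The hunk above is the core of the interleaving scheme: each shared page is split so the inline CodeBlock writes into the first half and the outlined CodeBlock into the second half, with a few bytes reserved at the end of each half for a jump to the next page. The following standalone Rust sketch is not part of the commit; PAGE_SIZE and JMP_RESERVE are illustrative stand-ins for the real page size and JMP_PTR_BYTES. It only shows how page_start/page_end/has_capacity fit together.

// Standalone sketch (not from the commit): how two CodeBlock halves share a page.
const PAGE_SIZE: usize = 16 * 1024;
const JMP_RESERVE: usize = 5; // hypothetical size of a "jump to next page"

struct Half { inline: bool, write_pos: usize }

impl Half {
    // Offset within each page where this half starts writing.
    fn page_start(&self) -> usize {
        if self.inline { 0 } else { PAGE_SIZE / 2 }
    }

    // Offset within each page where this half must stop (exclusive),
    // keeping room for the jump to the next page.
    fn page_end(&self) -> usize {
        (if self.inline { PAGE_SIZE / 2 } else { PAGE_SIZE }) - JMP_RESERVE
    }

    // Remaining capacity on the current page, like CodeBlock::has_capacity.
    fn has_capacity(&self, num_bytes: usize) -> bool {
        let page_offset = self.write_pos % PAGE_SIZE;
        num_bytes <= self.page_end().saturating_sub(page_offset)
    }
}

fn main() {
    let cb = Half { inline: true, write_pos: 0 };
    let ocb = Half { inline: false, write_pos: PAGE_SIZE / 2 };
    // The inline half can use the first half of each page minus the reserve,
    // the outlined half the second half minus the reserve.
    assert!(cb.has_capacity(PAGE_SIZE / 2 - JMP_RESERVE));
    assert!(!cb.has_capacity(PAGE_SIZE / 2));
    assert!(ocb.has_capacity(PAGE_SIZE / 2 - JMP_RESERVE));
    println!("inline writes [{}..{}), outlined writes [{}..{}) per page",
        cb.page_start(), cb.page_end(), ocb.page_start(), ocb.page_end());
}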
@@ -121,8 +269,8 @@ impl CodeBlock {
         self.write_pos
     }
 
-    pub fn get_mem(&mut self) -> &mut VirtualMem {
-        &mut self.mem_block
+    pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+        self.mem_block.borrow_mut().write_byte(write_ptr, byte)
     }
 
     // Set the current write position
@@ -134,49 +282,31 @@ impl CodeBlock {
         self.write_pos = pos;
     }
 
-    // Align the current write pointer to a multiple of bytes
-    pub fn align_pos(&mut self, multiple: u32) {
-        // Compute the alignment boundary that is lower or equal
-        // Do everything with usize
-        let multiple: usize = multiple.try_into().unwrap();
-        let pos = self.get_write_ptr().raw_ptr() as usize;
-        let remainder = pos % multiple;
-        let prev_aligned = pos - remainder;
-
-        if prev_aligned == pos {
-            // Already aligned so do nothing
-        } else {
-            // Align by advancing
-            let pad = multiple - remainder;
-            self.set_pos(self.get_write_pos() + pad);
-        }
-    }
-
     // Set the current write position from a pointer
     pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
-        let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
+        let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
         self.set_pos(pos);
     }
 
     /// Get a (possibly dangling) direct pointer into the executable memory block
     pub fn get_ptr(&self, offset: usize) -> CodePtr {
-        self.mem_block.start_ptr().add_bytes(offset)
+        self.mem_block.borrow().start_ptr().add_bytes(offset)
     }
 
     /// Get a (possibly dangling) direct pointer to the current write position
-    pub fn get_write_ptr(&mut self) -> CodePtr {
+    pub fn get_write_ptr(&self) -> CodePtr {
         self.get_ptr(self.write_pos)
     }
 
     /// Write a single byte at the current position.
     pub fn write_byte(&mut self, byte: u8) {
         let write_ptr = self.get_write_ptr();
-
-        if self.mem_block.write_byte(write_ptr, byte).is_ok() {
-            self.write_pos += 1;
-        } else {
+        if !self.has_capacity(1) || self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_err() {
            self.dropped_bytes = true;
        }
+
+        // Always advance write_pos since arm64 PadEntryExit needs this to stop the loop.
+        self.write_pos += 1;
     }
 
     /// Write multiple bytes starting from the current position.
@@ -242,6 +372,9 @@ impl CodeBlock {
         self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });
 
         // Move past however many bytes the instruction takes up
+        if !self.has_capacity(num_bytes) {
+            self.dropped_bytes = true; // retry emitting the Insn after next_page
+        }
         self.write_pos += num_bytes;
     }
 
@@ -274,14 +407,43 @@ impl CodeBlock {
         assert!(self.label_refs.is_empty());
     }
 
+    pub fn clear_labels(&mut self) {
+        self.label_addrs.clear();
+        self.label_names.clear();
+        self.label_refs.clear();
+    }
+
+    pub fn get_label_state(&self) -> LabelState {
+        LabelState {
+            label_addrs: self.label_addrs.clone(),
+            label_names: self.label_names.clone(),
+            label_refs: self.label_refs.clone(),
+        }
+    }
+
+    pub fn set_label_state(&mut self, state: LabelState) {
+        self.label_addrs = state.label_addrs;
+        self.label_names = state.label_names;
+        self.label_refs = state.label_refs;
+    }
+
     pub fn mark_all_executable(&mut self) {
-        self.mem_block.mark_all_executable();
+        self.mem_block.borrow_mut().mark_all_executable();
     }
 
-    #[cfg(feature = "disasm")]
     pub fn inline(&self) -> bool {
         !self.outlined
     }
+
+    pub fn other_cb(&self) -> Option<&'static mut Self> {
+        if !CodegenGlobals::has_instance() {
+            None
+        } else if self.inline() {
+            Some(CodegenGlobals::get_outlined_cb().unwrap())
+        } else {
+            Some(CodegenGlobals::get_inline_cb())
+        }
+    }
 }
 
 #[cfg(test)]
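get_label_state and set_label_state above let a caller snapshot the label tables before an assembly attempt and restore them if that attempt has to be discarded, for example because the code ran off the end of a page. A minimal sketch of that save/restore pattern follows; it is not code from the commit, and the Labels struct is a hypothetical stand-in for CodeBlock's real label tables.

// Hedged sketch (not from this diff): save/restore of label bookkeeping.
#[derive(Clone)]
struct Labels {
    label_addrs: Vec<usize>,
    label_names: Vec<String>,
}

fn main() {
    let mut labels = Labels { label_addrs: vec![0x10], label_names: vec!["entry".into()] };

    // Save the state before an attempt that may fail (like get_label_state).
    let saved = labels.clone();

    // The attempt registers a label, then runs out of page space.
    labels.label_addrs.push(0x40);
    labels.label_names.push("side_exit".into());
    let out_of_space = true;

    if out_of_space {
        // Roll back to the saved state (like set_label_state) before retrying.
        labels = saved;
    }
    assert_eq!(labels.label_names, vec!["entry".to_string()]);
}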
@@ -295,15 +457,15 @@ impl CodeBlock {
         let mem_start: *const u8 = alloc.mem_start();
         let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
 
-        Self::new(virt_mem, false)
+        Self::new(Rc::new(RefCell::new(virt_mem)), 16 * 1024, false)
     }
 }
 
 /// Produce hex string output from the bytes in a code block
 impl fmt::LowerHex for CodeBlock {
     fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
         for pos in 0..self.write_pos {
-            let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
+            let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
             fmtr.write_fmt(format_args!("{:02x}", byte))?;
         }
         Ok(())
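Taken together, write_byte and label_ref now record dropped_bytes instead of stopping when a page fills up, so a caller can notice the overflow, jump to the next page, and re-emit the same code. A rough sketch of that retry flow follows; it is illustrative only, and Emitter plus emit_block are hypothetical stand-ins for the real CodeBlock/Assembler pair touched elsewhere in this commit.

// Hedged sketch (not code from this commit): drop, move to the next page, retry.
struct Emitter {
    write_pos: usize,
    page_end: usize,
    dropped_bytes: bool,
}

impl Emitter {
    fn write_byte(&mut self, _byte: u8) {
        if self.write_pos >= self.page_end {
            self.dropped_bytes = true; // out of space: drop, but keep counting
        }
        self.write_pos += 1;
    }

    fn next_page(&mut self) -> bool {
        // Pretend the next page starts right after the current one.
        self.write_pos = self.page_end + 1;
        self.page_end += 100;
        self.dropped_bytes = false;
        true // false would mean the whole region is exhausted
    }
}

fn emit_block(e: &mut Emitter, bytes: &[u8]) {
    for &b in bytes {
        e.write_byte(b);
    }
}

fn main() {
    let mut e = Emitter { write_pos: 95, page_end: 100, dropped_bytes: false };
    let code = [0x90u8; 10]; // 10 bytes, but only 5 left on the current page

    emit_block(&mut e, &code);
    if e.dropped_bytes && e.next_page() {
        // Re-emit the whole block on the fresh page.
        emit_block(&mut e, &code);
    }
    assert!(!e.dropped_bytes);
}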
