mirror of
https://github.com/ruby/ruby.git
synced 2026-01-27 04:24:23 +00:00
ZJIT: A64: Remove nop padding after conditional branches
Previously, there were a lot of nops after conditional branches. They
come from branches to LIR labels:
./miniruby --zjit-call-threshold=1 --zjit-dump-disasm -e 'Object || String'
# Insn: v14 CheckInterrupts
# RUBY_VM_CHECK_INTS(ec)
ldur w2, [x20, #0x20]
tst w2, w2
b.ne #0x120900278
nop
nop
nop
nop
nop
# Insn: v15 Test v11
tst x0, #-5
mov x2, #0
mov x3, #1
csel x2, x2, x3, eq
# Insn: v16 IfTrue v15, bb3(v6, v11)
tst x2, x2
b.eq #0x120900198
nop
nop
nop
nop
nop
They gunk up the disassembly and can't be helpful for speed. This commit
removes them. I think they were accidentally inherited from certain YJIT
branches that require padding for patching. ZJIT doesn't have these
requirements.
Use a single branch instruction for conditional branches to labels; Jmp
already uses a single `B` instruction. This will work for assemblers
that generate fewer than ~260,000 instructions -- plenty.
Let the CodeBlock::label_ref() callback return a failure, so we can
fail compilation instead of panicking in case we do get large offsets.
This commit is contained in:
parent
fa910e2bba
commit
826dbcfb2b
Notes:
git
2026-01-20 03:48:04 +00:00
@ -20,7 +20,7 @@ pub mod arm64;
|
||||
pub struct Label(pub usize);
|
||||
|
||||
/// The object that knows how to encode the branch instruction.
|
||||
type BranchEncoder = Box<dyn Fn(&mut CodeBlock, i64, i64)>;
|
||||
type BranchEncoder = Box<dyn Fn(&mut CodeBlock, i64, i64) -> Result<(), ()>>;
|
||||
|
||||
/// Reference to an ASM label
|
||||
pub struct LabelRef {
|
||||
@ -233,7 +233,7 @@ impl CodeBlock {
|
||||
}
|
||||
|
||||
// Add a label reference at the current write position
|
||||
pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) {
|
||||
pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) -> Result<(), ()> + 'static) {
|
||||
assert!(label.0 < self.label_addrs.len());
|
||||
|
||||
// Keep track of the reference
|
||||
@ -248,8 +248,9 @@ impl CodeBlock {
|
||||
}
|
||||
|
||||
// Link internal label references
|
||||
pub fn link_labels(&mut self) {
|
||||
pub fn link_labels(&mut self) -> Result<(), ()> {
|
||||
let orig_pos = self.write_pos;
|
||||
let mut link_result = Ok(());
|
||||
|
||||
// For each label reference
|
||||
for label_ref in mem::take(&mut self.label_refs) {
|
||||
@ -261,11 +262,14 @@ impl CodeBlock {
|
||||
assert!(label_addr < self.mem_size);
|
||||
|
||||
self.write_pos = ref_pos;
|
||||
(label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
|
||||
let encode_result = (label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
|
||||
link_result = link_result.and(encode_result);
|
||||
|
||||
// Assert that we've written the same number of bytes that we
|
||||
// expected to have written.
|
||||
assert!(self.write_pos == ref_pos + label_ref.num_bytes);
|
||||
// Verify number of bytes written when the callback returns Ok
|
||||
if encode_result.is_ok() {
|
||||
assert_eq!(self.write_pos, ref_pos + label_ref.num_bytes, "label_ref \
|
||||
callback didn't write number of bytes it claimed to write upfront");
|
||||
}
|
||||
}
|
||||
|
||||
self.write_pos = orig_pos;
|
||||
@ -274,6 +278,8 @@ impl CodeBlock {
|
||||
self.label_addrs.clear();
|
||||
self.label_names.clear();
|
||||
assert!(self.label_refs.is_empty());
|
||||
|
||||
link_result
|
||||
}
|
||||
|
||||
/// Convert a Label to CodePtr
|
||||
|
||||
@ -679,6 +679,7 @@ pub fn call_label(cb: &mut CodeBlock, label: Label) {
|
||||
cb.label_ref(label, 5, |cb, src_addr, dst_addr| {
|
||||
cb.write_byte(0xE8);
|
||||
cb.write_int((dst_addr - src_addr) as u64, 32);
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
@ -795,6 +796,7 @@ fn write_jcc<const OP: u8>(cb: &mut CodeBlock, label: Label) {
|
||||
cb.write_byte(0x0F);
|
||||
cb.write_byte(OP);
|
||||
cb.write_int((dst_addr - src_addr) as u64, 32);
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
@ -834,6 +836,7 @@ pub fn jmp_label(cb: &mut CodeBlock, label: Label) {
|
||||
cb.label_ref(label, 5, |cb, src_addr, dst_addr| {
|
||||
cb.write_byte(0xE9);
|
||||
cb.write_int((dst_addr - src_addr) as u64, 32);
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -136,7 +136,7 @@ fn test_call_label() {
|
||||
let cb = compile(|cb| {
|
||||
let label_idx = cb.new_label("fn".to_owned());
|
||||
call_label(cb, label_idx);
|
||||
cb.link_labels();
|
||||
cb.link_labels().unwrap();
|
||||
});
|
||||
assert_disasm_snapshot!(cb.disasm(), @" 0x0: call 0");
|
||||
assert_snapshot!(cb.hexdump(), @"e8fbffffff");
|
||||
@ -255,7 +255,7 @@ fn test_jge_label() {
|
||||
let cb = compile(|cb| {
|
||||
let label_idx = cb.new_label("loop".to_owned());
|
||||
jge_label(cb, label_idx);
|
||||
cb.link_labels();
|
||||
cb.link_labels().unwrap();
|
||||
});
|
||||
assert_disasm_snapshot!(cb.disasm(), @" 0x0: jge 0");
|
||||
assert_snapshot!(cb.hexdump(), @"0f8dfaffffff");
|
||||
@ -268,14 +268,14 @@ fn test_jmp_label() {
|
||||
let label_idx = cb.new_label("next".to_owned());
|
||||
jmp_label(cb, label_idx);
|
||||
cb.write_label(label_idx);
|
||||
cb.link_labels();
|
||||
cb.link_labels().unwrap();
|
||||
});
|
||||
// Backwards jump
|
||||
let cb2 = compile(|cb| {
|
||||
let label_idx = cb.new_label("loop".to_owned());
|
||||
cb.write_label(label_idx);
|
||||
jmp_label(cb, label_idx);
|
||||
cb.link_labels();
|
||||
cb.link_labels().unwrap();
|
||||
});
|
||||
|
||||
assert_disasm_snapshot!(disasms!(cb1, cb2), @r"
|
||||
@ -301,7 +301,7 @@ fn test_jo_label() {
|
||||
let cb = compile(|cb| {
|
||||
let label_idx = cb.new_label("loop".to_owned());
|
||||
jo_label(cb, label_idx);
|
||||
cb.link_labels();
|
||||
cb.link_labels().unwrap();
|
||||
});
|
||||
|
||||
assert_disasm_snapshot!(cb.disasm(), @" 0x0: jo 0");
|
||||
|
||||
@ -998,10 +998,17 @@ impl Assembler {
|
||||
generate_branch::<CONDITION>(cb, src_addr, dst_addr);
|
||||
},
|
||||
Target::Label(label_idx) => {
|
||||
// We save `cb.conditional_jump_insns` number of bytes since we may use up to that amount
|
||||
// `generate_branch` will pad the emitted branch instructions with `nop`s for each unused byte.
|
||||
cb.label_ref(label_idx, (cb.conditional_jump_insns() * 4) as usize, |cb, src_addr, dst_addr| {
|
||||
generate_branch::<CONDITION>(cb, src_addr - (cb.conditional_jump_insns() * 4) as i64, dst_addr);
|
||||
// Try to use a single B.cond instruction
|
||||
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
|
||||
// +1 since src_addr is after the instruction while A64
|
||||
// counts the offset relative to the start.
|
||||
let offset = (dst_addr - src_addr) / 4 + 1;
|
||||
if bcond_offset_fits_bits(offset) {
|
||||
bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32));
|
||||
Ok(())
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
});
|
||||
},
|
||||
Target::SideExit { .. } => {
|
||||
@ -1399,6 +1406,7 @@ impl Assembler {
|
||||
// Set output to the raw address of the label
|
||||
cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
|
||||
adr(cb, Self::EMIT_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
|
||||
Ok(())
|
||||
});
|
||||
|
||||
mov(cb, out.into(), Self::EMIT_OPND);
|
||||
@ -1480,14 +1488,17 @@ impl Assembler {
|
||||
emit_jmp_ptr(cb, dst_ptr, true);
|
||||
},
|
||||
Target::Label(label_idx) => {
|
||||
// Here we're going to save enough space for
|
||||
// ourselves and then come back and write the
|
||||
// instruction once we know the offset. We're going
|
||||
// to assume we can fit into a single b instruction.
|
||||
// It will panic otherwise.
|
||||
// Reserve space for a single B instruction
|
||||
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
|
||||
let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
|
||||
b(cb, InstructionOffset::from_bytes(bytes));
|
||||
// +1 since src_addr is after the instruction while A64
|
||||
// counts the offset relative to the start.
|
||||
let offset = (dst_addr - src_addr) / 4 + 1;
|
||||
if b_offset_fits_bits(offset) {
|
||||
b(cb, InstructionOffset::from_insns(offset as i32));
|
||||
Ok(())
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
});
|
||||
},
|
||||
Target::SideExit { .. } => {
|
||||
@ -1632,7 +1643,7 @@ impl Assembler {
|
||||
let gc_offsets = asm.arm64_emit(cb);
|
||||
|
||||
if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
|
||||
cb.link_labels();
|
||||
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
|
||||
|
||||
// Invalidate icache for newly written out region so we don't run stale code.
|
||||
unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) };
|
||||
@ -1748,6 +1759,29 @@ mod tests {
|
||||
assert_snapshot!(cb.hexdump(), @"600080d2207d009be10300aa");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_conditional_branch_to_label() {
|
||||
let (mut asm, mut cb) = setup_asm();
|
||||
let start = asm.new_label("start");
|
||||
let forward = asm.new_label("forward");
|
||||
|
||||
let value = asm.load(Opnd::mem(VALUE_BITS, NATIVE_STACK_PTR, 0));
|
||||
asm.write_label(start.clone());
|
||||
asm.cmp(value, 0.into());
|
||||
asm.jg(forward.clone());
|
||||
asm.jl(start.clone());
|
||||
asm.write_label(forward);
|
||||
|
||||
asm.compile_with_num_regs(&mut cb, 1);
|
||||
assert_disasm_snapshot!(cb.disasm(), @r"
|
||||
0x0: ldur x0, [sp]
|
||||
0x4: cmp x0, #0
|
||||
0x8: b.gt #0x10
|
||||
0xc: b.lt #4
|
||||
");
|
||||
assert_snapshot!(cb.hexdump(), @"e00340f81f0000f14c000054cbffff54");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sp_movements_are_single_instruction() {
|
||||
let (mut asm, mut cb) = setup_asm();
|
||||
@ -2571,7 +2605,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_label_branch_generate_bounds() {
|
||||
fn test_exceeding_label_branch_generate_bounds() {
|
||||
// The immediate in a conditional branch is a 19 bit signed integer
|
||||
// which has a max value of 2^18 - 1.
|
||||
const IMMEDIATE_MAX_VALUE: usize = 2usize.pow(18) - 1;
|
||||
@ -2582,6 +2616,7 @@ mod tests {
|
||||
let page_size = unsafe { rb_jit_get_page_size() } as usize;
|
||||
let memory_required = (IMMEDIATE_MAX_VALUE + 8) * 4 + page_size;
|
||||
|
||||
crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler
|
||||
let mut asm = Assembler::new();
|
||||
let mut cb = CodeBlock::new_dummy_sized(memory_required);
|
||||
|
||||
@ -2595,7 +2630,7 @@ mod tests {
|
||||
});
|
||||
|
||||
asm.write_label(far_label.clone());
|
||||
asm.compile_with_num_regs(&mut cb, 1);
|
||||
assert_eq!(Err(CompileError::LabelLinkingFailure), asm.compile(&mut cb));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@ -836,6 +836,7 @@ impl Assembler {
|
||||
cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| {
|
||||
let disp = dst_addr - src_addr;
|
||||
lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap()));
|
||||
Ok(())
|
||||
});
|
||||
} else {
|
||||
// Set output to the jump target's raw address
|
||||
@ -1104,7 +1105,7 @@ impl Assembler {
|
||||
let gc_offsets = asm.x86_emit(cb);
|
||||
|
||||
if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
|
||||
cb.link_labels();
|
||||
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
|
||||
Ok((start_ptr, gc_offsets))
|
||||
} else {
|
||||
cb.clear_labels();
|
||||
|
||||
@ -306,6 +306,7 @@ make_counters! {
|
||||
compile_error_iseq_stack_too_large,
|
||||
compile_error_exception_handler,
|
||||
compile_error_out_of_memory,
|
||||
compile_error_label_linking_failure,
|
||||
compile_error_jit_to_jit_optional,
|
||||
compile_error_register_spill_on_ccall,
|
||||
compile_error_register_spill_on_alloc,
|
||||
@ -466,6 +467,10 @@ pub enum CompileError {
|
||||
ExceptionHandler,
|
||||
OutOfMemory,
|
||||
ParseError(ParseError),
|
||||
/// When a ZJIT function is too large, the branches may have
|
||||
/// offsets that don't fit in one instruction. We error in
|
||||
/// that case.
|
||||
LabelLinkingFailure,
|
||||
}
|
||||
|
||||
/// Return the exit counter for a given CompileError
|
||||
@ -479,6 +484,7 @@ pub fn exit_counter_for_compile_error(compile_error: &CompileError) -> Counter {
|
||||
IseqStackTooLarge => compile_error_iseq_stack_too_large,
|
||||
ExceptionHandler => compile_error_exception_handler,
|
||||
OutOfMemory => compile_error_out_of_memory,
|
||||
LabelLinkingFailure => compile_error_label_linking_failure,
|
||||
ParseError(parse_error) => match parse_error {
|
||||
StackUnderflow(_) => compile_error_parse_stack_underflow,
|
||||
MalformedIseq(_) => compile_error_parse_malformed_iseq,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user