ZJIT: A64: Remove nop padding after conditional branches

Previously, there were a lot of nops after conditional branches. They
came from branches to LIR labels:

    ./miniruby --zjit-call-threshold=1 --zjit-dump-disasm -e 'Object || String'

    # Insn: v14 CheckInterrupts
    # RUBY_VM_CHECK_INTS(ec)
      ldur w2, [x20, #0x20]
      tst w2, w2
      b.ne #0x120900278
      nop
      nop
      nop
      nop
      nop
    # Insn: v15 Test v11
      tst x0, #-5
      mov x2, #0
      mov x3, #1
      csel x2, x2, x3, eq
    # Insn: v16 IfTrue v15, bb3(v6, v11)
      tst x2, x2
      b.eq #0x120900198
      nop
      nop
      nop
      nop
      nop

They gunk up the disassembly and don't help with speed. This commit
removes them. I think they were accidentally inherited from certain YJIT
branches that require padding for patching. ZJIT doesn't have these
requirements.

Use a single branch instruction for conditional branches to labels; Jmp
already uses a single `B` instruction. The `B.cond` immediate is a signed
19-bit instruction offset, so this will work for assemblers that generate
fewer than ~260,000 instructions -- plenty.
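
For reference, here is a rough sketch of that range check. It is not the
real helper (the diff below uses the backend's bcond_offset_fits_bits); it
only illustrates that a signed 19-bit instruction offset covers roughly
+/- 2^18 instructions:

    // Illustrative sketch only: does `offset` (in instructions) fit in a
    // signed immediate that is `num_bits` wide, like B.cond's 19-bit field?
    fn offset_fits_signed_bits(offset: i64, num_bits: u32) -> bool {
        let min = -(1i64 << (num_bits - 1));
        let max = (1i64 << (num_bits - 1)) - 1;
        (min..=max).contains(&offset)
    }

    fn main() {
        // 2^18 - 1 = 262,143 forward instructions still fit in 19 signed bits...
        assert!(offset_fits_signed_bits((1 << 18) - 1, 19));
        // ...but 2^18 does not, which is the case we now want to fail
        // compilation on instead of panicking.
        assert!(!offset_fits_signed_bits(1 << 18, 19));
    }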

Let the CodeBlock::label_ref() callback return a failure so we can fail
compilation instead of panicking if we ever do get an offset that is too
large.
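
Schematically, linking now flows like this. This is a pared-down sketch,
not the actual CodeBlock API (the real signatures are in the diff below):
each callback reports whether it could encode its branch, and link_labels()
propagates any failure instead of panicking.

    // Pared-down sketch of the new control flow; these types are invented
    // for illustration and don't match the real CodeBlock internals.
    type EncodeResult = Result<(), ()>;

    struct SketchLabelRef {
        // Tries to encode a branch from `src_addr` to `dst_addr`.
        encode: Box<dyn Fn(i64, i64) -> EncodeResult>,
    }

    fn link_labels(refs: &[SketchLabelRef], src_addr: i64, dst_addr: i64) -> EncodeResult {
        let mut result = Ok(());
        for label_ref in refs {
            // Keep linking every reference, but remember any failure.
            result = result.and((label_ref.encode)(src_addr, dst_addr));
        }
        result
    }

    fn main() {
        // A callback that can't reach its target returns Err(()), and the
        // caller turns that into a compile error rather than a panic.
        let refs = vec![SketchLabelRef { encode: Box::new(|_src, _dst| Err(())) }];
        assert_eq!(link_labels(&refs, 0, i64::MAX), Err(()));
    }
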
Alan Wu 2026-01-16 22:39:36 -05:00
parent fa910e2bba
commit 826dbcfb2b
Notes: git 2026-01-20 03:48:04 +00:00
6 changed files with 78 additions and 27 deletions


@ -20,7 +20,7 @@ pub mod arm64;
pub struct Label(pub usize);
/// The object that knows how to encode the branch instruction.
type BranchEncoder = Box<dyn Fn(&mut CodeBlock, i64, i64)>;
type BranchEncoder = Box<dyn Fn(&mut CodeBlock, i64, i64) -> Result<(), ()>>;
/// Reference to an ASM label
pub struct LabelRef {
@ -233,7 +233,7 @@ impl CodeBlock {
}
// Add a label reference at the current write position
pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) {
pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) -> Result<(), ()> + 'static) {
assert!(label.0 < self.label_addrs.len());
// Keep track of the reference
@ -248,8 +248,9 @@ impl CodeBlock {
}
// Link internal label references
pub fn link_labels(&mut self) {
pub fn link_labels(&mut self) -> Result<(), ()> {
let orig_pos = self.write_pos;
let mut link_result = Ok(());
// For each label reference
for label_ref in mem::take(&mut self.label_refs) {
@ -261,11 +262,14 @@ impl CodeBlock {
assert!(label_addr < self.mem_size);
self.write_pos = ref_pos;
(label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
let encode_result = (label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
link_result = link_result.and(encode_result);
// Assert that we've written the same number of bytes that we
// expected to have written.
assert!(self.write_pos == ref_pos + label_ref.num_bytes);
// Verify number of bytes written when the callback returns Ok
if encode_result.is_ok() {
assert_eq!(self.write_pos, ref_pos + label_ref.num_bytes, "label_ref \
callback didn't write number of bytes it claimed to write upfront");
}
}
self.write_pos = orig_pos;
@ -274,6 +278,8 @@ impl CodeBlock {
self.label_addrs.clear();
self.label_names.clear();
assert!(self.label_refs.is_empty());
link_result
}
/// Convert a Label to CodePtr


@ -679,6 +679,7 @@ pub fn call_label(cb: &mut CodeBlock, label: Label) {
cb.label_ref(label, 5, |cb, src_addr, dst_addr| {
cb.write_byte(0xE8);
cb.write_int((dst_addr - src_addr) as u64, 32);
Ok(())
});
}
@ -795,6 +796,7 @@ fn write_jcc<const OP: u8>(cb: &mut CodeBlock, label: Label) {
cb.write_byte(0x0F);
cb.write_byte(OP);
cb.write_int((dst_addr - src_addr) as u64, 32);
Ok(())
});
}
@ -834,6 +836,7 @@ pub fn jmp_label(cb: &mut CodeBlock, label: Label) {
cb.label_ref(label, 5, |cb, src_addr, dst_addr| {
cb.write_byte(0xE9);
cb.write_int((dst_addr - src_addr) as u64, 32);
Ok(())
});
}


@ -136,7 +136,7 @@ fn test_call_label() {
let cb = compile(|cb| {
let label_idx = cb.new_label("fn".to_owned());
call_label(cb, label_idx);
cb.link_labels();
cb.link_labels().unwrap();
});
assert_disasm_snapshot!(cb.disasm(), @" 0x0: call 0");
assert_snapshot!(cb.hexdump(), @"e8fbffffff");
@ -255,7 +255,7 @@ fn test_jge_label() {
let cb = compile(|cb| {
let label_idx = cb.new_label("loop".to_owned());
jge_label(cb, label_idx);
cb.link_labels();
cb.link_labels().unwrap();
});
assert_disasm_snapshot!(cb.disasm(), @" 0x0: jge 0");
assert_snapshot!(cb.hexdump(), @"0f8dfaffffff");
@ -268,14 +268,14 @@ fn test_jmp_label() {
let label_idx = cb.new_label("next".to_owned());
jmp_label(cb, label_idx);
cb.write_label(label_idx);
cb.link_labels();
cb.link_labels().unwrap();
});
// Backwards jump
let cb2 = compile(|cb| {
let label_idx = cb.new_label("loop".to_owned());
cb.write_label(label_idx);
jmp_label(cb, label_idx);
cb.link_labels();
cb.link_labels().unwrap();
});
assert_disasm_snapshot!(disasms!(cb1, cb2), @r"
@ -301,7 +301,7 @@ fn test_jo_label() {
let cb = compile(|cb| {
let label_idx = cb.new_label("loop".to_owned());
jo_label(cb, label_idx);
cb.link_labels();
cb.link_labels().unwrap();
});
assert_disasm_snapshot!(cb.disasm(), @" 0x0: jo 0");


@ -998,10 +998,17 @@ impl Assembler {
generate_branch::<CONDITION>(cb, src_addr, dst_addr);
},
Target::Label(label_idx) => {
// We save `cb.conditional_jump_insns` number of bytes since we may use up to that amount
// `generate_branch` will pad the emitted branch instructions with `nop`s for each unused byte.
cb.label_ref(label_idx, (cb.conditional_jump_insns() * 4) as usize, |cb, src_addr, dst_addr| {
generate_branch::<CONDITION>(cb, src_addr - (cb.conditional_jump_insns() * 4) as i64, dst_addr);
// Try to use a single B.cond instruction
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
// +1 since src_addr is after the instruction while A64
// counts the offset relative to the start.
let offset = (dst_addr - src_addr) / 4 + 1;
if bcond_offset_fits_bits(offset) {
bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32));
Ok(())
} else {
Err(())
}
});
},
Target::SideExit { .. } => {
@ -1399,6 +1406,7 @@ impl Assembler {
// Set output to the raw address of the label
cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
adr(cb, Self::EMIT_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
Ok(())
});
mov(cb, out.into(), Self::EMIT_OPND);
@ -1480,14 +1488,17 @@ impl Assembler {
emit_jmp_ptr(cb, dst_ptr, true);
},
Target::Label(label_idx) => {
// Here we're going to save enough space for
// ourselves and then come back and write the
// instruction once we know the offset. We're going
// to assume we can fit into a single b instruction.
// It will panic otherwise.
// Reserve space for a single B instruction
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
b(cb, InstructionOffset::from_bytes(bytes));
// +1 since src_addr is after the instruction while A64
// counts the offset relative to the start.
let offset = (dst_addr - src_addr) / 4 + 1;
if b_offset_fits_bits(offset) {
b(cb, InstructionOffset::from_insns(offset as i32));
Ok(())
} else {
Err(())
}
});
},
Target::SideExit { .. } => {
@ -1632,7 +1643,7 @@ impl Assembler {
let gc_offsets = asm.arm64_emit(cb);
if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
cb.link_labels();
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
// Invalidate icache for newly written out region so we don't run stale code.
unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) };
@ -1748,6 +1759,29 @@ mod tests {
assert_snapshot!(cb.hexdump(), @"600080d2207d009be10300aa");
}
#[test]
fn test_conditional_branch_to_label() {
let (mut asm, mut cb) = setup_asm();
let start = asm.new_label("start");
let forward = asm.new_label("forward");
let value = asm.load(Opnd::mem(VALUE_BITS, NATIVE_STACK_PTR, 0));
asm.write_label(start.clone());
asm.cmp(value, 0.into());
asm.jg(forward.clone());
asm.jl(start.clone());
asm.write_label(forward);
asm.compile_with_num_regs(&mut cb, 1);
assert_disasm_snapshot!(cb.disasm(), @r"
0x0: ldur x0, [sp]
0x4: cmp x0, #0
0x8: b.gt #0x10
0xc: b.lt #4
");
assert_snapshot!(cb.hexdump(), @"e00340f81f0000f14c000054cbffff54");
}
#[test]
fn sp_movements_are_single_instruction() {
let (mut asm, mut cb) = setup_asm();
@ -2571,7 +2605,7 @@ mod tests {
}
#[test]
fn test_label_branch_generate_bounds() {
fn test_exceeding_label_branch_generate_bounds() {
// The immediate in a conditional branch is a 19 bit signed integer,
// so its max positive value is 2^18 - 1.
const IMMEDIATE_MAX_VALUE: usize = 2usize.pow(18) - 1;
@ -2582,6 +2616,7 @@ mod tests {
let page_size = unsafe { rb_jit_get_page_size() } as usize;
let memory_required = (IMMEDIATE_MAX_VALUE + 8) * 4 + page_size;
crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler
let mut asm = Assembler::new();
let mut cb = CodeBlock::new_dummy_sized(memory_required);
@ -2595,7 +2630,7 @@ mod tests {
});
asm.write_label(far_label.clone());
asm.compile_with_num_regs(&mut cb, 1);
assert_eq!(Err(CompileError::LabelLinkingFailure), asm.compile(&mut cb));
}
#[test]


@ -836,6 +836,7 @@ impl Assembler {
cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| {
let disp = dst_addr - src_addr;
lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap()));
Ok(())
});
} else {
// Set output to the jump target's raw address
@ -1104,7 +1105,7 @@ impl Assembler {
let gc_offsets = asm.x86_emit(cb);
if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
cb.link_labels();
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
Ok((start_ptr, gc_offsets))
} else {
cb.clear_labels();


@ -306,6 +306,7 @@ make_counters! {
compile_error_iseq_stack_too_large,
compile_error_exception_handler,
compile_error_out_of_memory,
compile_error_label_linking_failure,
compile_error_jit_to_jit_optional,
compile_error_register_spill_on_ccall,
compile_error_register_spill_on_alloc,
@ -466,6 +467,10 @@ pub enum CompileError {
ExceptionHandler,
OutOfMemory,
ParseError(ParseError),
/// When a ZJIT function is too large, the branches may have
/// offsets that don't fit in one instruction. We error in
/// that case.
LabelLinkingFailure,
}
/// Return a raw pointer to the exit counter for a given CompileError
@ -479,6 +484,7 @@ pub fn exit_counter_for_compile_error(compile_error: &CompileError) -> Counter {
IseqStackTooLarge => compile_error_iseq_stack_too_large,
ExceptionHandler => compile_error_exception_handler,
OutOfMemory => compile_error_out_of_memory,
LabelLinkingFailure => compile_error_label_linking_failure,
ParseError(parse_error) => match parse_error {
StackUnderflow(_) => compile_error_parse_stack_underflow,
MalformedIseq(_) => compile_error_parse_malformed_iseq,