From 7a474e1fbd6198e2bffff9bc4f94bfa376162d59 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 15 Oct 2025 22:01:00 -0400 Subject: [PATCH] ZJIT: Inline String#getbyte (#14842) --- zjit/bindgen/src/main.rs | 1 + zjit/src/codegen.rs | 6 ++++ zjit/src/cruby.rs | 1 + zjit/src/cruby_methods.rs | 13 +++++++ zjit/src/hir.rs | 71 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+) diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 43fec09014..b54e9404fd 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -73,6 +73,7 @@ fn main() { .allowlist_function("rb_utf8_str_new") .allowlist_function("rb_str_buf_append") .allowlist_function("rb_str_dup") + .allowlist_function("rb_str_getbyte") .allowlist_type("ruby_preserved_encindex") .allowlist_function("rb_class2name") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 0bf0205b53..049501dd15 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -363,6 +363,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // If it happens we abort the compilation for now Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state), Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), + &Insn::StringGetbyteFixnum { string, index } => gen_string_getbyte_fixnum(asm, opnd!(string), opnd!(index)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"), @@ -2109,6 +2110,11 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec result } +fn gen_string_getbyte_fixnum(asm: &mut Assembler, string: Opnd, index: Opnd) -> Opnd { + // TODO(max): Open-code rb_str_getbyte to avoid a call + asm_ccall!(asm, rb_str_getbyte, string, index) +} + /// Generate a JIT entry that just increments exit_compilation_failure and exits fn gen_compile_error_counter(cb: &mut CodeBlock, compile_error: &CompileError) -> Result { let mut asm = Assembler::new(); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index dca4d91805..5f4eac1db5 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -132,6 +132,7 @@ unsafe extern "C" { pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; + pub fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_get_special_object(reg_ep: *const VALUE, value_type: vm_special_object_type) -> VALUE; diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 27e6b151bf..0da7b07ad4 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -190,6 +190,7 @@ pub fn init() -> Annotations { annotate!(rb_mKernel, "itself", inline_kernel_itself); annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf); annotate!(rb_cString, "to_s", types::StringExact); + annotate!(rb_cString, "getbyte", inline_string_getbyte); annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); annotate!(rb_cModule, "===", types::BoolExact, no_gc, leaf); annotate!(rb_cArray, "length", types::Fixnum, no_gc, leaf, elidable); @@ -257,3 +258,15 @@ fn inline_hash_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::Ins } None } + +fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { + let &[index] = args else { return None; }; + if fun.likely_a(index, types::Fixnum, state) { + // String#getbyte with a Fixnum is leaf and nogc; otherwise it may run arbitrary Ruby code + // when converting the index to a C integer. + let index = fun.coerce_to(block, index, types::Fixnum, state); + let result = fun.push_insn(block, hir::Insn::StringGetbyteFixnum { string: recv, index }); + return Some(result); + } + None +} diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 4ad20b9c1d..f7df493d4a 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -557,6 +557,8 @@ pub enum Insn { StringCopy { val: InsnId, chilled: bool, state: InsnId }, StringIntern { val: InsnId, state: InsnId }, StringConcat { strings: Vec, state: InsnId }, + /// Call rb_str_getbyte with known-Fixnum index + StringGetbyteFixnum { string: InsnId, index: InsnId }, /// Combine count stack values into a regexp ToRegexp { opt: usize, values: Vec, state: InsnId }, @@ -860,6 +862,7 @@ impl Insn { // but we don't have type information here in `impl Insn`. See rb_range_new(). Insn::NewRange { .. } => true, Insn::NewRangeFixnum { .. } => false, + Insn::StringGetbyteFixnum { .. } => false, _ => true, } } @@ -941,6 +944,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Ok(()) } + Insn::StringGetbyteFixnum { string, index, .. } => { + write!(f, "StringGetbyteFixnum {string}, {index}") + } Insn::ToRegexp { values, opt, .. } => { write!(f, "ToRegexp")?; let mut prefix = " "; @@ -1498,6 +1504,7 @@ impl Function { &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, + &StringGetbyteFixnum { string, index } => StringGetbyteFixnum { string: find!(string), index: find!(index) }, &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, @@ -1679,6 +1686,7 @@ impl Function { Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, + Insn::StringGetbyteFixnum { .. } => types::Fixnum.union(types::NilClass), Insn::ToRegexp { .. } => types::RegexpExact, Insn::NewArray { .. } => types::ArrayExact, Insn::ArrayDup { .. } => types::ArrayExact, @@ -2712,6 +2720,10 @@ impl Function { worklist.extend(strings); worklist.push_back(state); } + &Insn::StringGetbyteFixnum { string, index } => { + worklist.push_back(string); + worklist.push_back(index); + } &Insn::ToRegexp { ref values, state, .. } => { worklist.extend(values); worklist.push_back(state); @@ -13122,4 +13134,63 @@ mod opt_tests { Return v27 "); } + + #[test] + fn test_optimize_string_getbyte_fixnum() { + eval(r#" + def test(s, i) = s.getbyte(i) + test("foo", 0) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(String@0x1000, getbyte@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v26:StringExact = GuardType v11, StringExact + v27:Fixnum = GuardType v12, Fixnum + v28:NilClass|Fixnum = StringGetbyteFixnum v26, v27 + CheckInterrupts + Return v28 + "); + } + + #[test] + fn test_elide_string_getbyte_fixnum() { + eval(r#" + def test(s, i) + s.getbyte(i) + 5 + end + test("foo", 0) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(String@0x1000, getbyte@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v29:StringExact = GuardType v11, StringExact + v30:Fixnum = GuardType v12, Fixnum + v20:Fixnum[5] = Const Value(5) + CheckInterrupts + Return v20 + "); + } }