ZJIT: Inline String#getbyte (#14842)

This commit is contained in:
Max Bernstein 2025-10-15 22:01:00 -04:00 committed by GitHub
parent 45c016866c
commit 7a474e1fbd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Notes: git 2025-10-16 02:01:28 +00:00
Merged-By: tekknolagi <donotemailthisaddress@bernsteinbear.com>
5 changed files with 92 additions and 0 deletions

View File

@ -73,6 +73,7 @@ fn main() {
.allowlist_function("rb_utf8_str_new")
.allowlist_function("rb_str_buf_append")
.allowlist_function("rb_str_dup")
.allowlist_function("rb_str_getbyte")
.allowlist_type("ruby_preserved_encindex")
.allowlist_function("rb_class2name")

View File

@ -363,6 +363,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
// If it happens we abort the compilation for now
Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state),
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)),
&Insn::StringGetbyteFixnum { string, index } => gen_string_getbyte_fixnum(asm, opnd!(string), opnd!(index)),
Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)),
Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)),
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
@ -2109,6 +2110,11 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>
result
}
fn gen_string_getbyte_fixnum(asm: &mut Assembler, string: Opnd, index: Opnd) -> Opnd {
// TODO(max): Open-code rb_str_getbyte to avoid a call
asm_ccall!(asm, rb_str_getbyte, string, index)
}
/// Generate a JIT entry that just increments exit_compilation_failure and exits
fn gen_compile_error_counter(cb: &mut CodeBlock, compile_error: &CompileError) -> Result<CodePtr, CompileError> {
let mut asm = Assembler::new();

View File

@ -132,6 +132,7 @@ unsafe extern "C" {
pub fn rb_hash_empty_p(hash: VALUE) -> VALUE;
pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE);
pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE;
pub fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE;
pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE;
pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE;
pub fn rb_vm_get_special_object(reg_ep: *const VALUE, value_type: vm_special_object_type) -> VALUE;

View File

@ -190,6 +190,7 @@ pub fn init() -> Annotations {
annotate!(rb_mKernel, "itself", inline_kernel_itself);
annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf);
annotate!(rb_cString, "to_s", types::StringExact);
annotate!(rb_cString, "getbyte", inline_string_getbyte);
annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable);
annotate!(rb_cModule, "===", types::BoolExact, no_gc, leaf);
annotate!(rb_cArray, "length", types::Fixnum, no_gc, leaf, elidable);
@ -257,3 +258,15 @@ fn inline_hash_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::Ins
}
None
}
fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option<hir::InsnId> {
let &[index] = args else { return None; };
if fun.likely_a(index, types::Fixnum, state) {
// String#getbyte with a Fixnum is leaf and nogc; otherwise it may run arbitrary Ruby code
// when converting the index to a C integer.
let index = fun.coerce_to(block, index, types::Fixnum, state);
let result = fun.push_insn(block, hir::Insn::StringGetbyteFixnum { string: recv, index });
return Some(result);
}
None
}

View File

@ -557,6 +557,8 @@ pub enum Insn {
StringCopy { val: InsnId, chilled: bool, state: InsnId },
StringIntern { val: InsnId, state: InsnId },
StringConcat { strings: Vec<InsnId>, state: InsnId },
/// Call rb_str_getbyte with known-Fixnum index
StringGetbyteFixnum { string: InsnId, index: InsnId },
/// Combine count stack values into a regexp
ToRegexp { opt: usize, values: Vec<InsnId>, state: InsnId },
@ -860,6 +862,7 @@ impl Insn {
// but we don't have type information here in `impl Insn`. See rb_range_new().
Insn::NewRange { .. } => true,
Insn::NewRangeFixnum { .. } => false,
Insn::StringGetbyteFixnum { .. } => false,
_ => true,
}
}
@ -941,6 +944,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Ok(())
}
Insn::StringGetbyteFixnum { string, index, .. } => {
write!(f, "StringGetbyteFixnum {string}, {index}")
}
Insn::ToRegexp { values, opt, .. } => {
write!(f, "ToRegexp")?;
let mut prefix = " ";
@ -1498,6 +1504,7 @@ impl Function {
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
&StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
&StringGetbyteFixnum { string, index } => StringGetbyteFixnum { string: find!(string), index: find!(index) },
&ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state },
&Test { val } => Test { val: find!(val) },
&IsNil { val } => IsNil { val: find!(val) },
@ -1679,6 +1686,7 @@ impl Function {
Insn::StringCopy { .. } => types::StringExact,
Insn::StringIntern { .. } => types::Symbol,
Insn::StringConcat { .. } => types::StringExact,
Insn::StringGetbyteFixnum { .. } => types::Fixnum.union(types::NilClass),
Insn::ToRegexp { .. } => types::RegexpExact,
Insn::NewArray { .. } => types::ArrayExact,
Insn::ArrayDup { .. } => types::ArrayExact,
@ -2712,6 +2720,10 @@ impl Function {
worklist.extend(strings);
worklist.push_back(state);
}
&Insn::StringGetbyteFixnum { string, index } => {
worklist.push_back(string);
worklist.push_back(index);
}
&Insn::ToRegexp { ref values, state, .. } => {
worklist.extend(values);
worklist.push_back(state);
@ -13122,4 +13134,63 @@ mod opt_tests {
Return v27
");
}
#[test]
fn test_optimize_string_getbyte_fixnum() {
eval(r#"
def test(s, i) = s.getbyte(i)
test("foo", 0)
"#);
assert_snapshot!(hir_string("test"), @r"
fn test@<compiled>:2:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal l0, SP@5
v3:BasicObject = GetLocal l0, SP@4
Jump bb2(v1, v2, v3)
bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject):
EntryPoint JIT(0)
Jump bb2(v6, v7, v8)
bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject):
PatchPoint MethodRedefined(String@0x1000, getbyte@0x1008, cme:0x1010)
PatchPoint NoSingletonClass(String@0x1000)
v26:StringExact = GuardType v11, StringExact
v27:Fixnum = GuardType v12, Fixnum
v28:NilClass|Fixnum = StringGetbyteFixnum v26, v27
CheckInterrupts
Return v28
");
}
#[test]
fn test_elide_string_getbyte_fixnum() {
eval(r#"
def test(s, i)
s.getbyte(i)
5
end
test("foo", 0)
"#);
assert_snapshot!(hir_string("test"), @r"
fn test@<compiled>:3:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal l0, SP@5
v3:BasicObject = GetLocal l0, SP@4
Jump bb2(v1, v2, v3)
bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject):
EntryPoint JIT(0)
Jump bb2(v6, v7, v8)
bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject):
PatchPoint MethodRedefined(String@0x1000, getbyte@0x1008, cme:0x1010)
PatchPoint NoSingletonClass(String@0x1000)
v29:StringExact = GuardType v11, StringExact
v30:Fixnum = GuardType v12, Fixnum
v20:Fixnum[5] = Const Value(5)
CheckInterrupts
Return v20
");
}
}