ZJIT: Specialize OPTIMIZED_METHOD_TYPE_CALL (#15859)

Closes: https://github.com/Shopify/ruby/issues/865

## Benchmark 
### lobsters
- wall clock time
  - before patch: Average of last 10, non-warmup iters: 809ms
  - after patch: Average of last 10, non-warmup iters: 754ms
- zjit stats below

<details>

<summary>before patch</summary>

```
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (54.9% of total 18,003,698):
                                               Hash#fetch: 3,184,106 (17.7%)
                                            Regexp#match?:   707,148 ( 3.9%)
                                                Hash#key?:   689,879 ( 3.8%)
                                              String#sub!:   489,841 ( 2.7%)
                                           Array#include?:   470,648 ( 2.6%)
                                             Set#include?:   397,520 ( 2.2%)
                                                String#<<:   396,279 ( 2.2%)
                                       String#start_with?:   373,666 ( 2.1%)
                                               Kernel#dup:   352,617 ( 2.0%)
                                               Array#any?:   350,454 ( 1.9%)
                                              Hash#delete:   331,784 ( 1.8%)
                                               String.new:   307,248 ( 1.7%)
                                              Integer#===:   262,336 ( 1.5%)
                                         Symbol#end_with?:   255,538 ( 1.4%)
                                             Kernel#is_a?:   247,292 ( 1.4%)
                                    Process.clock_gettime:   221,588 ( 1.2%)
                                                Integer#>:   219,718 ( 1.2%)
                                            String#match?:   216,903 ( 1.2%)
                                          String#downcase:   213,108 ( 1.2%)
                                               Integer#<=:   202,617 ( 1.1%)
Top-20 calls to C functions from JIT code (80.3% of total 130,255,689):
                             rb_vm_opt_send_without_block: 28,329,698 (21.7%)
                                             rb_hash_aref:  8,992,191 ( 6.9%)
                                          rb_vm_env_write:  8,526,087 ( 6.5%)
                                               rb_vm_send:  8,337,448 ( 6.4%)
                     rb_zjit_writebarrier_check_immediate:  7,809,310 ( 6.0%)
                                        rb_obj_is_kind_of:  6,098,929 ( 4.7%)
                                rb_vm_getinstancevariable:  5,783,055 ( 4.4%)
                                        rb_vm_invokesuper:  5,038,443 ( 3.9%)
                           rb_ivar_get_at_no_ractor_check:  4,762,093 ( 3.7%)
                                             rb_ary_entry:  4,283,966 ( 3.3%)
                                             rb_hash_aset:  2,429,862 ( 1.9%)
                                rb_vm_setinstancevariable:  2,343,571 ( 1.8%)
                               rb_vm_opt_getconstant_path:  2,284,810 ( 1.8%)
                                               Hash#fetch:  1,778,515 ( 1.4%)
                                                    fetch:  1,405,591 ( 1.1%)
                                        rb_vm_invokeblock:  1,381,332 ( 1.1%)
                                        rb_str_buf_append:  1,362,272 ( 1.0%)
                                rb_ec_ary_new_from_values:  1,324,997 ( 1.0%)
                               rb_class_allocate_instance:  1,288,936 ( 1.0%)
                                    rb_hash_new_with_size:    998,628 ( 0.8%)
Top-2 not optimized method types for send (100.0% of total 4,896,274):
  iseq: 4,893,452 (99.9%)
  null:     2,822 ( 0.1%)
Top-4 not optimized method types for send_without_block (100.0% of total 782,296):
        optimized_send: 479,562 (61.3%)
        optimized_call: 256,609 (32.8%)
                  null:  41,967 ( 5.4%)
  optimized_block_call:   4,158 ( 0.5%)
Top-4 instructions with uncategorized fallback reason (100.0% of total 7,250,555):
             invokesuper: 5,038,443 (69.5%)
             invokeblock: 1,381,332 (19.1%)
             sendforward:   798,924 (11.0%)
  opt_send_without_block:    31,856 ( 0.4%)
Top-18 send fallback reasons (100.0% of total 43,885,845):
                          send_without_block_polymorphic: 18,533,639 (42.2%)
                                           uncategorized:  7,250,555 (16.5%)
                          send_not_optimized_method_type:  4,896,274 (11.2%)
                          send_without_block_no_profiles:  4,741,871 (10.8%)
                                        send_no_profiles:  2,865,577 ( 6.5%)
                            one_or_more_complex_arg_pass:  2,825,240 ( 6.4%)
  send_without_block_not_optimized_method_type_optimized:    740,329 ( 1.7%)
                          send_without_block_megamorphic:    709,818 ( 1.6%)
                                        send_polymorphic:    541,186 ( 1.2%)
        send_without_block_not_optimized_need_permission:    382,622 ( 0.9%)
                                   too_many_args_for_lir:    173,244 ( 0.4%)
                                     argc_param_mismatch:     50,382 ( 0.1%)
            send_without_block_not_optimized_method_type:     41,967 ( 0.1%)
                 send_without_block_cfunc_array_variadic:     36,302 ( 0.1%)
                                obj_to_string_not_string:     34,169 ( 0.1%)
              send_without_block_direct_keyword_mismatch:     32,436 ( 0.1%)
                                        send_megamorphic:     28,613 ( 0.1%)
                          ccall_with_frame_too_many_args:      1,621 ( 0.0%)
Top-4 setivar fallback reasons (100.0% of total 2,343,571):
            not_monomorphic: 2,120,856 (90.5%)
               not_t_object:   125,163 ( 5.3%)
                too_complex:    97,531 ( 4.2%)
  new_shape_needs_extension:        21 ( 0.0%)
Top-2 getivar fallback reasons (100.0% of total 5,908,168):
  not_monomorphic: 5,658,909 (95.8%)
      too_complex:   249,259 ( 4.2%)
Top-3 definedivar fallback reasons (100.0% of total 405,079):
  not_monomorphic: 397,150 (98.0%)
      too_complex:   5,122 ( 1.3%)
     not_t_object:   2,807 ( 0.7%)
Top-6 invokeblock handler (100.0% of total 1,381,332):
   monomorphic_iseq: 685,359 (49.6%)
        polymorphic: 521,992 (37.8%)
  monomorphic_other: 104,640 ( 7.6%)
  monomorphic_ifunc:  55,505 ( 4.0%)
        no_profiles:   9,164 ( 0.7%)
        megamorphic:   4,672 ( 0.3%)
Top-9 popular complex argument-parameter features not optimized (100.0% of total 3,097,538):
       param_kw_opt: 1,333,367 (43.0%)
        param_block:   632,885 (20.4%)
  param_forwardable:   600,601 (19.4%)
         param_rest:   329,020 (10.6%)
       param_kwrest:   119,971 ( 3.9%)
    caller_kw_splat:    39,001 ( 1.3%)
       caller_splat:    36,785 ( 1.2%)
    caller_blockarg:     5,798 ( 0.2%)
       caller_kwarg:       110 ( 0.0%)
Top-1 compile error reasons (100.0% of total 186,900):
  exception_handler: 186,900 (100.0%)
Top-7 unhandled YARV insns (100.0% of total 186,598):
       getblockparam: 99,414 (53.3%)
  invokesuperforward: 81,667 (43.8%)
       setblockparam:  2,837 ( 1.5%)
         getconstant:  1,537 ( 0.8%)
          checkmatch:    616 ( 0.3%)
         expandarray:    360 ( 0.2%)
                once:    167 ( 0.1%)
Top-3 unhandled HIR insns (100.0% of total 236,962):
          throw: 198,474 (83.8%)
  invokebuiltin:  35,767 (15.1%)
      array_max:   2,721 ( 1.1%)
Top-19 side exit reasons (100.0% of total 15,427,184):
                   guard_type_failure: 6,865,696 (44.5%)
                  guard_shape_failure: 6,779,586 (43.9%)
  block_param_proxy_not_iseq_or_ifunc: 1,030,319 ( 6.7%)
                   unhandled_hir_insn:   236,962 ( 1.5%)
                        compile_error:   186,900 ( 1.2%)
                  unhandled_yarv_insn:   186,598 ( 1.2%)
                 fixnum_mult_overflow:    50,739 ( 0.3%)
           block_param_proxy_modified:    28,119 ( 0.2%)
        patchpoint_no_singleton_class:    14,903 ( 0.1%)
         unhandled_newarray_send_pack:    14,481 ( 0.1%)
               fixnum_lshift_overflow:    10,085 ( 0.1%)
     patchpoint_stable_constant_names:     9,198 ( 0.1%)
              patchpoint_no_ep_escape:     7,815 ( 0.1%)
                  expandarray_failure:     4,533 ( 0.0%)
          patchpoint_method_redefined:       662 ( 0.0%)
               obj_to_string_fallback:       277 ( 0.0%)
                   guard_less_failure:       163 ( 0.0%)
                            interrupt:       128 ( 0.0%)
             guard_greater_eq_failure:        20 ( 0.0%)
                             send_count: 151,233,937
                     dynamic_send_count:  43,885,845 (29.0%)
                   optimized_send_count: 107,348,092 (71.0%)
                  dynamic_setivar_count:   2,343,571 ( 1.5%)
                  dynamic_getivar_count:   5,908,168 ( 3.9%)
              dynamic_definedivar_count:     405,079 ( 0.3%)
              iseq_optimized_send_count:  37,324,023 (24.7%)
      inline_cfunc_optimized_send_count:  46,056,028 (30.5%)
       inline_iseq_optimized_send_count:   3,756,875 ( 2.5%)
non_variadic_cfunc_optimized_send_count:  11,618,909 ( 7.7%)
    variadic_cfunc_optimized_send_count:   8,592,257 ( 5.7%)
compiled_iseq_count:                              5,289
failed_iseq_count:                                    0
compile_time:                                   1,664ms
profile_time:                                      13ms
gc_time:                                           20ms
invalidation_time:                                479ms
vm_write_pc_count:                          127,571,422
vm_write_sp_count:                          127,571,422
vm_write_locals_count:                      122,781,971
vm_write_stack_count:                       122,781,971
vm_write_to_parent_iseq_local_count:            689,945
vm_read_from_parent_iseq_local_count:        14,721,820
guard_type_count:                           167,633,896
guard_type_exit_ratio:                             4.1%
guard_shape_count:                                    0
code_region_bytes:                           38,912,000
zjit_alloc_bytes:                            40,542,102
total_mem_bytes:                             79,454,102
side_exit_count:                             15,427,184
total_insn_count:                           927,373,567
vm_insn_count:                              156,976,359
zjit_insn_count:                            770,397,208
ratio_in_zjit:                                    83.1%
```

</details>

<details>

<summary>after patch</summary>

```
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (55.0% of total 18,012,630):
                                               Hash#fetch: 3,184,101 (17.7%)
                                            Regexp#match?:   707,150 ( 3.9%)
                                                Hash#key?:   689,871 ( 3.8%)
                                              String#sub!:   489,841 ( 2.7%)
                                           Array#include?:   470,648 ( 2.6%)
                                             Set#include?:   397,520 ( 2.2%)
                                                String#<<:   396,279 ( 2.2%)
                                       String#start_with?:   382,538 ( 2.1%)
                                               Kernel#dup:   352,617 ( 2.0%)
                                               Array#any?:   350,454 ( 1.9%)
                                              Hash#delete:   331,802 ( 1.8%)
                                               String.new:   307,248 ( 1.7%)
                                              Integer#===:   262,336 ( 1.5%)
                                         Symbol#end_with?:   255,540 ( 1.4%)
                                             Kernel#is_a?:   247,292 ( 1.4%)
                                    Process.clock_gettime:   221,588 ( 1.2%)
                                                Integer#>:   219,718 ( 1.2%)
                                            String#match?:   216,905 ( 1.2%)
                                          String#downcase:   213,107 ( 1.2%)
                                               Integer#<=:   202,617 ( 1.1%)
Top-20 calls to C functions from JIT code (80.1% of total 130,218,934):
                             rb_vm_opt_send_without_block: 28,073,153 (21.6%)
                                             rb_hash_aref:  8,992,167 ( 6.9%)
                                          rb_vm_env_write:  8,526,089 ( 6.5%)
                                               rb_vm_send:  8,337,453 ( 6.4%)
                     rb_zjit_writebarrier_check_immediate:  7,786,426 ( 6.0%)
                                        rb_obj_is_kind_of:  6,098,927 ( 4.7%)
                                rb_vm_getinstancevariable:  5,783,053 ( 4.4%)
                                        rb_vm_invokesuper:  5,038,444 ( 3.9%)
                           rb_ivar_get_at_no_ractor_check:  4,762,093 ( 3.7%)
                                             rb_ary_entry:  4,283,965 ( 3.3%)
                                             rb_hash_aset:  2,429,864 ( 1.9%)
                                rb_vm_setinstancevariable:  2,343,573 ( 1.8%)
                               rb_vm_opt_getconstant_path:  2,284,809 ( 1.8%)
                                               Hash#fetch:  1,778,510 ( 1.4%)
                                                    fetch:  1,405,591 ( 1.1%)
                                        rb_vm_invokeblock:  1,381,329 ( 1.1%)
                                        rb_str_buf_append:  1,362,272 ( 1.0%)
                                rb_ec_ary_new_from_values:  1,325,005 ( 1.0%)
                               rb_class_allocate_instance:  1,288,944 ( 1.0%)
                                    rb_hash_new_with_size:    998,629 ( 0.8%)
Top-2 not optimized method types for send (100.0% of total 4,896,276):
  iseq: 4,893,454 (99.9%)
  null:     2,822 ( 0.1%)
Top-3 not optimized method types for send_without_block (100.0% of total 525,687):
        optimized_send: 479,562 (91.2%)
                  null:  41,967 ( 8.0%)
  optimized_block_call:   4,158 ( 0.8%)
Top-4 instructions with uncategorized fallback reason (100.0% of total 7,250,556):
             invokesuper: 5,038,444 (69.5%)
             invokeblock: 1,381,329 (19.1%)
             sendforward:   798,924 (11.0%)
  opt_send_without_block:    31,859 ( 0.4%)
Top-18 send fallback reasons (100.0% of total 43,629,303):
                          send_without_block_polymorphic: 18,533,669 (42.5%)
                                           uncategorized:  7,250,556 (16.6%)
                          send_not_optimized_method_type:  4,896,276 (11.2%)
                          send_without_block_no_profiles:  4,741,899 (10.9%)
                                        send_no_profiles:  2,865,579 ( 6.6%)
                            one_or_more_complex_arg_pass:  2,825,242 ( 6.5%)
                          send_without_block_megamorphic:    709,818 ( 1.6%)
                                        send_polymorphic:    541,187 ( 1.2%)
  send_without_block_not_optimized_method_type_optimized:    483,720 ( 1.1%)
        send_without_block_not_optimized_need_permission:    382,623 ( 0.9%)
                                   too_many_args_for_lir:    173,244 ( 0.4%)
                                     argc_param_mismatch:     50,382 ( 0.1%)
            send_without_block_not_optimized_method_type:     41,967 ( 0.1%)
                 send_without_block_cfunc_array_variadic:     36,302 ( 0.1%)
                                obj_to_string_not_string:     34,169 ( 0.1%)
              send_without_block_direct_keyword_mismatch:     32,436 ( 0.1%)
                                        send_megamorphic:     28,613 ( 0.1%)
                          ccall_with_frame_too_many_args:      1,621 ( 0.0%)
Top-4 setivar fallback reasons (100.0% of total 2,343,573):
            not_monomorphic: 2,120,858 (90.5%)
               not_t_object:   125,163 ( 5.3%)
                too_complex:    97,531 ( 4.2%)
  new_shape_needs_extension:        21 ( 0.0%)
Top-2 getivar fallback reasons (100.0% of total 5,908,165):
  not_monomorphic: 5,658,912 (95.8%)
      too_complex:   249,253 ( 4.2%)
Top-3 definedivar fallback reasons (100.0% of total 405,079):
  not_monomorphic: 397,150 (98.0%)
      too_complex:   5,122 ( 1.3%)
     not_t_object:   2,807 ( 0.7%)
Top-6 invokeblock handler (100.0% of total 1,381,329):
   monomorphic_iseq: 685,363 (49.6%)
        polymorphic: 521,984 (37.8%)
  monomorphic_other: 104,640 ( 7.6%)
  monomorphic_ifunc:  55,505 ( 4.0%)
        no_profiles:   9,164 ( 0.7%)
        megamorphic:   4,673 ( 0.3%)
Top-9 popular complex argument-parameter features not optimized (100.0% of total 3,094,719):
       param_kw_opt: 1,333,367 (43.1%)
        param_block:   632,886 (20.5%)
  param_forwardable:   600,605 (19.4%)
         param_rest:   329,019 (10.6%)
       param_kwrest:   119,971 ( 3.9%)
    caller_kw_splat:    39,001 ( 1.3%)
       caller_splat:    33,962 ( 1.1%)
    caller_blockarg:     5,798 ( 0.2%)
       caller_kwarg:       110 ( 0.0%)
Top-1 compile error reasons (100.0% of total 186,917):
  exception_handler: 186,917 (100.0%)
Top-7 unhandled YARV insns (100.0% of total 186,598):
       getblockparam: 99,414 (53.3%)
  invokesuperforward: 81,667 (43.8%)
       setblockparam:  2,837 ( 1.5%)
         getconstant:  1,537 ( 0.8%)
          checkmatch:    616 ( 0.3%)
         expandarray:    360 ( 0.2%)
                once:    167 ( 0.1%)
Top-3 unhandled HIR insns (100.0% of total 236,969):
          throw: 198,475 (83.8%)
  invokebuiltin:  35,773 (15.1%)
      array_max:   2,721 ( 1.1%)
Top-19 side exit reasons (100.0% of total 15,450,102):
                   guard_type_failure: 6,888,596 (44.6%)
                  guard_shape_failure: 6,779,586 (43.9%)
  block_param_proxy_not_iseq_or_ifunc: 1,030,319 ( 6.7%)
                   unhandled_hir_insn:   236,969 ( 1.5%)
                        compile_error:   186,917 ( 1.2%)
                  unhandled_yarv_insn:   186,598 ( 1.2%)
                 fixnum_mult_overflow:    50,739 ( 0.3%)
           block_param_proxy_modified:    28,119 ( 0.2%)
        patchpoint_no_singleton_class:    14,903 ( 0.1%)
         unhandled_newarray_send_pack:    14,481 ( 0.1%)
               fixnum_lshift_overflow:    10,085 ( 0.1%)
     patchpoint_stable_constant_names:     9,198 ( 0.1%)
              patchpoint_no_ep_escape:     7,815 ( 0.1%)
                  expandarray_failure:     4,533 ( 0.0%)
          patchpoint_method_redefined:       662 ( 0.0%)
               obj_to_string_fallback:       277 ( 0.0%)
                   guard_less_failure:       163 ( 0.0%)
                            interrupt:       122 ( 0.0%)
             guard_greater_eq_failure:        20 ( 0.0%)
                             send_count: 150,986,368
                     dynamic_send_count:  43,629,303 (28.9%)
                   optimized_send_count: 107,357,065 (71.1%)
                  dynamic_setivar_count:   2,343,573 ( 1.6%)
                  dynamic_getivar_count:   5,908,165 ( 3.9%)
              dynamic_definedivar_count:     405,079 ( 0.3%)
              iseq_optimized_send_count:  37,324,039 (24.7%)
      inline_cfunc_optimized_send_count:  46,056,046 (30.5%)
       inline_iseq_optimized_send_count:   3,756,881 ( 2.5%)
non_variadic_cfunc_optimized_send_count:  11,618,958 ( 7.7%)
    variadic_cfunc_optimized_send_count:   8,601,141 ( 5.7%)
compiled_iseq_count:                              5,289
failed_iseq_count:                                    0
compile_time:                                   1,700ms
profile_time:                                      13ms
gc_time:                                           21ms
invalidation_time:                                519ms
vm_write_pc_count:                          127,557,549
vm_write_sp_count:                          127,557,549
vm_write_locals_count:                      122,768,084
vm_write_stack_count:                       122,768,084
vm_write_to_parent_iseq_local_count:            689,953
vm_read_from_parent_iseq_local_count:        14,730,705
guard_type_count:                           167,853,730
guard_type_exit_ratio:                             4.1%
guard_shape_count:                                    0
code_region_bytes:                           38,928,384
zjit_alloc_bytes:                            41,103,415
total_mem_bytes:                             80,031,799
side_exit_count:                             15,450,102
total_insn_count:                           927,432,364
vm_insn_count:                              157,182,251
zjit_insn_count:                            770,250,113
ratio_in_zjit:                                    83.1%
```

</details>
This commit is contained in:
Nozomi Hijikata 2026-01-17 11:19:54 +09:00 committed by GitHub
parent 8a586af33b
commit 0e0fad1e58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Notes: git 2026-01-17 02:20:27 +00:00
Merged-By: tekknolagi <donotemailthisaddress@bernsteinbear.com>
8 changed files with 361 additions and 24 deletions

8
jit.c
View File

@ -191,6 +191,14 @@ rb_jit_get_proc_ptr(VALUE procv)
return proc; return proc;
} }
// Invoke the Proc identified by `recv` with `argc` arguments read from `argv`.
// `kw_splat` and `block_handler` are forwarded unchanged to rb_vm_invoke_proc(),
// and that call's result is returned.
// NOTE(review): `recv` is declared as `VALUE *` but is passed to GetProcPtr()
// without a dereference; presumably it carries the Proc VALUE itself. Confirm
// against the JIT callers before changing the signature.
VALUE
rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv, int kw_splat, VALUE block_handler)
{
rb_proc_t *proc;
// Resolve the rb_proc_t behind the receiver.
GetProcPtr(recv, proc);
return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler);
}
unsigned int unsigned int
rb_jit_iseq_builtin_attrs(const rb_iseq_t *iseq) rb_jit_iseq_builtin_attrs(const rb_iseq_t *iseq)
{ {

View File

@ -470,6 +470,74 @@ class TestZJIT < Test::Unit::TestCase
}, insns: [:getblockparamproxy] }, insns: [:getblockparamproxy]
end end
# Proc#call with a single positional argument. `test` is invoked twice with
# call_threshold: 2, and the expected result string is '2'.
# NOTE(review): `insns:` presumably checks that opt_send_without_block is present
# in the bytecode of the script -- confirm against the assert_compiles helper.
def test_optimized_method_call_proc_call
assert_compiles '2', %q{
p = proc { |x| x * 2 }
def test(p)
p.call(1)
end
test(p)
test(p)
}, call_threshold: 2, insns: [:opt_send_without_block]
end
# Proc#[] with a single positional argument (`p[2]`). `test` is invoked twice
# with call_threshold: 2, and the expected result string is '4'.
# NOTE(review): `insns:` presumably checks that opt_aref is present in the
# bytecode of the script -- confirm against the assert_compiles helper.
def test_optimized_method_call_proc_aref
assert_compiles '4', %q{
p = proc { |x| x * 2 }
def test(p)
p[2]
end
test(p)
test(p)
}, call_threshold: 2, insns: [:opt_aref]
end
# Proc#yield with a single positional argument. `test` is invoked twice with
# call_threshold: 2, and the expected result string is '6'.
# NOTE(review): `insns:` presumably checks that opt_send_without_block is present
# in the bytecode of the script -- confirm against the assert_compiles helper.
def test_optimized_method_call_proc_yield
assert_compiles '6', %q{
p = proc { |x| x * 2 }
def test(p)
p.yield(3)
end
test(p)
test(p)
}, call_threshold: 2, insns: [:opt_send_without_block]
end
# Proc#call with a keyword splat (`p.call(**h)`), where the proc collects its
# keywords into `**kw`. With h = { a: 1, b: 2 } the expected result string is '3'.
# NOTE(review): `insns:` presumably checks that opt_send_without_block is present
# in the bytecode of the script -- confirm against the assert_compiles helper.
def test_optimized_method_call_proc_kw_splat
assert_compiles '3', %q{
p = proc { |**kw| kw[:a] + kw[:b] }
def test(p, h)
p.call(**h)
end
h = { a: 1, b: 2 }
test(p, h)
test(p, h)
}, call_threshold: 2, insns: [:opt_send_without_block]
end
# Proc#call with a positional splat (`p.call(*ary)` where ary = [42]). The
# expected result string is '43'. Unlike the sibling tests, no `insns:` option
# is passed here.
def test_optimized_method_call_proc_call_splat
assert_compiles '43', %q{
p = proc { |x| x + 1 }
def test(p)
ary = [42]
p.call(*ary)
end
test(p)
test(p)
}, call_threshold: 2
end
# Proc#call with an explicit keyword argument (`p.call(a: 1)`) against a proc
# that declares a required keyword `a:`. The expected result string is '1'.
# No `insns:` option is passed here.
def test_optimized_method_call_proc_call_kwarg
assert_compiles '1', %q{
p = proc { |a:| a }
def test(p)
p.call(a: 1)
end
test(p)
test(p)
}, call_threshold: 2
end
def test_call_a_forwardable_method def test_call_a_forwardable_method
assert_runs '[]', %q{ assert_runs '[]', %q{
def test_root = forwardable def test_root = forwardable

8
yjit.c
View File

@ -223,14 +223,6 @@ typedef struct rb_iseq_param_keyword rb_seq_param_keyword_struct;
ID rb_get_symbol_id(VALUE namep); ID rb_get_symbol_id(VALUE namep);
// Invoke the Proc identified by `recv` with `argc` arguments read from `argv`.
// `kw_splat` and `block_handler` are forwarded unchanged to rb_vm_invoke_proc(),
// and that call's result is returned.
// NOTE(review): `recv` is declared as `VALUE *` but is passed to GetProcPtr()
// without a dereference; presumably it carries the Proc VALUE itself. Confirm
// against the JIT callers before changing the signature.
VALUE
rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv, int kw_splat, VALUE block_handler)
{
rb_proc_t *proc;
// Resolve the rb_proc_t behind the receiver.
GetProcPtr(recv, proc);
return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler);
}
// If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns. // If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns.
static bool static bool
invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq) invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq)

View File

@ -1164,14 +1164,6 @@ extern "C" {
pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void;
pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void); pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
pub fn rb_get_symbol_id(namep: VALUE) -> ID; pub fn rb_get_symbol_id(namep: VALUE) -> ID;
pub fn rb_optimized_call(
recv: *mut VALUE,
ec: *mut rb_execution_context_t,
argc: ::std::os::raw::c_int,
argv: *mut VALUE,
kw_splat: ::std::os::raw::c_int,
block_handler: VALUE,
) -> VALUE;
pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE;
pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
@ -1240,6 +1232,14 @@ extern "C" {
pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID;
pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE;
pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
pub fn rb_optimized_call(
recv: *mut VALUE,
ec: *mut rb_execution_context_t,
argc: ::std::os::raw::c_int,
argv: *mut VALUE,
kw_splat: ::std::os::raw::c_int,
block_handler: VALUE,
) -> VALUE;
pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int;
pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void;

View File

@ -404,6 +404,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state), None), Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state), None),
&Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
&Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason),
Insn::InvokeProc { recv, args, state, kw_splat } => gen_invokeproc(jit, asm, opnd!(recv), opnds!(args), *kw_splat, &function.frame_state(*state)),
// Ensure we have enough room to fit ec, self, and arguments // Ensure we have enough room to fit ec, self, and arguments
// TODO remove this check when we have stack args (we can use Time.new to test it) // TODO remove this check when we have stack args (we can use Time.new to test it)
Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state), Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state),
@ -1497,6 +1498,35 @@ fn gen_invokeblock(
) )
} }
/// Compile `Insn::InvokeProc`: emit a C call to `rb_optimized_call` that invokes
/// the Proc in `recv` with `args`.
///
/// * `recv` - operand passed as the first C argument (the Proc receiver).
/// * `args` - call arguments; they are handed to the callee through the pointer
///   returned by `gen_push_opnds`, and their count is passed as `argc`.
/// * `kw_splat` - forwarded to the callee as an immediate 0/1.
/// * `state` - frame state used to prepare for the non-leaf call.
///
/// Returns the operand holding the C call's return value. No block is passed:
/// the block handler argument is always `VM_BLOCK_HANDLER_NONE`.
fn gen_invokeproc(
jit: &mut JITState,
asm: &mut Assembler,
recv: Opnd,
args: Vec<Opnd>,
kw_splat: bool,
state: &FrameState,
) -> lir::Opnd {
// Must be emitted before the call itself.
// NOTE(review): presumably saves PC/SP and spills the frame because the callee
// can run arbitrary Ruby code -- confirm against gen_prepare_non_leaf_call.
gen_prepare_non_leaf_call(jit, asm, state);
asm_comment!(asm, "call invokeproc");
// NOTE(review): presumably materializes `args` in memory and yields a pointer
// to the first one -- confirm against gen_push_opnds.
let argv_ptr = gen_push_opnds(asm, &args);
// `i64::from(bool)` yields 0 or 1, matching the callee's `int kw_splat` parameter.
let kw_splat_opnd = Opnd::Imm(i64::from(kw_splat));
let result = asm_ccall!(
asm,
rb_optimized_call,
recv,
EC,
args.len().into(),
argv_ptr,
kw_splat_opnd,
VM_BLOCK_HANDLER_NONE.into()
);
// Paired with the gen_push_opnds call above; runs after the C call returns.
gen_pop_opnds(asm, &args);
result
}
/// Compile a dynamic dispatch for `super` /// Compile a dynamic dispatch for `super`
fn gen_invokesuper( fn gen_invokesuper(
jit: &mut JITState, jit: &mut JITState,

View File

@ -2114,6 +2114,14 @@ unsafe extern "C" {
pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID;
pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE;
pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
pub fn rb_optimized_call(
recv: *mut VALUE,
ec: *mut rb_execution_context_t,
argc: ::std::os::raw::c_int,
argv: *mut VALUE,
kw_splat: ::std::os::raw::c_int,
block_handler: VALUE,
) -> VALUE;
pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int;
pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void;

View File

@ -932,6 +932,13 @@ pub enum Insn {
state: InsnId, state: InsnId,
reason: SendFallbackReason, reason: SendFallbackReason,
}, },
/// Call a Proc via the `Proc#call` optimized method type (`OPTIMIZED_METHOD_TYPE_CALL`).
InvokeProc {
recv: InsnId,
args: Vec<InsnId>,
state: InsnId,
kw_splat: bool,
},
/// Optimized ISEQ call /// Optimized ISEQ call
SendWithoutBlockDirect { SendWithoutBlockDirect {
@ -1452,6 +1459,16 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
write!(f, " # SendFallbackReason: {reason}")?; write!(f, " # SendFallbackReason: {reason}")?;
Ok(()) Ok(())
} }
Insn::InvokeProc { recv, args, kw_splat, .. } => {
write!(f, "InvokeProc {recv}")?;
for arg in args {
write!(f, ", {arg}")?;
}
if *kw_splat {
write!(f, ", kw_splat")?;
}
Ok(())
}
Insn::InvokeBuiltin { bf, args, leaf, .. } => { Insn::InvokeBuiltin { bf, args, leaf, .. } => {
let bf_name = unsafe { CStr::from_ptr(bf.name) }.to_str().unwrap(); let bf_name = unsafe { CStr::from_ptr(bf.name) }.to_str().unwrap();
write!(f, "InvokeBuiltin{} {}", write!(f, "InvokeBuiltin{} {}",
@ -2228,6 +2245,12 @@ impl Function {
state, state,
reason, reason,
}, },
&InvokeProc { recv, ref args, state, kw_splat } => InvokeProc {
recv: find!(recv),
args: find_vec!(args),
state: find!(state),
kw_splat,
},
&InvokeBuiltin { bf, recv, ref args, state, leaf, return_type } => InvokeBuiltin { bf, recv: find!(recv), args: find_vec!(args), state, leaf, return_type }, &InvokeBuiltin { bf, recv, ref args, state, leaf, return_type } => InvokeBuiltin { bf, recv: find!(recv), args: find_vec!(args), state, leaf, return_type },
&ArrayDup { val, state } => ArrayDup { val: find!(val), state }, &ArrayDup { val, state } => ArrayDup { val: find!(val), state },
&HashDup { val, state } => HashDup { val: find!(val), state }, &HashDup { val, state } => HashDup { val: find!(val), state },
@ -2416,6 +2439,7 @@ impl Function {
Insn::SendForward { .. } => types::BasicObject, Insn::SendForward { .. } => types::BasicObject,
Insn::InvokeSuper { .. } => types::BasicObject, Insn::InvokeSuper { .. } => types::BasicObject,
Insn::InvokeBlock { .. } => types::BasicObject, Insn::InvokeBlock { .. } => types::BasicObject,
Insn::InvokeProc { .. } => types::BasicObject,
Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject), Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject),
Insn::Defined { pushval, .. } => Type::from_value(*pushval).union(types::NilClass), Insn::Defined { pushval, .. } => Type::from_value(*pushval).union(types::NilClass),
Insn::DefinedIvar { pushval, .. } => Type::from_value(*pushval).union(types::NilClass), Insn::DefinedIvar { pushval, .. } => Type::from_value(*pushval).union(types::NilClass),
@ -2828,14 +2852,7 @@ impl Function {
}; };
let ci = unsafe { get_call_data_ci(cd) }; // info about the call site let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
// If the call site info indicates that the `Function` has overly complex arguments, then
// do not optimize into a `SendWithoutBlockDirect`.
let flags = unsafe { rb_vm_ci_flag(ci) }; let flags = unsafe { rb_vm_ci_flag(ci) };
if unspecializable_call_type(flags) {
self.count_complex_call_features(block, flags);
self.set_dynamic_send_reason(insn_id, ComplexArgPass);
self.push_insn_id(block, insn_id); continue;
}
let mid = unsafe { vm_ci_mid(ci) }; let mid = unsafe { vm_ci_mid(ci) };
// Do method lookup // Do method lookup
@ -2863,6 +2880,14 @@ impl Function {
def_type = unsafe { get_cme_def_type(cme) }; def_type = unsafe { get_cme_def_type(cme) };
} }
// If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendWithoutBlockDirect`.
// Optimized methods (`VM_METHOD_TYPE_OPTIMIZED`) handle their own argument constraints (e.g., kw_splat for Proc call).
if def_type != VM_METHOD_TYPE_OPTIMIZED && unspecializable_call_type(flags) {
self.count_complex_call_features(block, flags);
self.set_dynamic_send_reason(insn_id, ComplexArgPass);
self.push_insn_id(block, insn_id); continue;
}
if def_type == VM_METHOD_TYPE_ISEQ { if def_type == VM_METHOD_TYPE_ISEQ {
// TODO(max): Allow non-iseq; cache cme // TODO(max): Allow non-iseq; cache cme
// Only specialize positional-positional calls // Only specialize positional-positional calls
@ -2993,7 +3018,31 @@ impl Function {
} else if def_type == VM_METHOD_TYPE_OPTIMIZED { } else if def_type == VM_METHOD_TYPE_OPTIMIZED {
let opt_type: OptimizedMethodType = unsafe { get_cme_def_body_optimized_type(cme) }.into(); let opt_type: OptimizedMethodType = unsafe { get_cme_def_body_optimized_type(cme) }.into();
match (opt_type, args.as_slice()) { match (opt_type, args.as_slice()) {
(OptimizedMethodType::Call, _) => {
if flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KWARG) != 0 {
self.count_complex_call_features(block, flags);
self.set_dynamic_send_reason(insn_id, ComplexArgPass);
self.push_insn_id(block, insn_id); continue;
}
// Check singleton class assumption first, before emitting other patchpoints
if !self.assume_no_singleton_classes(block, klass, state) {
self.set_dynamic_send_reason(insn_id, SingletonClassSeen);
self.push_insn_id(block, insn_id); continue;
}
self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state });
if let Some(profiled_type) = profiled_type {
recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state });
}
let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
let invoke_proc = self.push_insn(block, Insn::InvokeProc { recv, args: args.clone(), state, kw_splat });
self.make_equal_to(insn_id, invoke_proc);
}
(OptimizedMethodType::StructAref, &[]) | (OptimizedMethodType::StructAset, &[_]) => { (OptimizedMethodType::StructAref, &[]) | (OptimizedMethodType::StructAset, &[_]) => {
if unspecializable_call_type(flags) {
self.count_complex_call_features(block, flags);
self.set_dynamic_send_reason(insn_id, ComplexArgPass);
self.push_insn_id(block, insn_id); continue;
}
let index: i32 = unsafe { get_cme_def_body_optimized_index(cme) } let index: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
.try_into() .try_into()
.unwrap(); .unwrap();
@ -4416,7 +4465,8 @@ impl Function {
| &Insn::CCallWithFrame { recv, ref args, state, .. } | &Insn::CCallWithFrame { recv, ref args, state, .. }
| &Insn::SendWithoutBlockDirect { recv, ref args, state, .. } | &Insn::SendWithoutBlockDirect { recv, ref args, state, .. }
| &Insn::InvokeBuiltin { recv, ref args, state, .. } | &Insn::InvokeBuiltin { recv, ref args, state, .. }
| &Insn::InvokeSuper { recv, ref args, state, .. } => { | &Insn::InvokeSuper { recv, ref args, state, .. }
| &Insn::InvokeProc { recv, ref args, state, .. } => {
worklist.push_back(recv); worklist.push_back(recv);
worklist.extend(args); worklist.extend(args);
worklist.push_back(state); worklist.push_back(state);
@ -5041,6 +5091,7 @@ impl Function {
| Insn::CCallWithFrame { recv, ref args, .. } | Insn::CCallWithFrame { recv, ref args, .. }
| Insn::CCallVariadic { recv, ref args, .. } | Insn::CCallVariadic { recv, ref args, .. }
| Insn::InvokeBuiltin { recv, ref args, .. } | Insn::InvokeBuiltin { recv, ref args, .. }
| Insn::InvokeProc { recv, ref args, .. }
| Insn::ArrayInclude { target: recv, elements: ref args, .. } => { | Insn::ArrayInclude { target: recv, elements: ref args, .. } => {
self.assert_subtype(insn_id, recv, types::BasicObject)?; self.assert_subtype(insn_id, recv, types::BasicObject)?;
for &arg in args { for &arg in args {

View File

@ -3817,6 +3817,186 @@ mod hir_opt_tests {
"); ");
} }
// Snapshot test: a plain positional `p.call(1)` on a profiled Proc receiver
// should be specialized into an `InvokeProc` instruction.
#[test]
fn test_specialize_proc_call() {
// Define a proc and a method `test` that invokes it via `call`, then run
// `test` once with the proc as its argument.
eval("
p = proc { |x| x + 1 }
def test(p)
p.call(1)
end
test p
");
// Expected HIR for `test`: a NoSingletonClass patchpoint and a MethodRedefined
// patchpoint for Proc#call, a GuardType narrowing the receiver to exactly
// Proc, and finally InvokeProc in place of a generic send.
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:4:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal :p, l0, SP@4
Jump bb2(v1, v2)
bb1(v5:BasicObject, v6:BasicObject):
EntryPoint JIT(0)
Jump bb2(v5, v6)
bb2(v8:BasicObject, v9:BasicObject):
v14:Fixnum[1] = Const Value(1)
PatchPoint NoSingletonClass(Proc@0x1000)
PatchPoint MethodRedefined(Proc@0x1000, call@0x1008, cme:0x1010)
v23:HeapObject[class_exact:Proc] = GuardType v9, HeapObject[class_exact:Proc]
v24:BasicObject = InvokeProc v23, v14
CheckInterrupts
Return v24
");
}
// Snapshot test: invoking a Proc through the `[]` method (`p[2]`) should be
// specialized into an `InvokeProc` instruction.
#[test]
fn test_specialize_proc_aref() {
// Define a proc and a method `test` that invokes it via `[]`, then run
// `test` once with the proc as its argument.
eval("
p = proc { |x| x + 1 }
def test(p)
p[2]
end
test p
");
// Expected HIR for `test`: the MethodRedefined patchpoint names the `[]`
// method on Proc, the receiver is guarded to exactly Proc, and the call is
// emitted as InvokeProc.
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:4:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal :p, l0, SP@4
Jump bb2(v1, v2)
bb1(v5:BasicObject, v6:BasicObject):
EntryPoint JIT(0)
Jump bb2(v5, v6)
bb2(v8:BasicObject, v9:BasicObject):
v14:Fixnum[2] = Const Value(2)
PatchPoint NoSingletonClass(Proc@0x1000)
PatchPoint MethodRedefined(Proc@0x1000, []@0x1008, cme:0x1010)
v24:HeapObject[class_exact:Proc] = GuardType v9, HeapObject[class_exact:Proc]
v25:BasicObject = InvokeProc v24, v14
CheckInterrupts
Return v25
");
}
// Snapshot test: invoking a Proc through the `yield` method (`p.yield(3)`)
// should be specialized into an `InvokeProc` instruction.
#[test]
fn test_specialize_proc_yield() {
// Define a proc and a method `test` that invokes it via `yield`, then run
// `test` once with the proc as its argument.
eval("
p = proc { |x| x + 1 }
def test(p)
p.yield(3)
end
test p
");
// Expected HIR for `test`: the MethodRedefined patchpoint names the `yield`
// method on Proc, the receiver is guarded to exactly Proc, and the call is
// emitted as InvokeProc.
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:4:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal :p, l0, SP@4
Jump bb2(v1, v2)
bb1(v5:BasicObject, v6:BasicObject):
EntryPoint JIT(0)
Jump bb2(v5, v6)
bb2(v8:BasicObject, v9:BasicObject):
v14:Fixnum[3] = Const Value(3)
PatchPoint NoSingletonClass(Proc@0x1000)
PatchPoint MethodRedefined(Proc@0x1000, yield@0x1008, cme:0x1010)
v23:HeapObject[class_exact:Proc] = GuardType v9, HeapObject[class_exact:Proc]
v24:BasicObject = InvokeProc v23, v14
CheckInterrupts
Return v24
");
}
// Snapshot test: invoking a Proc through the `===` operator (`p === 1`)
// should be specialized into an `InvokeProc` instruction.
#[test]
fn test_specialize_proc_eqq() {
// Define a proc and a method `test` that invokes it via `===`, then run
// `test` once with the proc as its argument.
eval("
p = proc { |x| x > 0 }
def test(p)
p === 1
end
test p
");
// Expected HIR for `test`: the MethodRedefined patchpoint names the `===`
// method on Proc, the receiver is guarded to exactly Proc, and the call is
// emitted as InvokeProc.
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:4:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal :p, l0, SP@4
Jump bb2(v1, v2)
bb1(v5:BasicObject, v6:BasicObject):
EntryPoint JIT(0)
Jump bb2(v5, v6)
bb2(v8:BasicObject, v9:BasicObject):
v14:Fixnum[1] = Const Value(1)
PatchPoint NoSingletonClass(Proc@0x1000)
PatchPoint MethodRedefined(Proc@0x1000, ===@0x1008, cme:0x1010)
v23:HeapObject[class_exact:Proc] = GuardType v9, HeapObject[class_exact:Proc]
v24:BasicObject = InvokeProc v23, v14
CheckInterrupts
Return v24
");
}
// Negative snapshot test: a `call` with a splatted argument (`p.call(*empty)`)
// must NOT be specialized into `InvokeProc`.
#[test]
fn test_dont_specialize_proc_call_splat() {
// `test` splats a local (empty) array into the proc call.
eval("
p = proc { }
def test(p)
empty = []
p.call(*empty)
end
test p
");
// Expected HIR for `test`: the call stays a generic SendWithoutBlock with the
// "Complex argument passing" fallback reason, and the
// complex_arg_pass_caller_splat counter is incremented. No Proc patchpoints,
// GuardType, or InvokeProc appear.
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:4:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal :p, l0, SP@5
v3:NilClass = Const Value(nil)
Jump bb2(v1, v2, v3)
bb1(v6:BasicObject, v7:BasicObject):
EntryPoint JIT(0)
v8:NilClass = Const Value(nil)
Jump bb2(v6, v7, v8)
bb2(v10:BasicObject, v11:BasicObject, v12:NilClass):
v16:ArrayExact = NewArray
v22:ArrayExact = ToArray v16
IncrCounter complex_arg_pass_caller_splat
v24:BasicObject = SendWithoutBlock v11, :call, v22 # SendFallbackReason: Complex argument passing
CheckInterrupts
Return v24
");
}
// Negative snapshot test: a `call` with an explicit keyword argument
// (`p.call(a: 1)`) must NOT be specialized into `InvokeProc`.
#[test]
fn test_dont_specialize_proc_call_kwarg() {
// The proc takes a required keyword `a:` and `test` passes it by keyword.
eval("
p = proc { |a:| a }
def test(p)
p.call(a: 1)
end
test p
");
// Expected HIR for `test`: the call stays a generic SendWithoutBlock with the
// "Complex argument passing" fallback reason, and the
// complex_arg_pass_caller_kwarg counter is incremented. No Proc patchpoints,
// GuardType, or InvokeProc appear.
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:4:
bb0():
EntryPoint interpreter
v1:BasicObject = LoadSelf
v2:BasicObject = GetLocal :p, l0, SP@4
Jump bb2(v1, v2)
bb1(v5:BasicObject, v6:BasicObject):
EntryPoint JIT(0)
Jump bb2(v5, v6)
bb2(v8:BasicObject, v9:BasicObject):
v14:Fixnum[1] = Const Value(1)
IncrCounter complex_arg_pass_caller_kwarg
v16:BasicObject = SendWithoutBlock v9, :call, v14 # SendFallbackReason: Complex argument passing
CheckInterrupts
Return v16
");
}
#[test] #[test]
fn test_dont_specialize_definedivar_with_t_data() { fn test_dont_specialize_definedivar_with_t_data() {
eval(" eval("