ruby/yjit.c
Nozomi Hijikata 0e0fad1e58
ZJIT: Specialize OPTIMIZED_METHOD_TYPE_CALL (#15859)
Closes: https://github.com/Shopify/ruby/issues/865

## Benchmark 
### lobsters
- wall clock time
  - before patch: Average of last 10, non-warmup iters: 809ms
  - after patch: Average of last 10, non-warmup iters: 754ms
- zjit stats below

<details>

<summary>before patch</summary>

```
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (54.9% of total 18,003,698):
                                               Hash#fetch: 3,184,106 (17.7%)
                                            Regexp#match?:   707,148 ( 3.9%)
                                                Hash#key?:   689,879 ( 3.8%)
                                              String#sub!:   489,841 ( 2.7%)
                                           Array#include?:   470,648 ( 2.6%)
                                             Set#include?:   397,520 ( 2.2%)
                                                String#<<:   396,279 ( 2.2%)
                                       String#start_with?:   373,666 ( 2.1%)
                                               Kernel#dup:   352,617 ( 2.0%)
                                               Array#any?:   350,454 ( 1.9%)
                                              Hash#delete:   331,784 ( 1.8%)
                                               String.new:   307,248 ( 1.7%)
                                              Integer#===:   262,336 ( 1.5%)
                                         Symbol#end_with?:   255,538 ( 1.4%)
                                             Kernel#is_a?:   247,292 ( 1.4%)
                                    Process.clock_gettime:   221,588 ( 1.2%)
                                                Integer#>:   219,718 ( 1.2%)
                                            String#match?:   216,903 ( 1.2%)
                                          String#downcase:   213,108 ( 1.2%)
                                               Integer#<=:   202,617 ( 1.1%)
Top-20 calls to C functions from JIT code (80.3% of total 130,255,689):
                             rb_vm_opt_send_without_block: 28,329,698 (21.7%)
                                             rb_hash_aref:  8,992,191 ( 6.9%)
                                          rb_vm_env_write:  8,526,087 ( 6.5%)
                                               rb_vm_send:  8,337,448 ( 6.4%)
                     rb_zjit_writebarrier_check_immediate:  7,809,310 ( 6.0%)
                                        rb_obj_is_kind_of:  6,098,929 ( 4.7%)
                                rb_vm_getinstancevariable:  5,783,055 ( 4.4%)
                                        rb_vm_invokesuper:  5,038,443 ( 3.9%)
                           rb_ivar_get_at_no_ractor_check:  4,762,093 ( 3.7%)
                                             rb_ary_entry:  4,283,966 ( 3.3%)
                                             rb_hash_aset:  2,429,862 ( 1.9%)
                                rb_vm_setinstancevariable:  2,343,571 ( 1.8%)
                               rb_vm_opt_getconstant_path:  2,284,810 ( 1.8%)
                                               Hash#fetch:  1,778,515 ( 1.4%)
                                                    fetch:  1,405,591 ( 1.1%)
                                        rb_vm_invokeblock:  1,381,332 ( 1.1%)
                                        rb_str_buf_append:  1,362,272 ( 1.0%)
                                rb_ec_ary_new_from_values:  1,324,997 ( 1.0%)
                               rb_class_allocate_instance:  1,288,936 ( 1.0%)
                                    rb_hash_new_with_size:    998,628 ( 0.8%)
Top-2 not optimized method types for send (100.0% of total 4,896,274):
  iseq: 4,893,452 (99.9%)
  null:     2,822 ( 0.1%)
Top-4 not optimized method types for send_without_block (100.0% of total 782,296):
        optimized_send: 479,562 (61.3%)
        optimized_call: 256,609 (32.8%)
                  null:  41,967 ( 5.4%)
  optimized_block_call:   4,158 ( 0.5%)
Top-4 instructions with uncategorized fallback reason (100.0% of total 7,250,555):
             invokesuper: 5,038,443 (69.5%)
             invokeblock: 1,381,332 (19.1%)
             sendforward:   798,924 (11.0%)
  opt_send_without_block:    31,856 ( 0.4%)
Top-18 send fallback reasons (100.0% of total 43,885,845):
                          send_without_block_polymorphic: 18,533,639 (42.2%)
                                           uncategorized:  7,250,555 (16.5%)
                          send_not_optimized_method_type:  4,896,274 (11.2%)
                          send_without_block_no_profiles:  4,741,871 (10.8%)
                                        send_no_profiles:  2,865,577 ( 6.5%)
                            one_or_more_complex_arg_pass:  2,825,240 ( 6.4%)
  send_without_block_not_optimized_method_type_optimized:    740,329 ( 1.7%)
                          send_without_block_megamorphic:    709,818 ( 1.6%)
                                        send_polymorphic:    541,186 ( 1.2%)
        send_without_block_not_optimized_need_permission:    382,622 ( 0.9%)
                                   too_many_args_for_lir:    173,244 ( 0.4%)
                                     argc_param_mismatch:     50,382 ( 0.1%)
            send_without_block_not_optimized_method_type:     41,967 ( 0.1%)
                 send_without_block_cfunc_array_variadic:     36,302 ( 0.1%)
                                obj_to_string_not_string:     34,169 ( 0.1%)
              send_without_block_direct_keyword_mismatch:     32,436 ( 0.1%)
                                        send_megamorphic:     28,613 ( 0.1%)
                          ccall_with_frame_too_many_args:      1,621 ( 0.0%)
Top-4 setivar fallback reasons (100.0% of total 2,343,571):
            not_monomorphic: 2,120,856 (90.5%)
               not_t_object:   125,163 ( 5.3%)
                too_complex:    97,531 ( 4.2%)
  new_shape_needs_extension:        21 ( 0.0%)
Top-2 getivar fallback reasons (100.0% of total 5,908,168):
  not_monomorphic: 5,658,909 (95.8%)
      too_complex:   249,259 ( 4.2%)
Top-3 definedivar fallback reasons (100.0% of total 405,079):
  not_monomorphic: 397,150 (98.0%)
      too_complex:   5,122 ( 1.3%)
     not_t_object:   2,807 ( 0.7%)
Top-6 invokeblock handler (100.0% of total 1,381,332):
   monomorphic_iseq: 685,359 (49.6%)
        polymorphic: 521,992 (37.8%)
  monomorphic_other: 104,640 ( 7.6%)
  monomorphic_ifunc:  55,505 ( 4.0%)
        no_profiles:   9,164 ( 0.7%)
        megamorphic:   4,672 ( 0.3%)
Top-9 popular complex argument-parameter features not optimized (100.0% of total 3,097,538):
       param_kw_opt: 1,333,367 (43.0%)
        param_block:   632,885 (20.4%)
  param_forwardable:   600,601 (19.4%)
         param_rest:   329,020 (10.6%)
       param_kwrest:   119,971 ( 3.9%)
    caller_kw_splat:    39,001 ( 1.3%)
       caller_splat:    36,785 ( 1.2%)
    caller_blockarg:     5,798 ( 0.2%)
       caller_kwarg:       110 ( 0.0%)
Top-1 compile error reasons (100.0% of total 186,900):
  exception_handler: 186,900 (100.0%)
Top-7 unhandled YARV insns (100.0% of total 186,598):
       getblockparam: 99,414 (53.3%)
  invokesuperforward: 81,667 (43.8%)
       setblockparam:  2,837 ( 1.5%)
         getconstant:  1,537 ( 0.8%)
          checkmatch:    616 ( 0.3%)
         expandarray:    360 ( 0.2%)
                once:    167 ( 0.1%)
Top-3 unhandled HIR insns (100.0% of total 236,962):
          throw: 198,474 (83.8%)
  invokebuiltin:  35,767 (15.1%)
      array_max:   2,721 ( 1.1%)
Top-19 side exit reasons (100.0% of total 15,427,184):
                   guard_type_failure: 6,865,696 (44.5%)
                  guard_shape_failure: 6,779,586 (43.9%)
  block_param_proxy_not_iseq_or_ifunc: 1,030,319 ( 6.7%)
                   unhandled_hir_insn:   236,962 ( 1.5%)
                        compile_error:   186,900 ( 1.2%)
                  unhandled_yarv_insn:   186,598 ( 1.2%)
                 fixnum_mult_overflow:    50,739 ( 0.3%)
           block_param_proxy_modified:    28,119 ( 0.2%)
        patchpoint_no_singleton_class:    14,903 ( 0.1%)
         unhandled_newarray_send_pack:    14,481 ( 0.1%)
               fixnum_lshift_overflow:    10,085 ( 0.1%)
     patchpoint_stable_constant_names:     9,198 ( 0.1%)
              patchpoint_no_ep_escape:     7,815 ( 0.1%)
                  expandarray_failure:     4,533 ( 0.0%)
          patchpoint_method_redefined:       662 ( 0.0%)
               obj_to_string_fallback:       277 ( 0.0%)
                   guard_less_failure:       163 ( 0.0%)
                            interrupt:       128 ( 0.0%)
             guard_greater_eq_failure:        20 ( 0.0%)
                             send_count: 151,233,937
                     dynamic_send_count:  43,885,845 (29.0%)
                   optimized_send_count: 107,348,092 (71.0%)
                  dynamic_setivar_count:   2,343,571 ( 1.5%)
                  dynamic_getivar_count:   5,908,168 ( 3.9%)
              dynamic_definedivar_count:     405,079 ( 0.3%)
              iseq_optimized_send_count:  37,324,023 (24.7%)
      inline_cfunc_optimized_send_count:  46,056,028 (30.5%)
       inline_iseq_optimized_send_count:   3,756,875 ( 2.5%)
non_variadic_cfunc_optimized_send_count:  11,618,909 ( 7.7%)
    variadic_cfunc_optimized_send_count:   8,592,257 ( 5.7%)
compiled_iseq_count:                              5,289
failed_iseq_count:                                    0
compile_time:                                   1,664ms
profile_time:                                      13ms
gc_time:                                           20ms
invalidation_time:                                479ms
vm_write_pc_count:                          127,571,422
vm_write_sp_count:                          127,571,422
vm_write_locals_count:                      122,781,971
vm_write_stack_count:                       122,781,971
vm_write_to_parent_iseq_local_count:            689,945
vm_read_from_parent_iseq_local_count:        14,721,820
guard_type_count:                           167,633,896
guard_type_exit_ratio:                             4.1%
guard_shape_count:                                    0
code_region_bytes:                           38,912,000
zjit_alloc_bytes:                            40,542,102
total_mem_bytes:                             79,454,102
side_exit_count:                             15,427,184
total_insn_count:                           927,373,567
vm_insn_count:                              156,976,359
zjit_insn_count:                            770,397,208
ratio_in_zjit:                                    83.1%
```

</details>

<details>

<summary>after patch</summary>

```
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (55.0% of total 18,012,630):
                                               Hash#fetch: 3,184,101 (17.7%)
                                            Regexp#match?:   707,150 ( 3.9%)
                                                Hash#key?:   689,871 ( 3.8%)
                                              String#sub!:   489,841 ( 2.7%)
                                           Array#include?:   470,648 ( 2.6%)
                                             Set#include?:   397,520 ( 2.2%)
                                                String#<<:   396,279 ( 2.2%)
                                       String#start_with?:   382,538 ( 2.1%)
                                               Kernel#dup:   352,617 ( 2.0%)
                                               Array#any?:   350,454 ( 1.9%)
                                              Hash#delete:   331,802 ( 1.8%)
                                               String.new:   307,248 ( 1.7%)
                                              Integer#===:   262,336 ( 1.5%)
                                         Symbol#end_with?:   255,540 ( 1.4%)
                                             Kernel#is_a?:   247,292 ( 1.4%)
                                    Process.clock_gettime:   221,588 ( 1.2%)
                                                Integer#>:   219,718 ( 1.2%)
                                            String#match?:   216,905 ( 1.2%)
                                          String#downcase:   213,107 ( 1.2%)
                                               Integer#<=:   202,617 ( 1.1%)
Top-20 calls to C functions from JIT code (80.1% of total 130,218,934):
                             rb_vm_opt_send_without_block: 28,073,153 (21.6%)
                                             rb_hash_aref:  8,992,167 ( 6.9%)
                                          rb_vm_env_write:  8,526,089 ( 6.5%)
                                               rb_vm_send:  8,337,453 ( 6.4%)
                     rb_zjit_writebarrier_check_immediate:  7,786,426 ( 6.0%)
                                        rb_obj_is_kind_of:  6,098,927 ( 4.7%)
                                rb_vm_getinstancevariable:  5,783,053 ( 4.4%)
                                        rb_vm_invokesuper:  5,038,444 ( 3.9%)
                           rb_ivar_get_at_no_ractor_check:  4,762,093 ( 3.7%)
                                             rb_ary_entry:  4,283,965 ( 3.3%)
                                             rb_hash_aset:  2,429,864 ( 1.9%)
                                rb_vm_setinstancevariable:  2,343,573 ( 1.8%)
                               rb_vm_opt_getconstant_path:  2,284,809 ( 1.8%)
                                               Hash#fetch:  1,778,510 ( 1.4%)
                                                    fetch:  1,405,591 ( 1.1%)
                                        rb_vm_invokeblock:  1,381,329 ( 1.1%)
                                        rb_str_buf_append:  1,362,272 ( 1.0%)
                                rb_ec_ary_new_from_values:  1,325,005 ( 1.0%)
                               rb_class_allocate_instance:  1,288,944 ( 1.0%)
                                    rb_hash_new_with_size:    998,629 ( 0.8%)
Top-2 not optimized method types for send (100.0% of total 4,896,276):
  iseq: 4,893,454 (99.9%)
  null:     2,822 ( 0.1%)
Top-3 not optimized method types for send_without_block (100.0% of total 525,687):
        optimized_send: 479,562 (91.2%)
                  null:  41,967 ( 8.0%)
  optimized_block_call:   4,158 ( 0.8%)
Top-4 instructions with uncategorized fallback reason (100.0% of total 7,250,556):
             invokesuper: 5,038,444 (69.5%)
             invokeblock: 1,381,329 (19.1%)
             sendforward:   798,924 (11.0%)
  opt_send_without_block:    31,859 ( 0.4%)
Top-18 send fallback reasons (100.0% of total 43,629,303):
                          send_without_block_polymorphic: 18,533,669 (42.5%)
                                           uncategorized:  7,250,556 (16.6%)
                          send_not_optimized_method_type:  4,896,276 (11.2%)
                          send_without_block_no_profiles:  4,741,899 (10.9%)
                                        send_no_profiles:  2,865,579 ( 6.6%)
                            one_or_more_complex_arg_pass:  2,825,242 ( 6.5%)
                          send_without_block_megamorphic:    709,818 ( 1.6%)
                                        send_polymorphic:    541,187 ( 1.2%)
  send_without_block_not_optimized_method_type_optimized:    483,720 ( 1.1%)
        send_without_block_not_optimized_need_permission:    382,623 ( 0.9%)
                                   too_many_args_for_lir:    173,244 ( 0.4%)
                                     argc_param_mismatch:     50,382 ( 0.1%)
            send_without_block_not_optimized_method_type:     41,967 ( 0.1%)
                 send_without_block_cfunc_array_variadic:     36,302 ( 0.1%)
                                obj_to_string_not_string:     34,169 ( 0.1%)
              send_without_block_direct_keyword_mismatch:     32,436 ( 0.1%)
                                        send_megamorphic:     28,613 ( 0.1%)
                          ccall_with_frame_too_many_args:      1,621 ( 0.0%)
Top-4 setivar fallback reasons (100.0% of total 2,343,573):
            not_monomorphic: 2,120,858 (90.5%)
               not_t_object:   125,163 ( 5.3%)
                too_complex:    97,531 ( 4.2%)
  new_shape_needs_extension:        21 ( 0.0%)
Top-2 getivar fallback reasons (100.0% of total 5,908,165):
  not_monomorphic: 5,658,912 (95.8%)
      too_complex:   249,253 ( 4.2%)
Top-3 definedivar fallback reasons (100.0% of total 405,079):
  not_monomorphic: 397,150 (98.0%)
      too_complex:   5,122 ( 1.3%)
     not_t_object:   2,807 ( 0.7%)
Top-6 invokeblock handler (100.0% of total 1,381,329):
   monomorphic_iseq: 685,363 (49.6%)
        polymorphic: 521,984 (37.8%)
  monomorphic_other: 104,640 ( 7.6%)
  monomorphic_ifunc:  55,505 ( 4.0%)
        no_profiles:   9,164 ( 0.7%)
        megamorphic:   4,673 ( 0.3%)
Top-9 popular complex argument-parameter features not optimized (100.0% of total 3,094,719):
       param_kw_opt: 1,333,367 (43.1%)
        param_block:   632,886 (20.5%)
  param_forwardable:   600,605 (19.4%)
         param_rest:   329,019 (10.6%)
       param_kwrest:   119,971 ( 3.9%)
    caller_kw_splat:    39,001 ( 1.3%)
       caller_splat:    33,962 ( 1.1%)
    caller_blockarg:     5,798 ( 0.2%)
       caller_kwarg:       110 ( 0.0%)
Top-1 compile error reasons (100.0% of total 186,917):
  exception_handler: 186,917 (100.0%)
Top-7 unhandled YARV insns (100.0% of total 186,598):
       getblockparam: 99,414 (53.3%)
  invokesuperforward: 81,667 (43.8%)
       setblockparam:  2,837 ( 1.5%)
         getconstant:  1,537 ( 0.8%)
          checkmatch:    616 ( 0.3%)
         expandarray:    360 ( 0.2%)
                once:    167 ( 0.1%)
Top-3 unhandled HIR insns (100.0% of total 236,969):
          throw: 198,475 (83.8%)
  invokebuiltin:  35,773 (15.1%)
      array_max:   2,721 ( 1.1%)
Top-19 side exit reasons (100.0% of total 15,450,102):
                   guard_type_failure: 6,888,596 (44.6%)
                  guard_shape_failure: 6,779,586 (43.9%)
  block_param_proxy_not_iseq_or_ifunc: 1,030,319 ( 6.7%)
                   unhandled_hir_insn:   236,969 ( 1.5%)
                        compile_error:   186,917 ( 1.2%)
                  unhandled_yarv_insn:   186,598 ( 1.2%)
                 fixnum_mult_overflow:    50,739 ( 0.3%)
           block_param_proxy_modified:    28,119 ( 0.2%)
        patchpoint_no_singleton_class:    14,903 ( 0.1%)
         unhandled_newarray_send_pack:    14,481 ( 0.1%)
               fixnum_lshift_overflow:    10,085 ( 0.1%)
     patchpoint_stable_constant_names:     9,198 ( 0.1%)
              patchpoint_no_ep_escape:     7,815 ( 0.1%)
                  expandarray_failure:     4,533 ( 0.0%)
          patchpoint_method_redefined:       662 ( 0.0%)
               obj_to_string_fallback:       277 ( 0.0%)
                   guard_less_failure:       163 ( 0.0%)
                            interrupt:       122 ( 0.0%)
             guard_greater_eq_failure:        20 ( 0.0%)
                             send_count: 150,986,368
                     dynamic_send_count:  43,629,303 (28.9%)
                   optimized_send_count: 107,357,065 (71.1%)
                  dynamic_setivar_count:   2,343,573 ( 1.6%)
                  dynamic_getivar_count:   5,908,165 ( 3.9%)
              dynamic_definedivar_count:     405,079 ( 0.3%)
              iseq_optimized_send_count:  37,324,039 (24.7%)
      inline_cfunc_optimized_send_count:  46,056,046 (30.5%)
       inline_iseq_optimized_send_count:   3,756,881 ( 2.5%)
non_variadic_cfunc_optimized_send_count:  11,618,958 ( 7.7%)
    variadic_cfunc_optimized_send_count:   8,601,141 ( 5.7%)
compiled_iseq_count:                              5,289
failed_iseq_count:                                    0
compile_time:                                   1,700ms
profile_time:                                      13ms
gc_time:                                           21ms
invalidation_time:                                519ms
vm_write_pc_count:                          127,557,549
vm_write_sp_count:                          127,557,549
vm_write_locals_count:                      122,768,084
vm_write_stack_count:                       122,768,084
vm_write_to_parent_iseq_local_count:            689,953
vm_read_from_parent_iseq_local_count:        14,730,705
guard_type_count:                           167,853,730
guard_type_exit_ratio:                             4.1%
guard_shape_count:                                    0
code_region_bytes:                           38,928,384
zjit_alloc_bytes:                            41,103,415
total_mem_bytes:                             80,031,799
side_exit_count:                             15,450,102
total_insn_count:                           927,432,364
vm_insn_count:                              157,182,251
zjit_insn_count:                            770,250,113
ratio_in_zjit:                                    83.1%
```

</details>
2026-01-16 21:19:54 -05:00

539 lines
19 KiB
C

// This part of YJIT helps interfacing with the rest of CRuby and with the OS.
// Sometimes our FFI binding generation tool gives undesirable outputs when it
// sees C features that Rust doesn't support well. We mitigate that by binding
// functions which have simple parameter types. The boilerplate C functions for
// that purpose are in this file.
// Similarly, we wrap OS facilities we need in simple functions to help with
// FFI and to avoid the need to use external crates.io Rust libraries.
#include "internal.h"
#include "internal/sanitizers.h"
#include "internal/string.h"
#include "internal/hash.h"
#include "internal/variable.h"
#include "internal/compile.h"
#include "internal/class.h"
#include "internal/fixnum.h"
#include "internal/numeric.h"
#include "internal/gc.h"
#include "vm_core.h"
#include "vm_callinfo.h"
#include "builtin.h"
#include "insns.inc"
#include "insns_info.inc"
#include "yjit.h"
#include "zjit.h"
#include "vm_insnhelper.h"
#include "probes.h"
#include "probes_helper.h"
#include "iseq.h"
#include "ruby/debug.h"
#include "internal/cont.h"
// For mmap(), sysconf()
#ifndef _WIN32
#include <unistd.h>
#include <sys/mman.h>
#endif
#include <errno.h>
// We need size_t to have a known size to simplify code generation and FFI.
// This fails the build unless the maximum size_t value equals UINT64_MAX.
// TODO(alan): check this in configure.ac to fail fast on 32 bit platforms.
STATIC_ASSERT(64b_size_t, SIZE_MAX == UINT64_MAX);
// I don't know any C implementation that has uint64_t and puts padding bits
// into size_t but the standard seems to allow it. Requiring equal object
// sizes, on top of the equal maximum checked above, rules that out.
STATIC_ASSERT(size_t_no_padding_bits, sizeof(size_t) == sizeof(uint64_t));
// This build config impacts the pointer tagging scheme and we only want to
// support one scheme for simplicity, so the build requires USE_FLONUM to be
// nonzero.
STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
// NOTE: We can trust that uint8_t has no "padding bits" since the C spec
// guarantees it. Wording about padding bits is more explicit in C11 compared
// to C99. See C11 7.20.1.1p2. All this is to say we have _some_ standards backing to
// use a Rust `*mut u8` to represent a C `uint8_t *`.
//
// If we don't want to trust that we can interpret the C standard correctly, we
// could outsource that work to the Rust standard library by sticking to fundamental
// types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on
// the Rust side.
//
// What's up with the long prefix? Even though we build with `-fvisibility=hidden`
// we are sometimes a static library where the option doesn't prevent name collision.
// The "_yjit_" part is for trying to be informative. We might want different
// suffixes for symbols meant for Rust and symbols meant for broader CRuby.
// Register one raw sample frame in `hash`, keyed by PTR2NUM(frame).
//
// If the key already maps to a truthy value the hash is left untouched.
// Otherwise a new frame_info hash is stored under the key. It carries the
// frame's full label (:name), its absolute path or, when that is nil, its
// plain path (:file), zeroed :samples / :total_samples counters, empty
// :edges / :lines hashes and, unless the first line number is 0, :line.
// Nothing is returned; the result is the mutation of `hash`.
static void
rb_yjit_add_frame(VALUE hash, VALUE frame)
{
    VALUE key = PTR2NUM(frame);

    // Already recorded: nothing to do.
    if (RTEST(rb_hash_aref(hash, key))) {
        return;
    }

    VALUE info = rb_hash_new();

    // Full label for the frame
    VALUE label = rb_profile_frame_full_label(frame);
    // Absolute path of the frame from rb_iseq_realpath
    VALUE path = rb_profile_frame_absolute_path(frame);
    // Line number of the frame
    VALUE first_line = rb_profile_frame_first_lineno(frame);

    // Fall back to the rb_iseq_path when no absolute path is available
    if (NIL_P(path)) {
        path = rb_profile_frame_path(frame);
    }

    rb_hash_aset(info, ID2SYM(rb_intern("name")), label);
    rb_hash_aset(info, ID2SYM(rb_intern("file")), path);
    rb_hash_aset(info, ID2SYM(rb_intern("samples")), INT2NUM(0));
    rb_hash_aset(info, ID2SYM(rb_intern("total_samples")), INT2NUM(0));
    rb_hash_aset(info, ID2SYM(rb_intern("edges")), rb_hash_new());
    rb_hash_aset(info, ID2SYM(rb_intern("lines")), rb_hash_new());

    // A first line number of 0 is not recorded.
    if (line_is_known(first_line)) {
        rb_hash_aset(info, ID2SYM(rb_intern("line")), first_line);
    }

    rb_hash_aset(hash, key, info);
}
// Parses the YjitExitLocations raw_samples and line_samples collected by
// rb_yjit_record_exit_stack and turns them into 3 hashes (raw, lines, and frames) to
// be used by RubyVM::YJIT.exit_locations. yjit_raw_samples represents the raw frames information
// (without name, file, and line), and yjit_line_samples represents the line information
// of the iseq caller.
//
// The two input arrays are walked in lockstep as a sequence of records. Each
// record is a header entry (the raw value, cast to int, is the frame count),
// that many frame entries, and two trailing entries.
// NOTE(review): the trailing pair is presumably the exit instruction and its
// sample count -- confirm against the code that records the samples.
//
// Returns a new hash with the keys :raw, :lines and :frames.
VALUE
rb_yjit_exit_locations_dict(VALUE *yjit_raw_samples, int *yjit_line_samples, int samples_len)
{
    VALUE result = rb_hash_new();
    VALUE raw_out = rb_ary_new_capa(samples_len);
    VALUE lines_out = rb_ary_new_capa(samples_len);
    VALUE frames = rb_hash_new();

    // `pos` is advanced inside the body, one step per consumed entry.
    for (int pos = 0; pos < samples_len; ) {
        const int frame_count = (int)yjit_raw_samples[pos];
        const int header_line = (int)yjit_line_samples[pos];
        pos++;

        // + 1 as we append an additional sample for the insn
        rb_ary_push(raw_out, SIZET2NUM(frame_count + 1));
        rb_ary_push(lines_out, INT2NUM(header_line + 1));

        // One entry per frame: register it in the frames hash and copy
        // the raw value and line through to the output arrays.
        for (int left = frame_count; left > 0; left--, pos++) {
            rb_yjit_add_frame(frames, yjit_raw_samples[pos]);
            rb_ary_push(raw_out, SIZET2NUM(yjit_raw_samples[pos]));
            rb_ary_push(lines_out, INT2NUM(yjit_line_samples[pos]));
        }

        // The two trailing entries of the record are copied through
        // unchanged and are not added to the frames hash.
        for (int trailer = 0; trailer < 2; trailer++, pos++) {
            rb_ary_push(raw_out, SIZET2NUM(yjit_raw_samples[pos]));
            rb_ary_push(lines_out, INT2NUM(yjit_line_samples[pos]));
        }
    }

    // Add the raw samples, line samples, and frames to the result hash.
    rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_out);
    rb_hash_aset(result, ID2SYM(rb_intern("lines")), lines_out);
    rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames);

    return result;
}
// Is anyone listening for :c_call and :c_return event currently?
// Returns true when ruby_vm_c_events_enabled (declared outside this part of
// the file) is greater than zero. The `ec` argument is not consulted.
bool
rb_c_method_tracing_currently_enabled(const rb_execution_context_t *ec)
{
return ruby_vm_c_events_enabled > 0;
}
// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
// like the interpreter. When tracing for c_return is enabled, we patch the code after
// the C method return to call into this to fire the event.
//
// ec: execution context whose current frame is the C method's frame.
// return_value: the value handed to the c_return hook and then pushed onto
// the stack of the frame that is current once the C method's frame is popped.
void
rb_full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
{
// Capture the C method's frame before it is popped; `cfp` keeps referring
// to that frame for the hook call below.
rb_control_frame_t *cfp = ec->cfp;
// The passed-in ec must agree with GET_EC() about the current frame.
RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
// The frame must be a C function frame whose method entry is a cfunc.
RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
// CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
// Pop the C func's frame and fire the c_return TracePoint event
// Note that this is the same order as vm_call_cfunc_with_frame().
rb_vm_pop_frame(ec);
// `cfp->self` is read through the pointer captured before the pop.
EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
// Note, this deviates from the interpreter in that users need to enable
// a c_return TracePoint for this DTrace hook to work. A reasonable change
// since the Ruby return event works this way as well.
RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
// Push return value into the caller's stack. We know that it's a frame that
// uses cfp->sp because we are patching a call done with gen_send_cfunc().
// ec->cfp is re-read here on purpose: after the pop it is no longer the
// frame held in the local `cfp`.
ec->cfp->sp[0] = return_value;
ec->cfp->sp++;
}
// TODO(alan): consider using an opaque pointer for the payload rather than a void pointer
//
// Return the yjit_payload pointer stored in the iseq's body, or NULL when
// the iseq has no body. `iseq` must be an imemo_iseq (always asserted).
void *
rb_iseq_get_yjit_payload(const rb_iseq_t *iseq)
{
    RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));

    // Body is NULL when constructing the iseq.
    if (!iseq->body) {
        return NULL;
    }

    return iseq->body->yjit_payload;
}
// Store `payload` as the yjit_payload of the iseq's body.
// Always-on assertions require that `iseq` is an imemo_iseq, that it has a
// body, and that no payload has been stored yet (the current one is NULL).
void
rb_iseq_set_yjit_payload(const rb_iseq_t *iseq, void *payload)
{
RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
RUBY_ASSERT_ALWAYS(iseq->body);
RUBY_ASSERT_ALWAYS(NULL == iseq->body->yjit_payload);
// The write goes through the body pointer, so the const on `iseq` itself
// is not violated.
iseq->body->yjit_payload = payload;
}
// This is defined only as a named struct inside rb_iseq_constant_body.
// By giving it a separate typedef, we make it nameable by rust-bindgen.
// Bindgen's temp/anon name isn't guaranteed stable.
typedef struct rb_iseq_param_keyword rb_seq_param_keyword_struct;
// Prototype only; no definition is visible in this part of the file.
ID rb_get_symbol_id(VALUE namep);
// If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns.
//
// The check decodes the opcode at offset 0, requires the whole iseq to be
// exactly as long as that instruction plus one `leave`, and then requires
// the instruction that follows to be `leave`.
static bool
invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq)
{
    const int first = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]);
    const int first_len = insn_len(first);

    // Anything other than "<first insn> leave" cannot match.
    if ((int)iseq->body->iseq_size != first_len + insn_len(BIN(leave))) {
        return false;
    }

    const int second = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[first_len]);

    const bool first_is_delegate =
        first == BIN(opt_invokebuiltin_delegate) ||
        first == BIN(opt_invokebuiltin_delegate_leave);

    return first_is_delegate && second == BIN(leave);
}
// Return an rb_builtin_function if the iseq contains only that builtin function.
// Returns NULL when invokebuiltin_delegate_leave_p() rejects the iseq.
const struct rb_builtin_function *
rb_yjit_builtin_function(const rb_iseq_t *iseq)
{
    if (!invokebuiltin_delegate_leave_p(iseq)) {
        return NULL;
    }

    // Slot 1 of the encoded iseq, right after the leading opcode, is
    // reinterpreted as the builtin function pointer.
    return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
}
// Append the bytes of `str2` to `str1` with rb_str_cat() and return
// whatever rb_str_cat() returns.
VALUE
rb_yjit_str_simple_append(VALUE str1, VALUE str2)
{
    const char *src = RSTRING_PTR(str2);
    const long src_len = RSTRING_LEN(str2);

    return rb_str_cat(str1, src, src_len);
}
// Prototype only; no definition is visible in this part of the file.
extern VALUE *rb_vm_base_ptr(struct rb_control_frame_struct *cfp);
// Negation of rb_str_eql_internal(): Qfalse when it reports Qtrue for the
// two strings, Qtrue for any other result.
VALUE
rb_str_neq_internal(VALUE str1, VALUE str2)
{
    if (rb_str_eql_internal(str1, str2) == Qtrue) {
        return Qfalse;
    }
    return Qtrue;
}
// Prototype only; no definition is visible in this part of the file.
extern VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary);
// Call rb_ary_subseq() starting at `beg`, passing the array's full length
// as the requested length.
// NOTE(review): presumably rb_ary_subseq() clamps to the elements that
// remain after `beg` -- confirm against its definition.
VALUE
rb_yjit_rb_ary_subseq_length(VALUE ary, long beg)
{
    return rb_ary_subseq(ary, beg, RARRAY_LEN(ary));
}
// Return non-zero when `obj` is an array and its last item is a
// `ruby2_keywords` hash. We don't support this kind of splat.
//
// The non-zero value is the raw result of testing the
// RHASH_PASS_AS_KEYWORDS flag, not a normalized 1.
size_t
rb_yjit_ruby2_keywords_splat_p(VALUE obj)
{
    size_t flagged = 0;

    if (RB_TYPE_P(obj, T_ARRAY)) {
        const long count = RARRAY_LEN(obj);

        if (count != 0) {
            VALUE tail = RARRAY_AREF(obj, count - 1);

            if (RB_TYPE_P(tail, T_HASH)) {
                flagged = FL_TEST_RAW(tail, RHASH_PASS_AS_KEYWORDS);
            }
        }
    }

    return flagged;
}
// Checks to establish preconditions for rb_yjit_splat_varg_cfunc()
//
// Returns Qtrue when the splat array can be expanded onto the stack at `sp`
// and Qfalse when it is too long, would not fit below `cfp - 2`, or ends
// with a hash flagged RHASH_PASS_AS_KEYWORDS.
VALUE
rb_yjit_splat_varg_checks(VALUE *sp, VALUE splat_array, rb_control_frame_t *cfp)
{
    // We inserted a T_ARRAY guard before this call
    const long count = RARRAY_LEN(splat_array);

    // Large splat arrays need a separate allocation
    if (count < 0 || count > VM_ARGC_STACK_MAX) {
        return Qfalse;
    }

    // Would we overflow if we put the contents of the array onto the stack?
    if (sp + count > (VALUE *)(cfp - 2)) {
        return Qfalse;
    }

    // An empty array has no trailing hash to inspect.
    if (count == 0) {
        return Qtrue;
    }

    // Reject keywords hash since that requires duping it sometimes
    VALUE tail = RARRAY_AREF(splat_array, count - 1);
    const bool keywords_hash =
        RB_TYPE_P(tail, T_HASH) && FL_TEST_RAW(tail, RHASH_PASS_AS_KEYWORDS);

    return keywords_hash ? Qfalse : Qtrue;
}
// Push array elements to the stack for a C method that has a variable number
// of parameters. Returns the number of arguments the splat array contributes.
//
// `stack_splat_array` points at the stack slot that holds the array; the
// elements are copied starting at that same slot, replacing the array
// reference.
int
rb_yjit_splat_varg_cfunc(VALUE *stack_splat_array)
{
    // Read the array before the copy below overwrites its slot.
    VALUE splat_array = *stack_splat_array;

    RUBY_ASSERT(RB_TYPE_P(splat_array, T_ARRAY));

    // We already checked that length fits in `int`
    const int count = (int)RARRAY_LEN(splat_array);

    // Push the contents of the array onto the stack
    MEMCPY(stack_splat_array, RARRAY_CONST_PTR(splat_array), VALUE, count);

    return count;
}
// Debugging aid: write "<this function's name> <path>:<line>" to stderr,
// where the path comes from rb_iseq_path() and the line number from
// rb_iseq_line_no() for instruction index `insn_idx`.
void
rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx)
{
    VALUE iseq_path = rb_iseq_path(iseq);
    const char *path_bytes = RSTRING_PTR(iseq_path);
    long path_len = RSTRING_LEN(iseq_path);

    // The path is printed with an explicit length via %.*s
    fprintf(stderr, "%s %.*s:%u\n", __func__, (int)path_len, path_bytes,
            rb_iseq_line_no(iseq, insn_idx));
}
// Count the decimal digits of `integer`. Zero counts as one digit, and a
// minus sign is not counted for negative values.
static int
num_digits(int integer)
{
    int count = 1;
    for (int rest = integer / 10; rest != 0; rest /= 10) {
        count++;
    }
    return count;
}
// Allocate a C string that formats an ISEQ label like iseq_inspect(), in the
// form "<label>@<path>:<lineno>". The buffer comes from ZALLOC_N().
// NOTE(review): the caller presumably owns the returned buffer and must
// release it; confirm against the call sites.
char *
rb_yjit_iseq_inspect(const rb_iseq_t *iseq)
{
    const char *iseq_label = RSTRING_PTR(iseq->body->location.label);
    const char *iseq_path = RSTRING_PTR(rb_iseq_path(iseq));
    const int first_line = iseq->body->location.code_location.beg_pos.lineno;

    // '@', ':' and the terminating NUL, plus the three formatted pieces
    size_t capacity = 3;
    capacity += strlen(iseq_label);
    capacity += strlen(iseq_path);
    capacity += num_digits(first_line);

    char *formatted = ZALLOC_N(char, capacity);
    snprintf(formatted, capacity, "%s@%s:%d", iseq_label, iseq_path, first_line);
    return formatted;
}
// Function wrapper that forwards `st`, `k` and `v` unchanged to RSTRUCT_SET().
// There are RSTRUCT_SETs in ruby/internal/core/rstruct.h and internal/struct.h
// with different types (int vs long) for k. Here we use the one from ruby/internal/core/rstruct.h,
// which takes an int.
void
rb_RSTRUCT_SET(VALUE st, int k, VALUE v)
{
RSTRUCT_SET(st, k, v);
}
// Return the encoding index of `obj`, as produced by RB_ENCODING_GET().
int
rb_ENCODING_GET(VALUE obj)
{
    const int enc_index = RB_ENCODING_GET(obj);
    return enc_index;
}
// Report whether the IMEMO_CONST_CACHE_SHAREABLE bit is set in the flags of
// the constant cache entry `ice`.
bool
rb_yjit_constcache_shareable(const struct iseq_inline_constant_cache_entry *ice)
{
    if (ice->flags & IMEMO_CONST_CACHE_SHAREABLE) {
        return true;
    }
    return false;
}
// For running write barriers from Rust. Required when we add a new edge in the
// object graph from `old` to `young`.
// Forwards to rb_obj_written() with Qundef as its second argument; `file` and
// `line` are passed through unchanged and the callee's result is discarded.
void
rb_yjit_obj_written(VALUE old, VALUE young, const char *file, int line)
{
rb_obj_written(old, Qundef, young, file, line);
}
// Generate a JIT entry point for `iseq` while holding the VM lock, and store
// the resulting code pointer on the ISEQ body: in `jit_exception` when
// `jit_exception` is true, in `jit_entry` otherwise.
void
rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception)
{
    // defined in Rust
    uint8_t *rb_yjit_iseq_gen_entry_point(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception);

    RB_VM_LOCKING() {
        rb_vm_barrier();

        // Compile a block version starting at the current instruction
        rb_jit_func_t entry = (rb_jit_func_t)(uintptr_t)rb_yjit_iseq_gen_entry_point(iseq, ec, jit_exception);

        if (!jit_exception) {
            iseq->body->jit_entry = entry;
        }
        else {
            iseq->body->jit_exception = entry;
        }
    }
}
// GC root for interacting with the GC
// The only member is a placeholder that exists so the struct is not empty.
struct yjit_root_struct {
bool unused; // empty structs are not legal in C99
};
// For dealing with refinements
// Intentionally a no-op: takes no arguments, returns nothing, and the body
// contains no statements.
void
rb_yjit_invalidate_all_method_lookup_assumptions(void)
{
// It looks like Module#using actually doesn't need to invalidate all the
// method caches, so we do nothing here for now.
}
// Number of object shapes, which might be useful for investigating YJIT exit reasons.
// The value of rb_shapes_count() is cast to unsigned long and converted to a
// Ruby object with ULONG2NUM().
VALUE
rb_object_shape_count(void)
{
    const unsigned long shape_total = (unsigned long)rb_shapes_count();
    return ULONG2NUM(shape_total);
}
// Forward `obj` to rb_shape_obj_too_complex_p() and return its answer.
bool
rb_yjit_shape_obj_too_complex_p(VALUE obj)
{
    const bool too_complex = rb_shape_obj_too_complex_p(obj);
    return too_complex;
}
// Return RSHAPE_CAPACITY() for the shape identified by `shape_id`.
attr_index_t
rb_yjit_shape_capacity(shape_id_t shape_id)
{
    const attr_index_t capacity = RSHAPE_CAPACITY(shape_id);
    return capacity;
}
// Return RSHAPE_INDEX() for the shape identified by `shape_id`.
attr_index_t
rb_yjit_shape_index(shape_id_t shape_id)
{
    const attr_index_t index = RSHAPE_INDEX(shape_id);
    return index;
}
// The number of stack slots that vm_sendish() pops for send and invokesuper,
// derived from sp_inc_of_sendish() for the call info `ci`.
size_t
rb_yjit_sendish_sp_pops(const struct rb_callinfo *ci)
{
    // The leading 1 cancels the return value push, leaving only the pops
    const size_t pops = 1 - sp_inc_of_sendish(ci);
    return pops;
}
// The number of stack slots that vm_sendish() pops for invokeblock,
// derived from sp_inc_of_invokeblock() for the call info `ci`.
size_t
rb_yjit_invokeblock_sp_pops(const struct rb_callinfo *ci)
{
    // The leading 1 cancels the return value push, leaving only the pops
    const size_t pops = 1 - sp_inc_of_invokeblock(ci);
    return pops;
}
// Read `defined_ractor_id` from the bmethod body of the method definition
// attached to `cme`.
// NOTE(review): no method type check happens here; presumably callers only
// pass bmethod entries. Confirm at the call sites.
rb_serial_t
rb_yjit_cme_ractor_serial(const rb_callable_method_entry_t *cme)
{
    const rb_serial_t ractor_serial = cme->def->body.bmethod.defined_ractor_id;
    return ractor_serial;
}
// Setup jit_return to avoid returning a non-Qundef value on a non-FINISH frame.
// See [jit_compile_exception] for details.
//
// `leave_exit` and `leave_exception` are the two candidate values for
// jit_return; which frame receives which is decided by the cases below.
void
rb_yjit_set_exception_return(rb_control_frame_t *cfp, void *leave_exit, void *leave_exception)
{
    // If it's a FINISH frame, just normally exit with a non-Qundef value.
    if (VM_FRAME_FINISHED_P(cfp)) {
        cfp->jit_return = leave_exit;
        return;
    }

    // If the caller was not JIT code, exit to the interpreter with Qundef
    // to keep executing Ruby frames with the interpreter.
    if (!cfp->jit_return) {
        cfp->jit_return = leave_exception;
        return;
    }

    // Walk previous control frames until a FINISH frame is reached, and patch
    // the first frame found that would return through leave_exit.
    for (rb_control_frame_t *frame = cfp;
         !VM_FRAME_FINISHED_P(frame);
         frame = RUBY_VM_PREVIOUS_CONTROL_FRAME(frame)) {
        if (frame->jit_return != leave_exit) {
            continue;
        }
        // Unlike jit_exec(), leave_exit is not safe on a non-FINISH frame on
        // jit_exec_exception(). See [jit_exec] and [jit_exec_exception] for
        // details. Exit to the interpreter with Qundef to let it keep executing
        // other Ruby frames.
        frame->jit_return = leave_exception;
        return;
    }
}
// VM_INSTRUCTION_SIZE differs depending on whether ZJIT is part of the build.
// bindgen can only capture one version of the constant for Rust, so that
// copy is treated as an upper bound and this function reports the accurate
// value for the current build.
uint32_t
rb_vm_instruction_size(void)
{
    const uint32_t insn_count = VM_INSTRUCTION_SIZE;
    return insn_count;
}
// Primitives used by yjit.rb
// Only the prototypes appear here; no definitions are visible in this part of
// the file (presumably they are implemented on the Rust side of YJIT; confirm).
// Every prototype takes an execution context pointer and `self` first.
// NOTE(review): rb_yjit_log_enabled_p and rb_yjit_print_log_p name the context
// parameter `c` while the others use `ec`; prototype parameter names do not
// affect callers.
VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_print_stats_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_log_enabled_p(rb_execution_context_t *c, VALUE self);
VALUE rb_yjit_print_log_p(rb_execution_context_t *c, VALUE self);
VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self, VALUE key);
VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_log(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq);
VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq);
VALUE rb_yjit_code_gc(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_simulate_oom_bang(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_exit_locations(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_enable(rb_execution_context_t *ec, VALUE self, VALUE gen_stats, VALUE print_stats, VALUE gen_compilation_log, VALUE print_compilation_log, VALUE mem_size, VALUE call_threshold);
VALUE rb_yjit_c_builtin_p(rb_execution_context_t *ec, VALUE self);
// Allow YJIT_C_BUILTIN macro to force --yjit-c-builtin
// With YJIT_C_BUILTIN defined, yjit_c_builtin_p is a file-local function that
// ignores its arguments and always returns Qtrue. Without it, the name is a
// macro alias for rb_yjit_c_builtin_p.
#ifdef YJIT_C_BUILTIN
static VALUE yjit_c_builtin_p(rb_execution_context_t *ec, VALUE self) { return Qtrue; }
#else
#define yjit_c_builtin_p rb_yjit_c_builtin_p
#endif
// Preprocessed yjit.rb generated during build
#include "yjit.rbinc"