From 319550527ff8fbff6ee586fb75da2234de5d2feb Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 11 Aug 2025 13:21:45 -0700 Subject: [PATCH] ZJIT: Add compile/profile/GC/invalidation time stats (#14158) Co-authored-by: Stan Lo --- zjit.rb | 24 ++++++++++++- zjit/src/codegen.rs | 7 ++-- zjit/src/gc.rs | 46 ++++++++++++++----------- zjit/src/invariants.rs | 24 +++++++------ zjit/src/profile.rs | 8 +++-- zjit/src/stats.rs | 77 +++++++++++++++++++++++++++++++++++------- 6 files changed, 138 insertions(+), 48 deletions(-) diff --git a/zjit.rb b/zjit.rb index a307abdf88..fc2c80c52f 100644 --- a/zjit.rb +++ b/zjit.rb @@ -22,6 +22,7 @@ class << RubyVM::ZJIT # Return \ZJIT statistics as a Hash def stats stats = Primitive.rb_zjit_stats + return nil if stats.nil? if stats.key?(:vm_insns_count) && stats.key?(:zjit_insns_count) stats[:total_insns_count] = stats[:vm_insns_count] + stats[:zjit_insns_count] @@ -37,15 +38,29 @@ class << RubyVM::ZJIT stats = self.stats [ + :compile_time_ns, + :profile_time_ns, + :gc_time_ns, + :invalidation_time_ns, :total_insns_count, :vm_insns_count, :zjit_insns_count, :ratio_in_zjit, ].each do |key| + # Some stats like vm_insns_count and ratio_in_zjit are not supported on the release build + next unless stats.key?(key) value = stats[key] - if key == :ratio_in_zjit + + case key + when :ratio_in_zjit value = '%0.1f%%' % value + when /_time_ns\z/ + key = key.to_s.sub(/_time_ns\z/, '_time') + value = "#{number_with_delimiter(value / 10**6)}ms" + else + value = number_with_delimiter(value) end + buf << "#{'%-18s' % "#{key}:"} #{value}\n" end buf @@ -59,6 +74,13 @@ class << RubyVM::ZJIT # :stopdoc: private + def number_with_delimiter(number) + s = number.to_s + i = s.index('.') || s.size + s.insert(i -= 3, ',') while i > 3 + s + end + # Print ZJIT stats def print_stats $stderr.write stats_string diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 1d6901bac4..f9532dfe03 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -7,7 +7,7 @@ use crate::backend::current::{Reg, ALLOC_REGS}; use crate::invariants::{track_bop_assumption, track_cme_assumption, track_single_ractor_assumption, track_stable_constant_names_assumption}; use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_iseq_payload_ptr}; use crate::state::ZJITState; -use crate::stats::{counter_ptr, Counter}; +use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; @@ -86,7 +86,7 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, _ec: EcPtr) -> *co // Take a lock to avoid writing to ISEQ in parallel with Ractors. // with_vm_lock() does nothing if the program doesn't use Ractors. let code_ptr = with_vm_lock(src_loc!(), || { - gen_iseq_entry_point(iseq) + with_time_stat(compile_time_ns, || gen_iseq_entry_point(iseq)) }); // Assert that the ISEQ compiles if RubyVM::ZJIT.assert_compiles is enabled @@ -1359,7 +1359,8 @@ c_callable! { with_vm_lock(src_loc!(), || { // Get a pointer to compiled code or the side-exit trampoline let cb = ZJITState::get_code_block(); - let code_ptr = if let Some(code_ptr) = function_stub_hit_body(cb, iseq, branch_ptr) { + let code_ptr = with_time_stat(compile_time_ns, || function_stub_hit_body(cb, iseq, branch_ptr)); + let code_ptr = if let Some(code_ptr) = code_ptr { code_ptr } else { // gen_push_frame() doesn't set PC and SP, so we need to set them for side-exit diff --git a/zjit/src/gc.rs b/zjit/src/gc.rs index d94d86036b..ea1b0ed2ea 100644 --- a/zjit/src/gc.rs +++ b/zjit/src/gc.rs @@ -1,7 +1,8 @@ // This module is responsible for marking/moving objects on GC. use std::{ffi::c_void, ops::Range}; -use crate::{cruby::*, profile::IseqProfile, state::ZJITState, virtualmem::CodePtr}; +use crate::{cruby::*, profile::IseqProfile, state::ZJITState, stats::with_time_stat, virtualmem::CodePtr}; +use crate::stats::Counter::gc_time_ns; /// This is all the data ZJIT stores on an ISEQ. We mark objects in this struct on GC. #[derive(Debug)] @@ -65,6 +66,7 @@ fn payload_ptr_as_mut(payload_ptr: *mut IseqPayload) -> &'static mut IseqPayload unsafe { payload_ptr.as_mut() }.unwrap() } +/// GC callback for marking GC objects in the per-ISEQ payload. #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_iseq_mark(payload: *mut c_void) { let payload = if payload.is_null() { @@ -80,7 +82,29 @@ pub extern "C" fn rb_zjit_iseq_mark(payload: *mut c_void) { &*(payload as *const IseqPayload) } }; + with_time_stat(gc_time_ns, || iseq_mark(payload)); +} +/// GC callback for updating GC objects in the per-ISEQ payload. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_iseq_update_references(payload: *mut c_void) { + let payload = if payload.is_null() { + return; // nothing to update + } else { + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_assert_holding_vm_lock(); + &mut *(payload as *mut IseqPayload) + } + }; + with_time_stat(gc_time_ns, || iseq_update_references(payload)); +} + +fn iseq_mark(payload: &IseqPayload) { // Mark objects retained by profiling instructions payload.profile.each_object(|object| { unsafe { rb_gc_mark_movable(object); } @@ -100,24 +124,8 @@ pub extern "C" fn rb_zjit_iseq_mark(payload: *mut c_void) { } } -/// GC callback for updating GC objects in the per-ISEQ payload. -/// This is a mirror of [rb_zjit_iseq_mark]. -#[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_iseq_update_references(payload: *mut c_void) { - let payload = if payload.is_null() { - return; // nothing to update - } else { - // SAFETY: The GC takes the VM lock while marking, which - // we assert, so we should be synchronized and data race free. - // - // For aliasing, having the VM lock hopefully also implies that no one - // else has an overlapping &mut IseqPayload. - unsafe { - rb_assert_holding_vm_lock(); - &mut *(payload as *mut IseqPayload) - } - }; - +/// This is a mirror of [iseq_mark]. +fn iseq_update_references(payload: &mut IseqPayload) { // Move objects retained by profiling instructions payload.profile.each_object_mut(|object| { *object = unsafe { rb_gc_location(*object) }; diff --git a/zjit/src/invariants.rs b/zjit/src/invariants.rs index 85bc04fc71..3f291415be 100644 --- a/zjit/src/invariants.rs +++ b/zjit/src/invariants.rs @@ -1,20 +1,24 @@ use std::{collections::{HashMap, HashSet}, mem}; use crate::{backend::lir::{asm_comment, Assembler}, cruby::{rb_callable_method_entry_t, ruby_basic_operators, src_loc, with_vm_lock, IseqPtr, RedefinitionFlag, ID}, gc::IseqPayload, hir::Invariant, options::debug, state::{zjit_enabled_p, ZJITState}, virtualmem::CodePtr}; +use crate::stats::with_time_stat; +use crate::stats::Counter::invalidation_time_ns; use crate::gc::remove_gc_offsets; macro_rules! compile_patch_points { ($cb:expr, $patch_points:expr, $($comment_args:tt)*) => { - for patch_point in $patch_points { - let written_range = $cb.with_write_ptr(patch_point.patch_point_ptr, |cb| { - let mut asm = Assembler::new(); - asm_comment!(asm, $($comment_args)*); - asm.jmp(patch_point.side_exit_ptr.into()); - asm.compile(cb).expect("can write existing code"); - }); - // Stop marking GC offsets corrupted by the jump instruction - remove_gc_offsets(patch_point.payload_ptr, &written_range); - } + with_time_stat(invalidation_time_ns, || { + for patch_point in $patch_points { + let written_range = $cb.with_write_ptr(patch_point.patch_point_ptr, |cb| { + let mut asm = Assembler::new(); + asm_comment!(asm, $($comment_args)*); + asm.jmp(patch_point.side_exit_ptr.into()); + asm.compile(cb).expect("can write existing code"); + }); + // Stop marking GC offsets corrupted by the jump instruction + remove_gc_offsets(patch_point.payload_ptr, &written_range); + } + }); }; } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 12b10b98ee..7ffaea29dc 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -3,6 +3,8 @@ use crate::{cruby::*, gc::get_or_create_iseq_payload, options::get_option}; use crate::distribution::{Distribution, DistributionSummary}; +use crate::stats::Counter::profile_time_ns; +use crate::stats::with_time_stat; /// Ephemeral state for profiling runtime information struct Profiler { @@ -41,13 +43,13 @@ impl Profiler { #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_profile_insn(bare_opcode: u32, ec: EcPtr) { with_vm_lock(src_loc!(), || { - let mut profiler = Profiler::new(ec); - profile_insn(&mut profiler, bare_opcode as ruby_vminsn_type); + with_time_stat(profile_time_ns, || profile_insn(bare_opcode as ruby_vminsn_type, ec)); }); } /// Profile a YARV instruction -fn profile_insn(profiler: &mut Profiler, bare_opcode: ruby_vminsn_type) { +fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { + let profiler = &mut Profiler::new(ec); let profile = &mut get_or_create_iseq_payload(profiler.iseq).profile; match bare_opcode { YARVINSN_opt_nil_p => profile_operands(profiler, profile, 1), diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 4fbad5d247..5b39ecdf4b 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -1,38 +1,75 @@ -// Maxime would like to rebuild an improved stats system -// Individual stats should be tagged as always available, or only available in stats mode -// We could also tag which stats are fallback or exit counters, etc. Maybe even tag units? -// -// Comptime vs Runtime stats? +use std::time::Instant; use crate::{cruby::*, options::get_option, state::{zjit_enabled_p, ZJITState}}; macro_rules! make_counters { - ($($counter_name:ident,)+) => { + ( + default { + $($default_counter_name:ident,)+ + } + $($counter_name:ident,)+ + ) => { /// Struct containing the counter values #[derive(Default, Debug)] - pub struct Counters { $(pub $counter_name: u64),+ } + pub struct Counters { + $(pub $default_counter_name: u64,)+ + $(pub $counter_name: u64,)+ + } /// Enum to represent a counter #[allow(non_camel_case_types)] #[derive(Clone, Copy, PartialEq, Eq, Debug)] - pub enum Counter { $($counter_name),+ } + pub enum Counter { + $($default_counter_name,)+ + $($counter_name,)+ + } + + impl Counter { + pub fn name(&self) -> String { + match self { + $( Counter::$default_counter_name => stringify!($default_counter_name).to_string(), )+ + $( Counter::$counter_name => stringify!($counter_name).to_string(), )+ + } + } + } /// Map a counter to a pointer pub fn counter_ptr(counter: Counter) -> *mut u64 { let counters = $crate::state::ZJITState::get_counters(); match counter { - $( Counter::$counter_name => std::ptr::addr_of_mut!(counters.$counter_name) ),+ + $( Counter::$default_counter_name => std::ptr::addr_of_mut!(counters.$default_counter_name), )+ + $( Counter::$counter_name => std::ptr::addr_of_mut!(counters.$counter_name), )+ } } + + /// The list of counters that are available without --zjit-stats. + /// They are incremented only by `incr_counter()` and don't use `gen_incr_counter()`. + pub const DEFAULT_COUNTERS: &'static [Counter] = &[ + $( Counter::$default_counter_name, )+ + ]; } } // Declare all the counters we track make_counters! { + // Default counters that are available without --zjit-stats + default { + compile_time_ns, + profile_time_ns, + gc_time_ns, + invalidation_time_ns, + } + // The number of times YARV instructions are executed on JIT code zjit_insns_count, } +/// Increase a counter by a specified amount +fn incr_counter(counter: Counter, amount: u64) { + let ptr = counter_ptr(counter); + unsafe { *ptr += amount; } +} + pub fn zjit_alloc_size() -> usize { 0 // TODO: report the actual memory usage } @@ -49,14 +86,30 @@ pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE) -> VALUE { } let hash = unsafe { rb_hash_new() }; - // TODO: Set counters that are always available here + let counters = ZJITState::get_counters(); + + for &counter in DEFAULT_COUNTERS { + let counter_val = unsafe { *counter_ptr(counter) }; + set_stat(hash, &counter.name(), counter_val); + } // Set counters that are enabled when --zjit-stats is enabled if get_option!(stats) { - let counters = ZJITState::get_counters(); set_stat(hash, "zjit_insns_count", counters.zjit_insns_count); - set_stat(hash, "vm_insns_count", unsafe { rb_vm_insns_count }); + + if unsafe { rb_vm_insns_count } > 0 { + set_stat(hash, "vm_insns_count", unsafe { rb_vm_insns_count }); + } } hash } + +/// Measure the time taken by func() and add that to zjit_compile_time. +pub fn with_time_stat(counter: Counter, func: F) -> R where F: FnOnce() -> R { + let start = Instant::now(); + let ret = func(); + let nanos = Instant::now().duration_since(start).as_nanos(); + incr_counter(counter, nanos as u64); + ret +}