[ruby/mmtk] Ensure not blocking for GC in rb_gc_impl_before_fork

In rb_gc_impl_before_fork, we lock the VM and run a barrier that pauses
all the Ractors before calling mmtk_before_fork. Since
rb_mmtk_block_for_gc is a barrier point, one or more Ractors could be
paused there. That is incompatible with mmtk_before_fork, which assumes
that the MMTk workers are idle: in this situation the workers are not
idle, because they are busy working on a GC.

This commit essentially implements a trylock. It optimistically takes
the VM lock, but releases the lock and retries if it detects that any
other Ractors are waiting in rb_mmtk_block_for_gc.

For example, the following script demonstrates the issue:

    puts "Hello #{Process.pid}"

    100.times do |i|
      puts "i = #{i}"
      Ractor.new(i) do |j|
        puts "Ractor #{j} hello"
        1000.times do |i|
          s = "#{j}-#{i}"
        end
        Ractor.receive
        puts "Ractor #{j} goodbye"
      end
      pid = fork { }
      puts "Child pid is #{pid}"
      _, status = Process.waitpid2 pid
      puts status.success?
    end

    puts "Goodbye"

We can see the MMTk worker thread is waiting to start the GC:

    #4  0x00007ffff66538b1 in rb_mmtk_stop_the_world () at gc/mmtk/mmtk.c:101
    #5  0x00007ffff6d04caf in mmtk_ruby::collection::{impl#0}::stop_all_mutators<mmtk::scheduler::gc_work::{impl#14}::do_work::{closure_env#0}<mmtk::plan::immix::gc_work::ImmixGCWorkContext<mmtk_ruby::Ruby, 0>>> (_tls=..., mutator_visitor=...) at src/collection.rs:23

However, the mutator thread is stuck in mmtk_before_fork trying to stop
that worker thread:

    #4  0x00007ffff6c0b621 in std::sys::thread::unix::Thread::join () at library/std/src/sys/thread/unix.rs:134
    #5  0x00007ffff6658b6e in std::thread::JoinInner<()>::join<()> (self=...)
    #6  0x00007ffff6658d4c in std::thread::JoinHandle<()>::join<()> (self=...)
    #7  0x00007ffff665795e in mmtk_ruby::binding::RubyBinding::join_all_gc_threads (self=0x7ffff72462d0 <mmtk_ruby::BINDING+8>) at src/binding.rs:115
    #8  0x00007ffff66561a8 in mmtk_ruby::api::mmtk_before_fork () at src/api.rs:309
    #9  0x00007ffff66556ff in rb_gc_impl_before_fork (objspace_ptr=0x555555d17980) at gc/mmtk/mmtk.c:1054
    #10 0x00005555556bbc3e in rb_gc_before_fork () at gc.c:5429

https://github.com/ruby/mmtk/commit/1a629504a7
This commit is contained in:
Peter Zhu 2025-11-17 21:39:02 -05:00 committed by git
parent 69b1c567d7
commit f040b94cf5

View File

@@ -32,6 +32,7 @@ struct objspace {
unsigned long live_ractor_cache_count;
pthread_mutex_t mutex;
rb_atomic_t mutator_blocking_count;
bool world_stopped;
pthread_cond_t cond_world_stopped;
pthread_cond_t cond_world_started;
@@ -131,7 +132,9 @@ rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
struct objspace *objspace = rb_gc_get_objspace();
size_t starting_gc_count = objspace->gc_count;
RUBY_ATOMIC_INC(objspace->mutator_blocking_count);
int lock_lev = RB_GC_VM_LOCK();
RUBY_ATOMIC_DEC(objspace->mutator_blocking_count);
int err;
if ((err = pthread_mutex_lock(&objspace->mutex)) != 0) {
rb_bug("ERROR: cannot lock objspace->mutex: %s", strerror(err));
@@ -1049,9 +1052,26 @@ rb_gc_impl_before_fork(void *objspace_ptr)
{
struct objspace *objspace = objspace_ptr;
retry:
objspace->fork_hook_vm_lock_lev = RB_GC_VM_LOCK();
rb_gc_vm_barrier();
/* At this point, we know that all the Ractors are paused because of the
* rb_gc_vm_barrier above. Since rb_mmtk_block_for_gc is a barrier point,
* one or more Ractors could be paused there. However, mmtk_before_fork is
* not compatible with that because it assumes that the MMTk workers are idle,
* but the workers are not idle because they are busy working on a GC.
*
* This essentially implements a trylock. It will optimistically lock but will
* release the lock if it detects that any other Ractors are waiting in
* rb_mmtk_block_for_gc.
*/
rb_atomic_t mutator_blocking_count = RUBY_ATOMIC_LOAD(objspace->mutator_blocking_count);
if (mutator_blocking_count != 0) {
RB_GC_VM_UNLOCK(objspace->fork_hook_vm_lock_lev);
goto retry;
}
mmtk_before_fork();
}