ruby/ractor_core.h
Peter Zhu fca258f97f Fix deadlock when malloc in Ractor lock
If we malloc when the current Ractor is locked, we can deadlock because
GC requires VM lock and Ractor barrier. If another Ractor is waiting on
this Ractor lock, then it will deadlock because the other Ractor will
never join the barrier.

For example, this script deadlocks:

    r = Ractor.new do
      loop do
        Ractor::Port.new
      end
    end

    100000.times do |i|
      r.send(nil)
      puts i
    end

On debug builds, it fails with this assertion error:

    vm_sync.c:75: Assertion Failed: vm_lock_enter:cr->sync.locked_by != rb_ractor_self(cr)

On non-debug builds, we can see that it deadlocks in the debugger:

    Main Ractor:
    frame #3: 0x000000010021fdc4 miniruby`rb_native_mutex_lock(lock=<unavailable>) at thread_pthread.c:115:14
    frame #4: 0x0000000100193eb8 miniruby`ractor_send0 [inlined] ractor_lock(r=<unavailable>, file=<unavailable>, line=1180) at ractor.c:73:5
    frame #5: 0x0000000100193eb0 miniruby`ractor_send0 [inlined] ractor_send_basket(ec=<unavailable>, rp=0x0000000131092840, b=0x000000011c63de80, raise_on_error=true) at ractor_sync.c:1180:5
    frame #6: 0x0000000100193eac miniruby`ractor_send0(ec=<unavailable>, rp=0x0000000131092840, obj=4, move=<unavailable>, raise_on_error=true) at ractor_sync.c:1211:5

    Second Ractor:
    frame #2: 0x00000001002208d0 miniruby`rb_ractor_sched_barrier_start [inlined] rb_native_cond_wait(cond=<unavailable>, mutex=<unavailable>) at thread_pthread.c:221:13
    frame #3: 0x00000001002208cc miniruby`rb_ractor_sched_barrier_start(vm=0x000000013180d600, cr=0x0000000131093460) at thread_pthread.c:1438:13
    frame #4: 0x000000010028a328 miniruby`rb_vm_barrier at vm_sync.c:262:13 [artificial]
    frame #5: 0x00000001000dfa6c miniruby`gc_start [inlined] rb_gc_vm_barrier at gc.c:179:5
    frame #6: 0x00000001000dfa68 miniruby`gc_start [inlined] gc_enter(objspace=0x000000013180fc00, event=gc_enter_event_start, lock_lev=<unavailable>) at default.c:6636:9
    frame #7: 0x00000001000dfa48 miniruby`gc_start(objspace=0x000000013180fc00, reason=<unavailable>) at default.c:6361:5
    frame #8: 0x00000001000e3fd8 miniruby`objspace_malloc_increase_body [inlined] garbage_collect(objspace=0x000000013180fc00, reason=512) at default.c:6341:15
    frame #9: 0x00000001000e3fa4 miniruby`objspace_malloc_increase_body [inlined] garbage_collect_with_gvl(objspace=0x000000013180fc00, reason=512) at default.c:6741:16
    frame #10: 0x00000001000e3f88 miniruby`objspace_malloc_increase_body(objspace=0x000000013180fc00, mem=<unavailable>, new_size=<unavailable>, old_size=<unavailable>, type=<unavailable>) at default.c:8007:13
    frame #11: 0x00000001000e3c44 miniruby`rb_gc_impl_malloc [inlined] objspace_malloc_fixup(objspace=0x000000013180fc00, mem=0x000000011c700000, size=12582912) at default.c:8085:5
    frame #12: 0x00000001000e3c30 miniruby`rb_gc_impl_malloc(objspace_ptr=0x000000013180fc00, size=12582912) at default.c:8182:12
    frame #13: 0x00000001000d4584 miniruby`ruby_xmalloc [inlined] ruby_xmalloc_body(size=<unavailable>) at gc.c:5128:12
    frame #14: 0x00000001000d4568 miniruby`ruby_xmalloc(size=<unavailable>) at gc.c:5118:34
    frame #15: 0x00000001001eb184 miniruby`rb_st_init_existing_table_with_size(tab=0x000000011c2b4b40, type=<unavailable>, size=<unavailable>) at st.c:559:39
    frame #16: 0x00000001001ebc74 miniruby`rebuild_table_if_necessary [inlined] rb_st_init_table_with_size(type=0x00000001004f4a78, size=524287) at st.c:585:5
    frame #17: 0x00000001001ebc5c miniruby`rebuild_table_if_necessary [inlined] rebuild_table(tab=0x000000013108e2f0) at st.c:753:19
    frame #18: 0x00000001001ebbfc miniruby`rebuild_table_if_necessary(tab=0x000000013108e2f0) at st.c:1125:9
    frame #19: 0x00000001001eba08 miniruby`rb_st_insert(tab=0x000000013108e2f0, key=262144, value=4767566624) at st.c:1143:5
    frame #20: 0x0000000100194b84 miniruby`ractor_port_initialzie [inlined] ractor_add_port(r=0x0000000131093460, id=262144) at ractor_sync.c:399:9
    frame #21: 0x0000000100194b58 miniruby`ractor_port_initialzie [inlined] ractor_port_init(rpv=4750065560, r=0x0000000131093460) at ractor_sync.c:87:5
    frame #22: 0x0000000100194b34 miniruby`ractor_port_initialzie(self=4750065560) at ractor_sync.c:103:12
2025-08-25 15:43:01 -04:00

307 lines
7.7 KiB
C

#include "internal/gc.h"
#include "ruby/ruby.h"
#include "ruby/ractor.h"
#include "vm_core.h"
#include "id_table.h"
#include "vm_debug.h"
// RACTOR_CHECK_MODE: non-zero when the ractor consistency checks guarded by
// `#if RACTOR_CHECK_MODE > 0` are compiled in. Unless overridden beforehand,
// it is enabled when VM_CHECK_MODE or RUBY_DEBUG is set and SIZEOF_UINT64_T
// equals SIZEOF_VALUE.
//
// The whole replacement list is parenthesized so the macro behaves as a single
// operand in any expression (e.g. `!RACTOR_CHECK_MODE`); without the outer
// parentheses a unary or higher-precedence operator would bind only to the
// first sub-expression.
#ifndef RACTOR_CHECK_MODE
#define RACTOR_CHECK_MODE ((VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE))
#endif
// Per-ractor synchronization state: the ractor lock together with the
// message, port and monitor bookkeeping declared next to it.
//
// NOTE(review): per the change description accompanying this file, calling
// malloc while a ractor's lock is held can deadlock, because GC needs the VM
// lock and a ractor barrier, and a ractor waiting on this lock never joins
// that barrier.
struct rb_ractor_sync {
// ractor lock
rb_nativethread_lock_t lock;
#if RACTOR_CHECK_MODE > 0
// only present in check mode; presumably the self VALUE of the ractor that
// currently holds `lock` (vm_sync.c asserts it against rb_ractor_self()) -- TODO confirm
VALUE locked_by;
#endif
#ifndef RUBY_THREAD_PTHREAD_H
// only present when RUBY_THREAD_PTHREAD_H is not defined
rb_nativethread_cond_t wakeup_cond;
#endif
// incoming messages
struct ractor_queue *recv_queue;
// waiting threads for receiving
struct ccan_list_head waiters;
// ports
// NOTE(review): `ports` looks like a table keyed by port id, with
// `next_port_id` as the next id to hand out -- confirm against ractor_sync.c
VALUE default_port_value;
struct st_table *ports;
size_t next_port_id;
// monitors
struct ccan_list_head monitors;
// value
// NOTE(review): semantics of successor/legacy/legacy_exc are not visible
// from this header -- see their users before relying on them
rb_ractor_t *successor;
VALUE legacy;
bool legacy_exc;
};
// Ractor life cycle:
//
// created
//   | ready to run
// ====================== inserted to vm->ractor
//   v
// blocking <---+ all threads are blocking
//   |          |
//   v          |
// running -----+
//   | all threads are terminated.
// ====================== removed from vm->ractor
//   v
// terminated
//
// status is protected by VM lock (global state)
enum ractor_status {
// initial state, before the ractor is inserted to vm->ractor
ractor_created,
// sits between blocking and terminated in the diagram above
ractor_running,
// all threads are blocking
ractor_blocking,
// all threads are terminated; the ractor is removed from vm->ractor
ractor_terminated,
};
// The ractor object itself (typedef'ed to rb_ractor_t in vm_core.h).
struct rb_ractor_struct {
// public part; its `self` and `id` members are what rb_ractor_self() and
// rb_ractor_id() return
struct rb_ractor_pub pub;
// ractor lock and message/port state (see struct rb_ractor_sync)
struct rb_ractor_sync sync;
// thread management
struct {
struct ccan_list_head set;
unsigned int cnt;
unsigned int blocking_cnt;
// adjusted and read through the rb_ractor_sleeper_threads_*() helpers
unsigned int sleeper;
struct rb_thread_sched sched;
// updated by rb_ractor_thread_switch() and rb_ractor_set_current_ec_()
rb_execution_context_t *running_ec;
rb_thread_t *main;
} threads;
VALUE thgroup_default;
VALUE name;
VALUE loc;
// compared by rb_ractor_status_p(); see enum ractor_status for the
// locking rule that applies to status
enum ractor_status status_;
struct ccan_list_node vmlr_node;
// ractor local data
st_table *local_storage;
struct rb_id_table *idkey_local_storage;
VALUE local_storage_store_lock;
VALUE r_stdin;
VALUE r_stdout;
VALUE r_stderr;
VALUE verbose;
VALUE debug;
// NOTE(review): presumably set to keep malloc from triggering GC (e.g. while
// the ractor lock is held) -- the users of this flag are outside this
// header; TODO confirm
bool malloc_gc_disabled;
// opaque pointer; its concrete type is not visible from this header
void *newobj_cache;
}; // rb_ractor_t is defined in vm_core.h
// Returns the `self` VALUE stored in the public part of ractor `r`.
static inline VALUE
rb_ractor_self(const rb_ractor_t *r)
{
    const VALUE self = r->pub.self;
    return self;
}
// Ractor entry points that are only declared in this header; their
// definitions live in other translation units.

// main ractor allocation/setup and ractor exit/teardown hooks
rb_ractor_t *rb_ractor_main_alloc(void);
void rb_ractor_main_setup(rb_vm_t *vm, rb_ractor_t *main_ractor, rb_thread_t *main_thread);
void rb_ractor_atexit(rb_execution_context_t *ec, VALUE result);
void rb_ractor_atexit_exception(rb_execution_context_t *ec);
void rb_ractor_teardown(rb_execution_context_t *ec);
// parameter hand-off for ractor `g`
void rb_ractor_receive_parameters(rb_execution_context_t *ec, rb_ractor_t *g, int len, VALUE *ptr);
void rb_ractor_send_parameters(rb_execution_context_t *ec, rb_ractor_t *g, VALUE args);
VALUE rb_thread_create_ractor(rb_ractor_t *g, VALUE args, VALUE proc); // defined in thread.c
// living/blocking thread accounting
int rb_ractor_living_thread_num(const rb_ractor_t *);
VALUE rb_ractor_thread_list(void);
bool rb_ractor_p(VALUE rv);
void rb_ractor_living_threads_init(rb_ractor_t *r);
void rb_ractor_living_threads_insert(rb_ractor_t *r, rb_thread_t *th);
void rb_ractor_living_threads_remove(rb_ractor_t *r, rb_thread_t *th);
void rb_ractor_blocking_threads_inc(rb_ractor_t *r, const char *file, int line); // TODO: file, line only for RUBY_DEBUG_LOG
void rb_ractor_blocking_threads_dec(rb_ractor_t *r, const char *file, int line); // TODO: file, line only for RUBY_DEBUG_LOG
// interrupts and termination
void rb_ractor_vm_barrier_interrupt_running_thread(rb_ractor_t *r);
void rb_ractor_terminate_interrupt_main_thread(rb_ractor_t *r);
void rb_ractor_terminate_all(void);
// out-of-line check that rb_ractor_main_p() falls back to when
// ruby_single_main_ractor is not set
bool rb_ractor_main_p_(void);
// fork handling
void rb_ractor_atfork(rb_vm_t *vm, rb_thread_t *th);
void rb_ractor_terminate_atfork(rb_vm_t *vm, rb_ractor_t *th);
// require/autoload and shareability helpers
VALUE rb_ractor_require(VALUE feature, bool silent);
VALUE rb_ractor_autoload_load(VALUE space, ID id);
VALUE rb_ractor_ensure_shareable(VALUE obj, VALUE name);
// Declarations placed between RUBY_SYMBOL_EXPORT_BEGIN and
// RUBY_SYMBOL_EXPORT_END.
RUBY_SYMBOL_EXPORT_BEGIN
void rb_ractor_finish_marking(void);
bool rb_ractor_shareable_p_continue(VALUE obj);
// THIS FUNCTION SHOULD NOT BE CALLED DURING INCREMENTAL MARKING!!
// It is intended to be used from a T_DATA free function (free_func).
void rb_ractor_local_storage_delkey(rb_ractor_local_key_t key);
RUBY_SYMBOL_EXPORT_END
// Answers true immediately while ruby_single_main_ractor is set; otherwise
// the answer comes from the out-of-line rb_ractor_main_p_().
static inline bool
rb_ractor_main_p(void)
{
    return ruby_single_main_ractor ? true : rb_ractor_main_p_();
}
// Tells whether the status recorded in `r` equals `status`.
static inline bool
rb_ractor_status_p(rb_ractor_t *r, enum ractor_status status)
{
    const bool matches = (status == r->status_);
    return matches;
}
// Adds one to the sleeper thread counter of `r`.
static inline void
rb_ractor_sleeper_threads_inc(rb_ractor_t *r)
{
    r->threads.sleeper += 1;
}
// Subtracts one from the sleeper thread counter of `r`.
static inline void
rb_ractor_sleeper_threads_dec(rb_ractor_t *r)
{
    r->threads.sleeper -= 1;
}
// Resets the sleeper thread counter of `r` to zero.
static inline void
rb_ractor_sleeper_threads_clear(rb_ractor_t *r)
{
    r->threads.sleeper = 0u;
}
// Returns the sleeper thread counter of `r`, converted to int.
static inline int
rb_ractor_sleeper_thread_num(rb_ractor_t *r)
{
    const unsigned int sleepers = r->threads.sleeper;
    return (int)sleepers;
}
// Records `th` as the thread running in ractor `cr`.
//
// th->running_time_us is zeroed when the running ec changes or when
// `always_reset` is set. If the ec of `th` is already the running one,
// nothing else happens; otherwise cr->threads.running_ec is updated and `cr`
// is asserted to be the current ractor.
static inline void
rb_ractor_thread_switch(rb_ractor_t *cr, rb_thread_t *th, bool always_reset)
{
    RUBY_DEBUG_LOG("th:%d->%u%s",
                   cr->threads.running_ec ? (int)rb_th_serial(cr->threads.running_ec->thread_ptr) : -1,
                   rb_th_serial(th), cr->threads.running_ec == th->ec ? " (same)" : "");

    const bool same_ec = (cr->threads.running_ec == th->ec);

    if (always_reset || !same_ec) {
        th->running_time_us = 0;
    }

    if (same_ec) {
        // already running this ec: nothing to switch
        return;
    }

    if (0) {
        // disabled trace output, kept for manual debugging
        ruby_debug_printf("rb_ractor_thread_switch ec:%p->%p\n",
                          (void *)cr->threads.running_ec, (void *)th->ec);
    }

    cr->threads.running_ec = th->ec;
    VM_ASSERT(cr == GET_RACTOR());
}
// Convenience wrapper around rb_ractor_set_current_ec_() that passes the
// call site's __FILE__ and __LINE__ along (they end up in the debug log).
#define rb_ractor_set_current_ec(cr, ec) rb_ractor_set_current_ec_(cr, ec, __FILE__, __LINE__)
// Setter for the current ec; declared only when RB_THREAD_LOCAL_SPECIFIER is
// defined, and defined outside this header.
#ifdef RB_THREAD_LOCAL_SPECIFIER
void rb_current_ec_set(rb_execution_context_t *ec);
#endif
// Installs `ec` as the current execution context and records it as the
// running ec of ractor `cr`. Normally reached through the
// rb_ractor_set_current_ec() macro, which supplies `file` and `line`.
//
// NOTE(review): the statement order is left untouched on purpose; the debug
// log may itself consult the current ec -- confirm before reordering.
static inline void
rb_ractor_set_current_ec_(rb_ractor_t *cr, rb_execution_context_t *ec, const char *file, int line)
{
#ifdef RB_THREAD_LOCAL_SPECIFIER
// builds with RB_THREAD_LOCAL_SPECIFIER go through the setter function
rb_current_ec_set(ec);
#else
// otherwise the ec is stored under the native TLS key
native_tls_set(ruby_current_ec_key, ec);
#endif
// logs the previously recorded ec and the new one, attributed to file/line
RUBY_DEBUG_LOG2(file, line, "ec:%p->%p", (void *)cr->threads.running_ec, (void *)ec);
// a non-NULL ec must differ from the one already recorded as running
VM_ASSERT(ec == NULL || cr->threads.running_ec != ec);
cr->threads.running_ec = ec;
}
// VM-level counterparts for adjusting the blocking count on behalf of `cr`;
// defined outside this header. `file`/`line` identify the call site.
void rb_vm_ractor_blocking_cnt_inc(rb_vm_t *vm, rb_ractor_t *cr, const char *file, int line);
void rb_vm_ractor_blocking_cnt_dec(rb_vm_t *vm, rb_ractor_t *cr, const char *file, int line);
// Returns the numeric id stored in the public part of ractor `r`.
static inline uint32_t
rb_ractor_id(const rb_ractor_t *r)
{
    const uint32_t id = r->pub.id;
    return id;
}
#if RACTOR_CHECK_MODE > 0
// lvalue for the uint32_t located rb_gc_obj_slot_size(obj) bytes past the
// address of `obj`; note that `obj` is evaluated twice.
// NOTE(review): presumably the GC reserves this extra space per slot in check
// mode -- TODO confirm
# define RACTOR_BELONGING_ID(obj) (*(uint32_t *)(((uintptr_t)(obj)) + rb_gc_obj_slot_size(obj)))
// id of the current ractor; defined outside this header
uint32_t rb_ractor_current_id(void);
// Stores ractor id `rid` into the belonging-id word attached to `obj`.
static inline void
rb_ractor_setup_belonging_to(VALUE obj, uint32_t rid)
{
    uint32_t *const slot = &RACTOR_BELONGING_ID(obj);
    *slot = rid;
}
// Returns the ractor id recorded for `obj`, or 0 for special constants and
// objects flagged shareable (their belonging-id word is not read).
static inline uint32_t
rb_ractor_belonging(VALUE obj)
{
    if (SPECIAL_CONST_P(obj)) return 0;
    if (RB_OBJ_SHAREABLE_P(obj)) return 0;

    return RACTOR_BELONGING_ID(obj);
}
// When set, rb_ractor_confirm_belonging() skips all checks.
extern bool rb_ractor_ignore_belonging_flag;

// Checks that `obj` may be touched by the current ractor and returns it.
//
// An object with belonging id 0 must be shareable; an object whose id differs
// from the current ractor's id must be shareable as well. Any violation
// prints the object with rp() and aborts through rb_bug().
static inline VALUE
rb_ractor_confirm_belonging(VALUE obj)
{
    if (rb_ractor_ignore_belonging_flag) {
        return obj;
    }

    const uint32_t owner = rb_ractor_belonging(obj);

    if (owner == 0) {
        if (UNLIKELY(!rb_ractor_shareable_p(obj))) {
            rp(obj);
            rb_bug("id == 0 but not shareable");
        }
        return obj;
    }

    // owned by some ractor: only a mismatch on a non-shareable object is fatal
    if (UNLIKELY(owner != rb_ractor_current_id()) && !rb_ractor_shareable_p(obj)) {
        rp(obj);
        rb_bug("rb_ractor_confirm_belonging object-ractor id:%u, current-ractor id:%u", owner, rb_ractor_current_id());
    }

    return obj;
}
// Turns the belonging checks of rb_ractor_confirm_belonging() off (`flag`
// true) or back on (`flag` false) by storing `flag` in the global toggle.
static inline void
rb_ractor_ignore_belonging(bool flag)
{
    rb_ractor_ignore_belonging_flag = flag;
}
#else
// Fallbacks for builds without RACTOR_CHECK_MODE: confirming belonging yields
// the argument unchanged and the ignore toggle is a no-op. The argument is
// parenthesized so that an expression passed as `obj` cannot re-associate
// with operators surrounding the macro call.
#define rb_ractor_confirm_belonging(obj) (obj)
#define rb_ractor_ignore_belonging(flag) (0)
#endif