Handle NEWOBJ tracepoints settings fields

[Bug #21710]

- struct.c: `struct_alloc`

It is possible for a `NEWOBJ` tracepoint call back to write fields
into a newly allocated object before `struct_alloc` had the time
to set the `RSTRUCT_GEN_FIELDS` flags and such.

Hence we can't blindly initialize the `fields_obj` reference to `0`
we first need to check no fields were added yet.

- object.c: `rb_class_allocate_instance`

Similarly, if a `NEWOBJ` tracepoint tries to set fields on the object,
the `shape_id` must already be set, as it's required on T_OBJECT to
know where to write fields.

`NEWOBJ_OF` had to be refactored to accept a `shape_id`.
This commit is contained in:
Jean Boussier 2025-11-25 15:06:33 +01:00
parent 4762f429f4
commit 8c3909935e
Notes: git 2025-12-03 07:15:27 +00:00
8 changed files with 87 additions and 26 deletions

View File

@ -86,6 +86,29 @@ tracepoint_specify_normal_and_internal_events(VALUE self)
return Qnil; /* should not be reached */
}
int rb_objspace_internal_object_p(VALUE obj);
static void
on_newobj_event(VALUE tpval, void *data)
{
VALUE obj = rb_tracearg_object(rb_tracearg_from_tracepoint(tpval));
if (RB_TYPE_P(obj, T_STRING)) {
// Would fail !rb_obj_exivar_p(str) assertion in fstring_concurrent_set_create
return;
}
if (!rb_objspace_internal_object_p(obj)) rb_obj_id(obj);
}
static VALUE
add_object_id(RB_UNUSED_VAR(VALUE _))
{
VALUE tp = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, on_newobj_event, NULL);
rb_tracepoint_enable(tp);
rb_yield(Qnil);
rb_tracepoint_disable(tp);
return Qnil;
}
void Init_gc_hook(VALUE);
void
@ -95,4 +118,5 @@ Init_tracepoint(void)
Init_gc_hook(tp_mBug);
rb_define_module_function(tp_mBug, "tracepoint_track_objspace_events", tracepoint_track_objspace_events, 0);
rb_define_module_function(tp_mBug, "tracepoint_specify_normal_and_internal_events", tracepoint_specify_normal_and_internal_events, 0);
rb_define_singleton_method(tp_mBug, "tracepoint_add_object_id", add_object_id, 0);
}

15
gc.c
View File

@ -991,9 +991,10 @@ gc_validate_pc(VALUE obj)
}
static inline VALUE
newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, bool wb_protected, size_t size)
newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, shape_id_t shape_id, bool wb_protected, size_t size)
{
VALUE obj = rb_gc_impl_new_obj(rb_gc_get_objspace(), cr->newobj_cache, klass, flags, wb_protected, size);
RBASIC_SET_SHAPE_ID_NO_CHECKS(obj, shape_id);
gc_validate_pc(obj);
@ -1032,17 +1033,17 @@ newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, bool wb_protected, size_t s
}
VALUE
rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, size_t size)
rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, shape_id_t shape_id, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
return newobj_of(GET_RACTOR(), klass, flags, FALSE, size);
return newobj_of(GET_RACTOR(), klass, flags, shape_id, FALSE, size);
}
VALUE
rb_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t size)
rb_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, shape_id_t shape_id, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
return newobj_of(rb_ec_ractor_ptr(ec), klass, flags, TRUE, size);
return newobj_of(rb_ec_ractor_ptr(ec), klass, flags, shape_id, TRUE, size);
}
#define UNEXPECTED_NODE(func) \
@ -1063,7 +1064,7 @@ rb_data_object_wrap(VALUE klass, void *datap, RUBY_DATA_FUNC dmark, RUBY_DATA_FU
{
RUBY_ASSERT_ALWAYS(dfree != (RUBY_DATA_FUNC)1);
if (klass) rb_data_object_check(klass);
VALUE obj = newobj_of(GET_RACTOR(), klass, T_DATA, !dmark, sizeof(struct RTypedData));
VALUE obj = newobj_of(GET_RACTOR(), klass, T_DATA, ROOT_SHAPE_ID, !dmark, sizeof(struct RTypedData));
struct RData *data = (struct RData *)obj;
data->dmark = dmark;
@ -1087,7 +1088,7 @@ typed_data_alloc(VALUE klass, VALUE typed_flag, void *datap, const rb_data_type_
RBIMPL_NONNULL_ARG(type);
if (klass) rb_data_object_check(klass);
bool wb_protected = (type->flags & RUBY_FL_WB_PROTECTED) || !type->function.dmark;
VALUE obj = newobj_of(GET_RACTOR(), klass, T_DATA | RUBY_TYPED_FL_IS_TYPED_DATA, wb_protected, size);
VALUE obj = newobj_of(GET_RACTOR(), klass, T_DATA | RUBY_TYPED_FL_IS_TYPED_DATA, ROOT_SHAPE_ID, wb_protected, size);
struct RTypedData *data = (struct RTypedData *)obj;
data->fields_obj = 0;

View File

@ -122,10 +122,12 @@ const char *rb_raw_obj_info(char *const buff, const size_t buff_size, VALUE obj)
struct rb_execution_context_struct; /* in vm_core.h */
struct rb_objspace; /* in vm_core.h */
#define NEWOBJ_OF(var, T, c, f, s, ec) \
#define NEWOBJ_OF_WITH_SHAPE(var, T, c, f, shape_id, s, ec) \
T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
rb_wb_protected_newobj_of((ec ? ec : GET_EC()), (c), (f) & ~FL_WB_PROTECTED, s) : \
rb_wb_unprotected_newobj_of((c), (f), s))
rb_wb_protected_newobj_of((ec ? ec : GET_EC()), (c), (f) & ~FL_WB_PROTECTED, shape_id, s) : \
rb_wb_unprotected_newobj_of((c), (f), shape_id, s))
#define NEWOBJ_OF(var, T, c, f, s, ec) NEWOBJ_OF_WITH_SHAPE(var, T, c, f, 0 /* ROOT_SHAPE_ID */, s, ec)
#ifndef RB_GC_OBJECT_METADATA_ENTRY_DEFINED
# define RB_GC_OBJECT_METADATA_ENTRY_DEFINED
@ -248,8 +250,8 @@ VALUE rb_gc_disable_no_rest(void);
/* gc.c (export) */
const char *rb_objspace_data_type_name(VALUE obj);
VALUE rb_wb_protected_newobj_of(struct rb_execution_context_struct *, VALUE, VALUE, size_t);
VALUE rb_wb_unprotected_newobj_of(VALUE, VALUE, size_t);
VALUE rb_wb_protected_newobj_of(struct rb_execution_context_struct *, VALUE, VALUE, uint32_t /* shape_id_t */, size_t);
VALUE rb_wb_unprotected_newobj_of(VALUE, VALUE, uint32_t /* shape_id_t */, size_t);
size_t rb_obj_memsize_of(VALUE);
struct rb_gc_object_metadata_entry *rb_gc_object_metadata(VALUE obj);
void rb_gc_mark_values(long n, const VALUE *values);

View File

@ -124,18 +124,17 @@ rb_class_allocate_instance(VALUE klass)
size = sizeof(struct RObject);
}
NEWOBJ_OF(o, struct RObject, klass,
T_OBJECT | (RGENGC_WB_PROTECTED_OBJECT ? FL_WB_PROTECTED : 0), size, 0);
// There might be a NEWOBJ tracepoint callback, and it may set fields.
// So the shape must be passed to `NEWOBJ_OF`.
VALUE flags = T_OBJECT | (RGENGC_WB_PROTECTED_OBJECT ? FL_WB_PROTECTED : 0);
NEWOBJ_OF_WITH_SHAPE(o, struct RObject, klass, flags, rb_shape_root(rb_gc_heap_id_for_size(size)), size, 0);
VALUE obj = (VALUE)o;
RUBY_ASSERT(RSHAPE_TYPE_P(RBASIC_SHAPE_ID(obj), SHAPE_ROOT));
RBASIC_SET_SHAPE_ID(obj, rb_shape_root(rb_gc_heap_id_for_size(size)));
#if RUBY_DEBUG
RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
VALUE *ptr = ROBJECT_FIELDS(obj);
for (size_t i = 0; i < ROBJECT_FIELDS_CAPACITY(obj); i++) {
size_t fields_count = RSHAPE_LEN(RBASIC_SHAPE_ID(obj));
for (size_t i = fields_count; i < ROBJECT_FIELDS_CAPACITY(obj); i++) {
ptr[i] = Qundef;
}
if (rb_obj_class(obj) != rb_class_real(klass)) {

View File

@ -1240,6 +1240,10 @@ rb_shape_foreach_field(shape_id_t initial_shape_id, rb_shape_foreach_transition_
bool
rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id)
{
if (shape_id == ROOT_SHAPE_ID) {
return true;
}
if (shape_id == INVALID_SHAPE_ID) {
rb_bug("Can't set INVALID_SHAPE_ID on an object");
}

14
shape.h
View File

@ -163,10 +163,8 @@ bool rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id);
#endif
static inline void
RBASIC_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
RBASIC_SET_SHAPE_ID_NO_CHECKS(VALUE obj, shape_id_t shape_id)
{
RUBY_ASSERT(!RB_SPECIAL_CONST_P(obj));
RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
#if RBASIC_SHAPE_ID_FIELD
RBASIC(obj)->shape_id = (VALUE)shape_id;
#else
@ -174,6 +172,16 @@ RBASIC_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
RBASIC(obj)->flags &= SHAPE_FLAG_MASK;
RBASIC(obj)->flags |= ((VALUE)(shape_id) << SHAPE_FLAG_SHIFT);
#endif
}
static inline void
RBASIC_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
{
RUBY_ASSERT(!RB_SPECIAL_CONST_P(obj));
RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
RBASIC_SET_SHAPE_ID_NO_CHECKS(obj, shape_id);
RUBY_ASSERT(rb_shape_verify_consistency(obj, shape_id));
}

View File

@ -819,12 +819,17 @@ struct_alloc(VALUE klass)
if (n > 0 && rb_gc_size_allocatable_p(embedded_size)) {
flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
if (RCLASS_MAX_IV_COUNT(klass) == 0) {
// We set the flag before calling `NEWOBJ_OF` in case a NEWOBJ tracepoint does
// attempt to write fields. We'll remove it later if no fields was written to.
flags |= RSTRUCT_GEN_FIELDS;
}
NEWOBJ_OF(st, struct RStruct, klass, flags, embedded_size, 0);
if (RCLASS_MAX_IV_COUNT(klass) == 0 && embedded_size == rb_gc_obj_slot_size((VALUE)st)) {
FL_SET_RAW((VALUE)st, RSTRUCT_GEN_FIELDS);
}
else {
if (RCLASS_MAX_IV_COUNT(klass) == 0
&& !rb_shape_obj_has_fields((VALUE)st)
&& embedded_size < rb_gc_obj_slot_size((VALUE)st)) {
FL_UNSET_RAW((VALUE)st, RSTRUCT_GEN_FIELDS);
RSTRUCT_SET_FIELDS_OBJ((VALUE)st, 0);
}
rb_mem_clear((VALUE *)st->as.ary, n);

View File

@ -82,6 +82,24 @@ class TestTracepointObj < Test::Unit::TestCase
end
end
def test_tracepoint_add_object_id
Bug.tracepoint_add_object_id do
klass = Struct.new
2.times { klass.new }
klass = Struct.new(:a)
2.times { klass.new }
klass = Struct.new(:a, :b, :c)
2.times { klass.new }
2.times { Set.new } # To test T_DATA / TypedData RUBY_TYPED_EMBEDDABLE
2.times { Proc.new { } } # To test T_DATA / TypedData non embeddable
2.times { Object.new }
end
end
def test_teardown_with_active_GC_end_hook
assert_separately([], 'require("-test-/tracepoint"); Bug.after_gc_exit_hook = proc {}; GC.start')
end