From 05fc01188a5315a5de034122f6c4864702f80378 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 28 Aug 2025 14:08:56 +0200 Subject: [PATCH] rb_ivar_delete: allow complex transition `rb_ivar_delete` would force a new shape to be created. And in the case of a shape exhaustion, it wouldn't handle T_CLASS/T_MODULE correctly. --- shape.c | 137 ++++++++++++++-------------- test/ruby/test_shapes.rb | 27 ++++++ variable.c | 191 ++++++++++++++++++--------------------- 3 files changed, 182 insertions(+), 173 deletions(-) diff --git a/shape.c b/shape.c index 599b774226..d6b5c30408 100644 --- a/shape.c +++ b/shape.c @@ -656,42 +656,6 @@ get_next_shape_internal(rb_shape_t *shape, ID id, enum shape_type shape_type, bo return res; } -static rb_shape_t * -remove_shape_recursive(rb_shape_t *shape, ID id, rb_shape_t **removed_shape) -{ - if (shape->parent_id == INVALID_SHAPE_ID) { - // We've hit the top of the shape tree and couldn't find the - // IV we wanted to remove, so return NULL - *removed_shape = NULL; - return NULL; - } - else { - if (shape->type == SHAPE_IVAR && shape->edge_name == id) { - *removed_shape = shape; - - return RSHAPE(shape->parent_id); - } - else { - // This isn't the IV we want to remove, keep walking up. - rb_shape_t *new_parent = remove_shape_recursive(RSHAPE(shape->parent_id), id, removed_shape); - - // We found a new parent. Create a child of the new parent that - // has the same attributes as this shape. - if (new_parent) { - bool dont_care; - rb_shape_t *new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true); - RUBY_ASSERT(!new_child || new_child->capacity <= shape->capacity); - return new_child; - } - else { - // We went all the way to the top of the shape tree and couldn't - // find an IV to remove so return NULL. - return NULL; - } - } - } -} - static inline shape_id_t transition_complex(shape_id_t shape_id); static shape_id_t @@ -758,34 +722,6 @@ transition_complex(shape_id_t shape_id) return next_shape_id; } -shape_id_t -rb_shape_transition_remove_ivar(VALUE obj, ID id, shape_id_t *removed_shape_id) -{ - shape_id_t original_shape_id = RBASIC_SHAPE_ID(obj); - - RUBY_ASSERT(!rb_shape_too_complex_p(original_shape_id)); - RUBY_ASSERT(!shape_frozen_p(original_shape_id)); - - rb_shape_t *removed_shape = NULL; - rb_shape_t *new_shape = remove_shape_recursive(RSHAPE(original_shape_id), id, &removed_shape); - - if (removed_shape) { - *removed_shape_id = raw_shape_id(removed_shape); - } - - if (new_shape) { - return shape_id(new_shape, original_shape_id); - } - else if (removed_shape) { - // We found the shape to remove, but couldn't create a new variation. - // We must transition to TOO_COMPLEX. - shape_id_t next_shape_id = transition_complex(original_shape_id); - RUBY_ASSERT(rb_shape_has_object_id(next_shape_id) == rb_shape_has_object_id(original_shape_id)); - return next_shape_id; - } - return original_shape_id; -} - shape_id_t rb_shape_transition_frozen(VALUE obj) { @@ -865,7 +801,7 @@ shape_get_iv_index(rb_shape_t *shape, ID id, attr_index_t *value) } static inline rb_shape_t * -shape_get_next(rb_shape_t *shape, VALUE obj, ID id, bool emit_warnings) +shape_get_next(rb_shape_t *shape, enum shape_type shape_type, VALUE obj, ID id, bool emit_warnings) { RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id)))); @@ -895,7 +831,7 @@ shape_get_next(rb_shape_t *shape, VALUE obj, ID id, bool emit_warnings) bool allow_new_shape = RCLASS_VARIATION_COUNT(klass) < SHAPE_MAX_VARIATIONS; bool variation_created = false; - rb_shape_t *new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created, allow_new_shape); + rb_shape_t *new_shape = get_next_shape_internal(shape, id, shape_type, &variation_created, allow_new_shape); if (!new_shape) { // We could create a new variation, transitioning to TOO_COMPLEX. @@ -926,13 +862,78 @@ shape_get_next(rb_shape_t *shape, VALUE obj, ID id, bool emit_warnings) return new_shape; } +static rb_shape_t * +remove_shape_recursive(VALUE obj, rb_shape_t *shape, ID id, rb_shape_t **removed_shape) +{ + if (shape->parent_id == INVALID_SHAPE_ID) { + // We've hit the top of the shape tree and couldn't find the + // IV we wanted to remove, so return NULL + *removed_shape = NULL; + return NULL; + } + else { + if (shape->type == SHAPE_IVAR && shape->edge_name == id) { + *removed_shape = shape; + + return RSHAPE(shape->parent_id); + } + else { + // This isn't the IV we want to remove, keep walking up. + rb_shape_t *new_parent = remove_shape_recursive(obj, RSHAPE(shape->parent_id), id, removed_shape); + + // We found a new parent. Create a child of the new parent that + // has the same attributes as this shape. + if (new_parent) { + rb_shape_t *new_child = shape_get_next(new_parent, shape->type, obj, shape->edge_name, true); + RUBY_ASSERT(!new_child || new_child->capacity <= shape->capacity); + return new_child; + } + else { + // We went all the way to the top of the shape tree and couldn't + // find an IV to remove so return NULL. + return NULL; + } + } + } +} + +shape_id_t +rb_shape_transition_remove_ivar(VALUE obj, ID id, shape_id_t *removed_shape_id) +{ + shape_id_t original_shape_id = RBASIC_SHAPE_ID(obj); + RUBY_ASSERT(!shape_frozen_p(original_shape_id)); + + if (rb_shape_too_complex_p(original_shape_id)) { + return original_shape_id; + } + + rb_shape_t *removed_shape = NULL; + rb_shape_t *new_shape = remove_shape_recursive(obj, RSHAPE(original_shape_id), id, &removed_shape); + + if (removed_shape) { + *removed_shape_id = raw_shape_id(removed_shape); + } + + if (new_shape) { + return shape_id(new_shape, original_shape_id); + } + else if (removed_shape) { + // We found the shape to remove, but couldn't create a new variation. + // We must transition to TOO_COMPLEX. + shape_id_t next_shape_id = transition_complex(original_shape_id); + RUBY_ASSERT(rb_shape_has_object_id(next_shape_id) == rb_shape_has_object_id(original_shape_id)); + return next_shape_id; + } + return original_shape_id; +} + shape_id_t rb_shape_transition_add_ivar(VALUE obj, ID id) { shape_id_t original_shape_id = RBASIC_SHAPE_ID(obj); RUBY_ASSERT(!shape_frozen_p(original_shape_id)); - rb_shape_t *next_shape = shape_get_next(RSHAPE(original_shape_id), obj, id, true); + rb_shape_t *next_shape = shape_get_next(RSHAPE(original_shape_id), SHAPE_IVAR, obj, id, true); if (next_shape) { return shape_id(next_shape, original_shape_id); } @@ -947,7 +948,7 @@ rb_shape_transition_add_ivar_no_warnings(VALUE obj, ID id) shape_id_t original_shape_id = RBASIC_SHAPE_ID(obj); RUBY_ASSERT(!shape_frozen_p(original_shape_id)); - rb_shape_t *next_shape = shape_get_next(RSHAPE(original_shape_id), obj, id, false); + rb_shape_t *next_shape = shape_get_next(RSHAPE(original_shape_id), SHAPE_IVAR, obj, id, false); if (next_shape) { return shape_id(next_shape, original_shape_id); } diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb index 08a841d254..453ca8f6a7 100644 --- a/test/ruby/test_shapes.rb +++ b/test/ruby/test_shapes.rb @@ -2,6 +2,7 @@ require 'test/unit' require 'objspace' require 'json' +require 'securerandom' # These test the functionality of object shapes class TestShapes < Test::Unit::TestCase @@ -1182,4 +1183,30 @@ class TestShapes < Test::Unit::TestCase tc.send("a#{_1}_m") end end + + def assert_too_complex_during_delete(obj) + obj.instance_variable_set("@___#{SecureRandom.hex}", 1) + + (RubyVM::Shape::SHAPE_MAX_VARIATIONS * 2).times do |i| + obj.instance_variable_set("@ivar#{i}", i) + end + + refute_predicate RubyVM::Shape.of(obj), :too_complex? + (RubyVM::Shape::SHAPE_MAX_VARIATIONS * 2).times do |i| + obj.remove_instance_variable("@ivar#{i}") + end + assert_predicate RubyVM::Shape.of(obj), :too_complex? + end + + def test_object_too_complex_during_delete + assert_too_complex_during_delete(Class.new.new) + end + + def test_class_too_complex_during_delete + assert_too_complex_during_delete(Module.new) + end + + def test_generic_too_complex_during_delete + assert_too_complex_during_delete(Class.new(Array).new) + end end if defined?(RubyVM::Shape) diff --git a/variable.c b/variable.c index 8964b8e5ed..690b7a26cf 100644 --- a/variable.c +++ b/variable.c @@ -1305,6 +1305,11 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie { ivar_ractor_check(obj, field_name); + if (!fields_obj) { + rb_free_generic_ivar(obj); + return; + } + RUBY_ASSERT(IMEMO_TYPE_P(fields_obj, imemo_fields)); RUBY_ASSERT(!original_fields_obj || IMEMO_TYPE_P(original_fields_obj, imemo_fields)); @@ -1465,14 +1470,11 @@ rb_attr_get(VALUE obj, ID id) } void rb_obj_copy_fields_to_hash_table(VALUE obj, st_table *table); +static VALUE imemo_fields_complex_from_obj(VALUE owner, VALUE source_fields_obj, shape_id_t shape_id); static shape_id_t obj_transition_too_complex(VALUE obj, st_table *table) { - if (BUILTIN_TYPE(obj) == T_CLASS || BUILTIN_TYPE(obj) == T_MODULE) { - return obj_transition_too_complex(RCLASS_WRITABLE_ENSURE_FIELDS_OBJ(obj), table); - } - RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj)); shape_id_t shape_id = rb_shape_transition_complex(obj); @@ -1495,11 +1497,12 @@ obj_transition_too_complex(VALUE obj, st_table *table) break; case T_CLASS: case T_MODULE: - rb_bug("Unreachable"); + case T_IMEMO: + UNREACHABLE; break; default: { - VALUE fields_obj = rb_imemo_fields_new_complex_tbl(rb_obj_class(obj), table); + VALUE fields_obj = rb_imemo_fields_new_complex_tbl(obj, table); RBASIC_SET_SHAPE_ID(fields_obj, shape_id); rb_obj_replace_fields(obj, fields_obj); } @@ -1542,125 +1545,103 @@ rb_ivar_delete(VALUE obj, ID id, VALUE undef) rb_check_frozen(obj); VALUE val = undef; - if (BUILTIN_TYPE(obj) == T_CLASS || BUILTIN_TYPE(obj) == T_MODULE) { - IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR(id); + VALUE fields_obj; + bool concurrent = false; + int type = BUILTIN_TYPE(obj); - VALUE fields_obj = RCLASS_WRITABLE_FIELDS_OBJ(obj); - if (fields_obj) { - if (rb_multi_ractor_p()) { - fields_obj = rb_imemo_fields_clone(fields_obj); - val = rb_ivar_delete(fields_obj, id, undef); - RCLASS_WRITABLE_SET_FIELDS_OBJ(obj, fields_obj); - } - else { - val = rb_ivar_delete(fields_obj, id, undef); - } - // TODO: What should we set as the T_CLASS shape_id? - // In most case we can replicate the single `fields_obj` shape - // but in namespaced case? Perhaps INVALID_SHAPE_ID? - RBASIC_SET_SHAPE_ID(obj, RBASIC_SHAPE_ID(fields_obj)); - } - return val; - } - - shape_id_t old_shape_id = rb_obj_shape_id(obj); - if (rb_shape_too_complex_p(old_shape_id)) { - goto too_complex; - } - - shape_id_t removed_shape_id = 0; - shape_id_t next_shape_id = rb_shape_transition_remove_ivar(obj, id, &removed_shape_id); - - if (next_shape_id == old_shape_id) { - return undef; - } - - if (UNLIKELY(rb_shape_too_complex_p(next_shape_id))) { - rb_evict_fields_to_hash(obj); - goto too_complex; - } - - RUBY_ASSERT(RSHAPE_LEN(next_shape_id) == RSHAPE_LEN(old_shape_id) - 1); - - VALUE *fields; - switch(BUILTIN_TYPE(obj)) { + switch(type) { case T_CLASS: case T_MODULE: - rb_bug("Unreachable"); - break; - case T_IMEMO: - RUBY_ASSERT(IMEMO_TYPE_P(obj, imemo_fields)); - fields = rb_imemo_fields_ptr(obj); + IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR(id); + + fields_obj = RCLASS_WRITABLE_FIELDS_OBJ(obj); + if (rb_multi_ractor_p()) { + concurrent = true; + } break; case T_OBJECT: - fields = ROBJECT_FIELDS(obj); + fields_obj = obj; break; default: { - VALUE fields_obj = rb_obj_fields(obj, id); - fields = rb_imemo_fields_ptr(fields_obj); + fields_obj = rb_obj_fields(obj, id); break; } } - RUBY_ASSERT(removed_shape_id != INVALID_SHAPE_ID); + if (!fields_obj) { + return undef; + } - attr_index_t removed_index = RSHAPE_INDEX(removed_shape_id); - val = fields[removed_index]; + const VALUE original_fields_obj = fields_obj; + if (concurrent) { + fields_obj = rb_imemo_fields_clone(fields_obj); + } - attr_index_t new_fields_count = RSHAPE_LEN(next_shape_id); - if (new_fields_count) { - size_t trailing_fields = new_fields_count - removed_index; + shape_id_t old_shape_id = RBASIC_SHAPE_ID(fields_obj); + shape_id_t removed_shape_id; + shape_id_t next_shape_id = rb_shape_transition_remove_ivar(fields_obj, id, &removed_shape_id); - MEMMOVE(&fields[removed_index], &fields[removed_index + 1], VALUE, trailing_fields); + if (UNLIKELY(rb_shape_too_complex_p(next_shape_id))) { + if (UNLIKELY(!rb_shape_too_complex_p(old_shape_id))) { + if (type == T_OBJECT) { + rb_evict_fields_to_hash(obj); + } + else { + fields_obj = imemo_fields_complex_from_obj(obj, fields_obj, next_shape_id); + } + } + st_data_t key = id; + if (!st_delete(rb_imemo_fields_complex_tbl(fields_obj), &key, (st_data_t *)&val)) { + val = undef; + } } else { - rb_free_generic_ivar(obj); - } - - if (RB_TYPE_P(obj, T_OBJECT) && - FL_TEST_RAW(obj, ROBJECT_HEAP) && - rb_obj_embedded_size(new_fields_count) <= rb_gc_obj_slot_size(obj)) { - // Re-embed objects when instances become small enough - // This is necessary because YJIT assumes that objects with the same shape - // have the same embeddedness for efficiency (avoid extra checks) - FL_UNSET_RAW(obj, ROBJECT_HEAP); - MEMCPY(ROBJECT_FIELDS(obj), fields, VALUE, new_fields_count); - xfree(fields); - } - RBASIC_SET_SHAPE_ID(obj, next_shape_id); - - return val; - -too_complex: - { - st_table *table = NULL; - switch (BUILTIN_TYPE(obj)) { - case T_CLASS: - case T_MODULE: - rb_bug("Unreachable"); - break; - - case T_IMEMO: - RUBY_ASSERT(IMEMO_TYPE_P(obj, imemo_fields)); - table = rb_imemo_fields_complex_tbl(obj); - break; - - case T_OBJECT: - table = ROBJECT_FIELDS_HASH(obj); - break; - - default: { - VALUE fields_obj = rb_obj_fields(obj, id); - table = rb_imemo_fields_complex_tbl(fields_obj); - break; - } + if (next_shape_id == old_shape_id) { + return undef; } - if (table) { - if (!st_delete(table, (st_data_t *)&id, (st_data_t *)&val)) { - val = undef; + RUBY_ASSERT(removed_shape_id != INVALID_SHAPE_ID); + RUBY_ASSERT(RSHAPE_LEN(next_shape_id) == RSHAPE_LEN(old_shape_id) - 1); + + VALUE *fields = rb_imemo_fields_ptr(fields_obj); + attr_index_t removed_index = RSHAPE_INDEX(removed_shape_id); + val = fields[removed_index]; + + attr_index_t new_fields_count = RSHAPE_LEN(next_shape_id); + if (new_fields_count) { + size_t trailing_fields = new_fields_count - removed_index; + + MEMMOVE(&fields[removed_index], &fields[removed_index + 1], VALUE, trailing_fields); + RBASIC_SET_SHAPE_ID(fields_obj, next_shape_id); + + if (type == T_OBJECT && FL_TEST_RAW(obj, ROBJECT_HEAP) && rb_obj_embedded_size(new_fields_count) <= rb_gc_obj_slot_size(obj)) { + // Re-embed objects when instances become small enough + // This is necessary because YJIT assumes that objects with the same shape + // have the same embeddedness for efficiency (avoid extra checks) + FL_UNSET_RAW(obj, ROBJECT_HEAP); + MEMCPY(ROBJECT_FIELDS(obj), fields, VALUE, new_fields_count); + xfree(fields); } } + else { + fields_obj = 0; + rb_free_generic_ivar(obj); + } + } + + RBASIC_SET_SHAPE_ID(obj, next_shape_id); + if (fields_obj != original_fields_obj) { + switch (type) { + case T_OBJECT: + break; + case T_CLASS: + case T_MODULE: + RCLASS_WRITABLE_SET_FIELDS_OBJ(obj, fields_obj); + break; + default: + rb_obj_set_fields(obj, fields_obj, id, original_fields_obj); + break; + } } return val;