Pin ID symbols

Symbols that have a corresponding ID must be pinned, because they may be
used in places that don't support compaction.
This commit is contained in:
Peter Zhu 2026-01-14 17:09:26 -05:00
parent 6e480e6714
commit f7ae32ed3b
Notes: git 2026-01-15 22:58:01 +00:00
3 changed files with 112 additions and 67 deletions

View File

@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
#include <stdlib.h> #include <stdlib.h>
#include "ruby/ruby.h"
// Type for a dynamic array. Use to declare a dynamic array. // Type for a dynamic array. Use to declare a dynamic array.
// It is a pointer so it fits in st_table nicely. Designed // It is a pointer so it fits in st_table nicely. Designed
@ -147,6 +148,9 @@ rb_darray_size(const void *ary)
return meta ? meta->size : 0; return meta ? meta->size : 0;
} }
/* Approximate number of bytes used by a darray: the size of the struct that
 * `ary` points to, plus one element's size for each element counted by
 * rb_darray_size(ary). Handy as the memsize function of TypedData objects. */
#define rb_darray_memsize(ary) (sizeof(*(ary)) + (sizeof((ary)->data[0]) * rb_darray_size(ary)))
static inline void static inline void
rb_darray_pop(void *ary, size_t count) rb_darray_pop(void *ary, size_t count)

1
depend
View File

@ -17138,6 +17138,7 @@ symbol.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
symbol.$(OBJEXT): {$(VPATH)}builtin.h symbol.$(OBJEXT): {$(VPATH)}builtin.h
symbol.$(OBJEXT): {$(VPATH)}config.h symbol.$(OBJEXT): {$(VPATH)}config.h
symbol.$(OBJEXT): {$(VPATH)}constant.h symbol.$(OBJEXT): {$(VPATH)}constant.h
symbol.$(OBJEXT): {$(VPATH)}darray.h
symbol.$(OBJEXT): {$(VPATH)}debug_counter.h symbol.$(OBJEXT): {$(VPATH)}debug_counter.h
symbol.$(OBJEXT): {$(VPATH)}defines.h symbol.$(OBJEXT): {$(VPATH)}defines.h
symbol.$(OBJEXT): {$(VPATH)}encoding.h symbol.$(OBJEXT): {$(VPATH)}encoding.h

174
symbol.c
View File

@ -9,6 +9,7 @@
**********************************************************************/ **********************************************************************/
#include "darray.h"
#include "internal.h" #include "internal.h"
#include "internal/concurrent_set.h" #include "internal/concurrent_set.h"
#include "internal/error.h" #include "internal/error.h"
@ -87,12 +88,6 @@ Init_op_tbl(void)
static const int ID_ENTRY_UNIT = 512; static const int ID_ENTRY_UNIT = 512;
enum id_entry_type {
ID_ENTRY_STR,
ID_ENTRY_SYM,
ID_ENTRY_SIZE
};
typedef struct { typedef struct {
rb_atomic_t next_id; rb_atomic_t next_id;
VALUE sym_set; VALUE sym_set;
@ -169,6 +164,62 @@ sym_set_cmp(VALUE a, VALUE b)
return rb_str_hash_cmp(sym_set_sym_get_str(a), sym_set_sym_get_str(b)) == false; return rb_str_hash_cmp(sym_set_sym_get_str(a), sym_set_sym_get_str(b)) == false;
} }
// One slot of an ID entry list, filled in by set_id_entry(). Both fields are
// expected to be 0 before set_id_entry() writes them (it asserts this).
struct sym_id_entry {
// Symbol stored for the ID; marked with rb_gc_mark() (pinned) by
// sym_id_entry_list_mark().
VALUE sym;
// String stored for the ID; marked with rb_gc_mark_movable() and updated by
// sym_id_entry_list_compact().
VALUE str;
};
static void
sym_id_entry_list_mark(void *ptr)
{
rb_darray(struct sym_id_entry) ary = ptr;
struct sym_id_entry *entry;
rb_darray_foreach(ary, i, entry) {
// sym must be pinned because it may be used in places that don't
// support compaction
rb_gc_mark(entry->sym);
rb_gc_mark_movable(entry->str);
}
}
static void
sym_id_entry_list_free(void *ptr)
{
rb_darray(struct sym_id_entry) ary = ptr;
rb_darray_free(ary);
}
static size_t
sym_id_entry_list_memsize(const void *ptr)
{
const rb_darray(struct sym_id_entry) ary = ptr;
return rb_darray_memsize(ary);
}
static void
sym_id_entry_list_compact(void *ptr)
{
rb_darray(struct sym_id_entry) ary = ptr;
struct sym_id_entry *entry;
rb_darray_foreach(ary, i, entry) {
entry->str = rb_gc_location(entry->str);
}
}
static const rb_data_type_t sym_id_entry_list_type = {
"symbol_id_entry_list",
{
sym_id_entry_list_mark,
sym_id_entry_list_free,
sym_id_entry_list_memsize,
sym_id_entry_list_compact,
},
0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
static int static int
sym_check_asciionly(VALUE str, bool fake_str) sym_check_asciionly(VALUE str, bool fake_str)
@ -231,14 +282,24 @@ set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
size_t idx = num / ID_ENTRY_UNIT; size_t idx = num / ID_ENTRY_UNIT;
VALUE ary, ids = symbols->ids; VALUE id_entry_list, ids = symbols->ids;
if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) { rb_darray(struct sym_id_entry) entries;
ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE); if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(id_entry_list = rb_ary_entry(ids, (long)idx))) {
rb_ary_store(ids, (long)idx, ary); rb_darray_make(&entries, ID_ENTRY_UNIT);
id_entry_list = TypedData_Wrap_Struct(0, &sym_id_entry_list_type, entries);
rb_ary_store(ids, (long)idx, id_entry_list);
} }
idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE; else {
rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str); entries = RTYPEDDATA_GET_DATA(id_entry_list);
rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym); }
idx = num % ID_ENTRY_UNIT;
struct sym_id_entry *entry = rb_darray_ref(entries, idx);
RUBY_ASSERT(entry->str == 0);
RUBY_ASSERT(entry->sym == 0);
RB_OBJ_WRITE(id_entry_list, &entry->str, str);
RB_OBJ_WRITE(id_entry_list, &entry->sym, sym);
} }
static VALUE static VALUE
@ -394,7 +455,7 @@ rb_free_global_symbol_table(void)
} }
WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id)); WARN_UNUSED_RESULT(static VALUE get_id_str(ID id));
ID ID
rb_id_attrset(ID id) rb_id_attrset(ID id)
@ -419,7 +480,7 @@ rb_id_attrset(ID id)
return id; return id;
default: default:
{ {
VALUE str = lookup_id_str(id); VALUE str = get_id_str(id);
if (str != 0) { if (str != 0) {
rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset", rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
scope, str); scope, str);
@ -434,7 +495,7 @@ rb_id_attrset(ID id)
bool error = false; bool error = false;
/* make new symbol and ID */ /* make new symbol and ID */
VALUE str = lookup_id_str(id); VALUE str = get_id_str(id);
if (str) { if (str) {
str = rb_str_dup(str); str = rb_str_dup(str);
rb_str_cat(str, "=", 1); rb_str_cat(str, "=", 1);
@ -705,75 +766,60 @@ rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1; return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
} }
static VALUE static struct sym_id_entry *
get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t) get_id_serial_entry(rb_id_serial_t num)
{ {
VALUE result = 0; struct sym_id_entry *entry = NULL;
GLOBAL_SYMBOLS_LOCKING(symbols) { GLOBAL_SYMBOLS_LOCKING(symbols) {
if (num && num < RUBY_ATOMIC_LOAD(symbols->next_id)) { if (num && num < RUBY_ATOMIC_LOAD(symbols->next_id)) {
size_t idx = num / ID_ENTRY_UNIT; size_t idx = num / ID_ENTRY_UNIT;
VALUE ids = symbols->ids; VALUE ids = symbols->ids;
VALUE ary; VALUE id_entry_list;
if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) { if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(id_entry_list = rb_ary_entry(ids, (long)idx))) {
long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE; rb_darray(struct sym_id_entry) entries = RTYPEDDATA_GET_DATA(id_entry_list);
result = rb_ary_entry(ary, pos + t);
if (NIL_P(result)) { size_t pos = (size_t)(num % ID_ENTRY_UNIT);
result = 0; RUBY_ASSERT(pos < rb_darray_size(entries));
} entry = rb_darray_ref(entries, pos);
else if (CHECK_ID_SERIAL) {
if (id) {
VALUE sym = result;
if (t != ID_ENTRY_SYM)
sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
if (STATIC_SYM_P(sym)) {
if (STATIC_SYM2ID(sym) != id) result = 0;
}
else {
if (RSYMBOL(sym)->id != id) result = 0;
}
}
}
} }
} }
} }
if (result) { return entry;
switch (t) {
case ID_ENTRY_STR:
RUBY_ASSERT_BUILTIN_TYPE(result, T_STRING);
break;
case ID_ENTRY_SYM:
RUBY_ASSERT_BUILTIN_TYPE(result, T_SYMBOL);
break;
default:
break;
}
}
return result;
} }
static VALUE static VALUE
get_id_entry(ID id, const enum id_entry_type t) get_id_sym(ID id)
{ {
return get_id_serial_entry(rb_id_to_serial(id), id, t); struct sym_id_entry *entry = get_id_serial_entry(rb_id_to_serial(id));
return entry ? entry->sym : 0;
}
// Returns the string stored in the entry for `id`, or 0 when
// get_id_serial_entry() finds no entry for the ID's serial number.
static VALUE
get_id_str(ID id)
{
    struct sym_id_entry *found = get_id_serial_entry(rb_id_to_serial(id));

    if (!found) {
        return 0;
    }
    return found->str;
}
int int
rb_static_id_valid_p(ID id) rb_static_id_valid_p(ID id)
{ {
return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM); return STATIC_ID2SYM(id) == get_id_sym(id);
} }
static inline ID static inline ID
rb_id_serial_to_id(rb_id_serial_t num) rb_id_serial_to_id(rb_id_serial_t num)
{ {
if (is_notop_id((ID)num)) { if (is_notop_id((ID)num)) {
VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM); struct sym_id_entry *entry = get_id_serial_entry(num);
if (sym) return SYM2ID(sym); if (entry && entry->sym != 0) {
return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM; return SYM2ID(entry->sym);
}
else {
return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
}
} }
else { else {
return (ID)num; return (ID)num;
@ -836,12 +882,6 @@ lookup_str_id(VALUE str)
return (ID)0; return (ID)0;
} }
static VALUE
lookup_id_str(ID id)
{
return get_id_entry(id, ID_ENTRY_STR);
}
ID ID
rb_intern3(const char *name, long len, rb_encoding *enc) rb_intern3(const char *name, long len, rb_encoding *enc)
{ {
@ -974,7 +1014,7 @@ VALUE
rb_id2sym(ID x) rb_id2sym(ID x)
{ {
if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x); if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
return get_id_entry(x, ID_ENTRY_SYM); return get_id_sym(x);
} }
/* /*
@ -1008,7 +1048,7 @@ rb_sym2str(VALUE sym)
VALUE VALUE
rb_id2str(ID id) rb_id2str(ID id)
{ {
return lookup_id_str(id); return get_id_str(id);
} }
const char * const char *