8000 gc.c: Fix a race condition in object_id for shareable objects · ruby/ruby@d202c39 · GitHub
[go: up one dir, main page]

Skip to content

Commit d202c39

Browse files
committed
gc.c: Fix a race condition in object_id for shareable objects
If an object is shareable and has no capacity left, it isn't safe to store the object ID in fields as it requires an object resize which can't be done unless all field reads are synchronized. So in this case we have to store the ID externally like we used to.
1 parent b5575a8 commit d202c39

File tree

7 files changed

+255
-28
lines changed

7 files changed

+255
-28
lines changed

ext/objspace/objspace_dump.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,8 @@ shape_i(rb_shape_t *shape, void *data)
827827
break;
828828
case SHAPE_OBJ_ID:
829829
dump_append(dc, ", \"shape_type\":\"OBJ_ID\"");
830+
case SHAPE_EXTERNAL_OBJ_ID:
831+
dump_append(dc, ", \"shape_type\":\"EXTERNAL_OBJ_ID\"");
830832
break;
831833
}
832834

gc.c

Lines changed: 141 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,61 @@ static const rb_data_type_t id2ref_tbl_type = {
18791879
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
18801880
};
18811881

1882+
static VALUE obj_to_id_value = 0;
1883+
static st_table *obj_to_id_tbl = NULL;
1884+
1885+
static void mark_hash_values(st_table *tbl);
1886+
1887+
static void
1888+
obj_to_id_tbl_mark(void *data)
1889+
{
1890+
st_table *table = (st_table *)data;
1891+
if (UNLIKELY(!RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1892+
// It's very unlikely, but if enough object ids were generated, keys may be T_BIGNUM
1893+
mark_hash_values(table);
1894+
}
1895+
// We purposedly don't mark keys, as they are weak references.
1896+
// rb_gc_obj_free_vm_weak_references takes care of cleaning them up.
1897+
}
1898+
1899+
static size_t
1900+
obj_to_id_tbl_memsize(const void *data)
1901+
{
1902+
return rb_st_memsize(data);
1903+
}
1904+
1905+
static void
1906+
obj_to_id_tbl_compact(void *data)
1907+
{
1908+
st_table *table = (st_table *)data;
1909+
if (LIKELY(RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1910+
// We know values are all FIXNUM, so no need to update them.
1911+
gc_ref_update_table_keys_only(table);
1912+
}
1913+
else {
1914+
gc_update_table_refs(table);
1915+
}
1916+
}
1917+
1918+
static void
1919+
obj_to_id_tbl_free(void *data)
1920+
{
1921+
obj_to_id_tbl = NULL; // clear global ref
1922+
st_table *table = (st_table *)data;
1923+
st_free_table(table);
1924+
}
1925+
1926+
static const rb_data_type_t obj_to_id_tbl_type = {
1927+
.wrap_struct_name = "VM/obj_to_id_table",
1928+
.function = {
1929+
.dmark = obj_to_id_tbl_mark,
1930+
.dfree = obj_to_id_tbl_free,
1931+
.dsize = obj_to_id_tbl_memsize,
1932+
.dcompact = obj_to_id_tbl_compact,
1933+
},
1934+
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
1935+
};
1936+
18821937
#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
18831938

18841939
static VALUE
@@ -1901,23 +1956,46 @@ class_object_id(VALUE klass)
19011956
}
19021957

19031958
static VALUE
1904-
object_id0(VALUE obj)
1959+
object_id0(VALUE obj, bool shareable)
19051960
{
19061961
VALUE id = Qfalse;
19071962

1908-
if (rb_shape_has_object_id(rb_obj_shape(obj))) {
1909-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1910-
id = rb_obj_field_get(obj, object_id_shape_id);
1963+
rb_shape_t *current_shape = rb_obj_shape(obj);
1964+
1965+
if (rb_shape_has_object_id(current_shape)) {
1966+
shape_id_t object_id_shape_id = rb_shape_object_id(obj);
1967+
1968+
if (RB_UNLIKELY(shareable && RSHAPE(object_id_shape_id)->type == SHAPE_EXTERNAL_OBJ_ID)) {
1969+
RUBY_ASSERT(obj_to_id_tbl);
1970+
st_lookup(obj_to_id_tbl, obj, &id);
1971+
}
1972+
else {
1973+
id = rb_obj_field_get(obj, object_id_shape_id);
1974+
}
19111975
RUBY_ASSERT(id, "object_id missing");
19121976
return id;
19131977
}
19141978

1915-
// rb_shape_object_id_shape may lock if the current shape has
1916-
// multiple children.
1917-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1918-
19191979
id = generate_next_object_id();
1920-
rb_obj_field_set(obj, object_id_shape_id, id);
1980+
// If the object is shareable and doesn't have free capacity we can't safely
1981+
// resize it. So we have to store the object_id externally.
1982+
if (shareable && current_shape->capacity && current_shape->next_field_index == current_shape->capacity) {
1983+
shape_id_t object_id_shape_id = rb_shape_transition_external_object_id(obj);
1984+
1985+
if (RB_UNLIKELY(!obj_to_id_tbl)) {
1986+
obj_to_id_tbl = st_init_numtable();
1987+
obj_to_id_value = TypedData_Wrap_Struct(0, &obj_to_id_tbl_type, obj_to_id_tbl);
1988+
}
1989+
st_insert(obj_to_id_tbl, obj, id);
1990+
rb_shape_set_shape_id(obj, object_id_shape_id);
1991+
}
1992+
else {
1993+
// rb_shape_object_id_shape may lock if the current shape has
1994+
// multiple children.
1995+
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1996+
rb_obj_field_set(obj, object_id_shape_id, id);
1997+
}
1998+
19211999
if (RB_UNLIKELY(id2ref_tbl)) {
19222000
st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj);
19232001
}
@@ -1940,12 +2018,12 @@ object_id(VALUE obj)
19402018

19412019
if (UNLIKELY(rb_gc_multi_ractor_p() && rb_ractor_shareable_p(obj))) {
19422020
unsigned int lock_lev = rb_gc_vm_lock();
1943-
VALUE id = object_id0(obj);
2021+
VALUE id = object_id0(obj, true);
19442022
rb_gc_vm_unlock(lock_lev);
19452023
return id;
19462024
}
19472025

1948-
return object_id0(obj);
2026+
return object_id0(obj, false);
19492027
}
19502028

19512029
static void
@@ -2008,23 +2086,35 @@ static inline void
20082086
obj_free_object_id(VALUE obj)
20092087
{
20102088
VALUE obj_id = 0;
2011-
if (RB_UNLIKELY(id2ref_tbl)) {
2012-
switch (BUILTIN_TYPE(obj)) {
2013-
case T_CLASS:
2014-
case T_MODULE:
2015-
if (RCLASS(obj)->object_id) {
2016-
obj_id = RCLASS(obj)->object_id;
2089+
switch (BUILTIN_TYPE(obj)) {
2090+
case T_CLASS:
2091+
case T_MODULE:
2092+
if (RB_LIKELY(!id2ref_tbl)) return;
2093+
2094+
if (RCLASS(obj)->object_id) {
2095+
obj_id = RCLASS(obj)->object_id;
2096+
}
2097+
break;
2098+
default:
2099+
if (rb_shape_obj_has_id(obj)) {
2100+
shape_id_t shape_id = rb_shape_object_id(obj);
2101+
if (RSHAPE(shape_id)->type == SHAPE_EXTERNAL_OBJ_ID) {
2102+
RUBY_ASSERT(obj_to_id_tbl);
2103+
2104+
VALUE key = obj;
2105+
st_delete(obj_to_id_tbl, &key, &obj_id);
20172106
}
2018-
break;
2019-
default:
2020-
if (rb_shape_obj_has_id(obj)) {
2021-
obj_id = object_id(obj);
2107+
else {
2108+
if (RB_LIKELY(!id2ref_tbl)) return;
2109+
2110+
obj_id = rb_obj_field_get(obj, shape_id);
20222111
}
2023-
break;
2112+
RUBY_ASSERT(obj_id);
20242113
}
2114+
break;
20252115
}
20262116

2027-
if (RB_UNLIKELY(obj_id)) {
2117+
if (RB_UNLIKELY(id2ref_tbl && obj_id)) {
20282118
RUBY_ASSERT(FIXNUM_P(obj_id) || RB_TYPE_P(obj, T_BIGNUM));
20292119

20302120
if (!st_delete(id2ref_tbl, (st_data_t *)&obj_id, NULL)) {
@@ -2717,6 +2807,14 @@ mark_key(st_data_t key, st_data_t value, st_data_t data)
27172807
return ST_CONTINUE;
27182808
}
27192809

2810+
static int
2811+
mark_value(st_data_t key, st_data_t value, st_data_t data)
2812+
{
2813+
gc_mark_internal((VALUE)value);
2814+
2815+
return ST_CONTINUE;
2816+
}
2817+
27202818
void
27212819
rb_mark_set(st_table *tbl)
27222820
{
@@ -2725,6 +2823,14 @@ rb_mark_set(st_table *tbl)
27252823
st_foreach(tbl, mark_key, (st_data_t)rb_gc_get_objspace());
27262824
}
27272825

2826+
static void
2827+
mark_hash_values(st_table *tbl)
2828+
{
2829+
if (!tbl) return;
2830+
2831+
st_foreach(tbl, mark_value, 0);
2832+
}
2833+
27282834
static int
27292835
mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
27302836
{
@@ -4132,6 +4238,17 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback,
41324238
}
41334239
break;
41344240
}
4241+
case RB_GC_VM_OBJ_TO_ID_TABLE: {
4242+
if (obj_to_id_tbl) {
4243+
st_foreach_with_replace(
4244+
obj_to_id_tbl,
4245+
vm_weak_table_foreach_weak_key,
4246+
vm_weak_table_foreach_update_weak_key,
4247+
(st_data_t)&foreach_data
4248+
);
4249+
}
4250+
break;
4251+
}
41354252
case RB_GC_VM_GENERIC_FIELDS_TABLE: {
41364253
st_table *generic_fields_tbl = rb_generic_fields_tbl_get();
41374254
if (generic_fields_tbl) {
@@ -5512,6 +5629,7 @@ Init_GC(void)
55125629
{
55135630
#undef rb_intern
55145631
rb_gc_register_address(&id2ref_value);
5632+
rb_gc_register_address(&obj_to_id_value);
55155633

55165634
malloc_offset = gc_compute_malloc_offset();
55175635

gc/gc.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ enum rb_gc_vm_weak_tables {
2929
RB_GC_VM_OVERLOADED_CME_TABLE,
3030
RB_GC_VM_GLOBAL_SYMBOLS_TABLE,
3131
RB_GC_VM_ID2REF_TABLE,
32+
RB_GC_VM_OBJ_TO_ID_TABLE,
3233
RB_GC_VM_GENERIC_FIELDS_TABLE,
3334
RB_GC_VM_FROZEN_STRINGS_TABLE,
3435
RB_GC_VM_WEAK_TABLE_COUNT
@@ -135,6 +136,34 @@ gc_ref_update_table_values_only(st_table *tbl)
135136
}
136137
}
137138

139+
static int
140+
hash_foreach_replace_key(st_data_t key, st_data_t value, st_data_t argp, int error)
141+
{
142+
if (rb_gc_location((VALUE)key) != (VALUE)key) {
143+
return ST_REPLACE;
144+
}
145+
return ST_CONTINUE;
146+
}
147+
148+
static int
149+
hash_replace_ref_key(st_data_t *key, st_data_t *value, st_data_t argp, int existing)
150+
{
151+
*key = rb_gc_location((VALUE)*key);
152+
153+
return ST_CONTINUE;
154+
}
155+
156+
static void
157+
gc_ref_update_table_keys_only(st_table *tbl)
158+
{
159+
if (!tbl || tbl->num_entries == 0) return;
160+
161+
// FIXME: this certainly isn't correct. If a key moved, we need to re-hash.
162+
if (st_foreach_with_replace(tbl, hash_foreach_replace_key, hash_replace_ref_key, 0)) {
163+
rb_raise(rb_eRuntimeError, "hash modified during iteration");
164+
}
165+
}
166+
138167
static int
139168
gc_mark_tbl_no_pin_i(st_data_t key, st_data_t value, st_data_t data)
140169
{

shape.c

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
static ID id_frozen;
4949
static ID id_t_object;
50+
static ID id_external_object_id;
5051
ID ruby_internal_object_id; // extern
5152

5253
#define LEAF 0
@@ -468,6 +469,10 @@ rb_shape_alloc_new_child(ID id, rb_shape_t *shape, enum shape_type shape_type)
468469
rb_shape_t *new_shape = rb_shape_alloc(id, shape, shape_type);
469470

470471
switch (shape_type) {
472+
case SHAPE_EXTERNAL_OBJ_ID:
473+
new_shape->flags |= SHAPE_FL_HAS_OBJECT_ID;
474+
new_shape->next_field_index = shape->next_field_index;
475+
break;
471476
case SHAPE_OBJ_ID:
472477
new_shape->flags |= SHAPE_FL_HAS_OBJECT_ID;
473478
// fallthrough
@@ -745,22 +750,57 @@ rb_shape_has_object_id(rb_shape_t *shape)
745750
return shape->flags & SHAPE_FL_HAS_OBJECT_ID;
746751
}
747752

753+
shape_id_t
754+
rb_shape_object_id(VALUE obj)
755+
{
756+
rb_shape_t *shape = rb_obj_shape(obj);
757+
RUBY_ASSERT(shape);
758+
RUBY_ASSERT(shape->flags & SHAPE_FL_HAS_OBJECT_ID);
759+
760+
while (shape->type != SHAPE_OBJ_ID && shape->type != SHAPE_EXTERNAL_OBJ_ID) {
761+
shape = RSHAPE(shape->parent_id);
762+
}
763+
return rb_shape_id(shape);
764+
}
765+
748766
shape_id_t
749767
rb_shape_transition_object_id(VALUE obj)
750768
{
751-
rb_shape_t* shape = rb_obj_shape(obj);
769+
rb_shape_t *shape = rb_obj_shape(obj);
752770
RUBY_ASSERT(shape);
753771

754772
if (shape->flags & SHAPE_FL_HAS_OBJECT_ID) {
755773
while (shape->type != SHAPE_OBJ_ID) {
774+
RUBY_ASSERT(shape->type != SHAPE_EXTERNAL_OBJ_ID);
756775
shape = RSHAPE(shape->parent_id);
757776
}
758777
}
759778
else {
760779
bool dont_care;
761780
shape = get_next_shape_internal(shape, ruby_internal_object_id, SHAPE_OBJ_ID, &dont_care, true);
762781
}
782+
783+
RUBY_ASSERT(shape && shape->type == SHAPE_OBJ_ID);
784+
return rb_shape_id(shape);
785+
}
786+
787+
shape_id_t
788+
rb_shape_transition_external_object_id(VALUE obj)
789+
{
790+
rb_shape_t* shape = rb_obj_shape(obj);
763791
RUBY_ASSERT(shape);
792+
793+
if (shape->flags & SHAPE_FL_HAS_OBJECT_ID) {
794+
while (shape->type != SHAPE_EXTERNAL_OBJ_ID) {
795+
RUBY_ASSERT(shape->type != SHAPE_OBJ_ID);
796+
shape = RSHAPE(shape->parent_id);
797+
}
798+
}
799+
else {
800+
bool dont_care;
801+
shape = get_next_shape_internal(shape, id_external_object_id, SHAPE_EXTERNAL_OBJ_ID, &dont_care, true);
802+
}
803+
RUBY_ASSERT(shape && shape->type == SHAPE_EXTERNAL_OBJ_ID);
764804
return rb_shape_id(shape);
765805
}
766806

@@ -924,6 +964,7 @@ shape_get_iv_index(rb_shape_t *shape, ID id, attr_index_t *value)
924964
return false;
925965
case SHAPE_OBJ_TOO_COMPLEX:
926966
case SHAPE_OBJ_ID:
967+
case SHAPE_EXTERNAL_OBJ_ID:
927968
case SHAPE_FROZEN:
928969
rb_bug("Ivar should not exist on transition");
929970
}
@@ -1009,6 +1050,7 @@ shape_traverse_from_new_root(rb_shape_t *initial_shape, rb_shape_t *dest_shape)
10091050
switch ((enum shape_type)dest_shape->type) {
10101051
case SHAPE_IVAR:
10111052
case SHAPE_OBJ_ID:
1053+
case SHAPE_EXTERNAL_OBJ_ID:
10121054
case SHAPE_FROZEN:
10131055
if (!next_shape->edges) {
10141056
return NULL;
@@ -1080,6 +1122,7 @@ rb_shape_rebuild_shape(rb_shape_t *initial_shape, rb_shape_t *dest_shape)
10801122
midway_shape = shape_get_next_iv_shape(midway_shape, dest_shape->edge_name);
10811123
break;
10821124
case SHAPE_OBJ_ID:
1125+
case SHAPE_EXTERNAL_OBJ_ID:
10831126
case SHAPE_ROOT:
10841127
case SHAPE_FROZEN:
10851128
case SHAPE_T_OBJECT:
@@ -1368,6 +1411,7 @@ Init_default_shapes(void)
13681411

13691412
id_frozen = rb_make_internal_id();
13701413
id_t_object = rb_make_internal_id();
1414+
id_external_object_id = rb_make_internal_id();
13711415
ruby_internal_object_id = rb_make_internal_id();
13721416

13731417
#ifdef HAVE_MMAP

0 commit comments

Comments
 (0)
0