8000 gc.c: Fix a race condition in object_id for shareable objects · ruby/ruby@4b27a1c · GitHub
[go: up one dir, main page]

Skip to content

Commit 4b27a1c

Browse files
committed
gc.c: Fix a race condition in object_id for shareable objects
If an object is shareable and has no capacity left, it isn't safe to store the object ID in fields as it requires an object resize which can't be done unless all field reads are synchronized. So in this case we have to store the ID externally like we used to.
1 parent 5ec9a39 commit 4b27a1c

File tree

5 files changed

+227
-76
lines changed

5 files changed

+227
-76
lines changed

gc.c

Lines changed: 129 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,20 +1839,80 @@ id2ref_tbl_free(void *data)
18391839
st_free_table(table);
18401840
}
18411841

1842+
static void
1843+
id2ref_tbl_compact(void *data)
1844+
{
1845+
rb_gc_update_tbl_refs(id2ref_tbl);
1846+
}
1847+
18421848
static const rb_data_type_t id2ref_tbl_type = {
18431849
.wrap_struct_name = "VM/_id2ref_table",
18441850
.function = {
18451851
.dmark = id2ref_tbl_mark,
18461852
.dfree = id2ref_tbl_free,
18471853
.dsize = id2ref_tbl_memsize,
1848-
// dcompact function not required because the table is reference updated
1849-
// in rb_gc_vm_weak_table_foreach
1854+
.dcompact = id2ref_tbl_compact,
18501855
},
18511856
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
18521857
};
18531858

18541859
#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
18551860

1861+
static VALUE obj_to_id_value = 0;
1862+
static st_table *obj_to_id_tbl = NULL;
1863+
1864+
static void mark_hash_values(st_table *tbl);
1865+
1866+
static void
1867+
obj_to_id_tbl_mark(void *data)
1868+
{
1869+
st_table *table = (st_table *)data;
1870+
if (UNLIKELY(!RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1871+
// It's very unlikely, but if enough object ids were generated, keys may be T_BIGNUM
1872+
mark_hash_values(table);
1873+
}
1874+
// We purposedly don't mark keys, as they are weak references.
1875+
// rb_gc_obj_free_vm_weak_references takes care of cleaning them up.
1876+
}
1877+
1878+
static size_t
1879+
obj_to_id_tbl_memsize(const void *data)
1880+
{
1881+
return rb_st_memsize(data);
1882+
}
1883+
1884+
static void
1885+
obj_to_id_tbl_compact(void *data)
1886+
{
1887+
st_table *table = (st_table *)data;
1888+
if (LIKELY(RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1889+
// We know values are all FIXNUM, so no need to update them.
1890+
gc_ref_update_table_keys_only(table);
1891+
}
1892+
else {
1893+
gc_update_table_refs(table);
1894+
}
1895+
}
1896+
1897+
static void
1898+
obj_to_id_tbl_free(void *data)
1899+
{
1900+
obj_to_id_tbl = NULL; // clear global ref
1901+
st_table *table = (st_table *)data;
1902+
st_free_table(table);
1903+
}
1904+
1905+
static const rb_data_type_t obj_to_id_tbl_type = {
1906+
.wrap_struct_name = "VM/obj_to_id_table",
1907+
.function = {
1908+
.dmark = obj_to_id_tbl_mark,
1909+
.dfree = obj_to_id_tbl_free,
1910+
.dsize = obj_to_id_tbl_memsize,
1911+
.dcompact = obj_to_id_tbl_compact,
1912+
},
1913+
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
1914+
};
1915+
18561916
static VALUE
18571917
class_object_id(VALUE klass)
18581918
{
@@ -1876,11 +1936,23 @@ static inline VALUE
18761936
object_id_get(VALUE obj, shape_id_t shape_id)
18771937
{
18781938
VALUE id;
1879-
if (rb_shape_too_complex_p(shape_id)) {
1880-
id = rb_obj_field_get(obj, ROOT_TOO_COMPLEX_WITH_OBJ_ID);
1881-
}
1882-
else {
1883-
id = rb_obj_field_get(obj, rb_shape_object_id(shape_id));
1939+
1940+
switch (rb_shape_has_object_id(shape_id)) {
1941+
case SHAPE_NO_OBJ_ID:
1942+
rb_bug("Unreacheable");
1943+
break;
1944+
case SHAPE_INLINE_OBJ_ID:
1945+
if (rb_shape_too_complex_p(shape_id)) {
1946+
id = rb_obj_field_get(obj, ROOT_TOO_COMPLEX_WITH_OBJ_ID);
1947+
}
1948+
else {
1949+
id = rb_obj_field_get(obj, rb_shape_object_id_inline(shape_id));
1950+
}
1951+
break;
1952+
case SHAPE_EXTERNAL_OBJ_ID:
1953+
RUBY_ASSERT(obj_to_id_tbl);
1954+
st_lookup(obj_to_id_tbl, obj, &id);
1955+
break;
18841956
}
18851957

18861958
#if RUBY_DEBUG
@@ -1894,7 +1966,7 @@ object_id_get(VALUE obj, shape_id_t shape_id)
18941966
}
18951967

18961968
static VALUE
1897-
object_id0(VALUE obj)
1969+
object_id0(VALUE obj, bool shareable)
18981970
{
18991971
VALUE id = Qfalse;
19001972
shape_id_t shape_id = RBASIC_SHAPE_ID(obj);
@@ -1903,14 +1975,26 @@ object_id0(VALUE obj)
19031975
return object_id_get(obj, shape_id);
19041976
}
19051977

1906-
// rb_shape_object_id_shape may lock if the current shape has
1907-
// multiple children.
1908-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1909-
19101978
id = generate_next_object_id();
1911-
rb_obj_field_set(obj, object_id_shape_id, id);
19121979

1913-
RUBY_ASSERT(RBASIC_SHAPE_ID(obj) == object_id_shape_id);
1980+
attr_index_t capacity = RSHAPE_CAPACITY(shape_id);
1981+
attr_index_t free_capacity = capacity - RSHAPE_LEN(shape_id);
1982+
if (shareable && capacity && !free_capacity) {
1983+
// The object is shared and has no free capacity, we can't
1984+
// safely store the object_id inline.
1985+
shape_id_t next_shape_id = rb_shape_transition_object_id_external(obj);
1986+
if (RB_UNLIKELY(!obj_to_id_tbl)) {
1987+
obj_to_id_tbl = st_init_numtable();
1988+
obj_to_id_value = TypedData_Wrap_Struct(0, &obj_to_id_tbl_type, obj_to_id_tbl);
1989+
}
1990+
st_insert(obj_to_id_tbl, obj, id);
1991+
RBASIC_SET_SHAPE_ID(obj, next_shape_id);
1992+
}
1993+
else {
1994+
shape_id_t next_shape_id = rb_shape_transition_object_id_inline(obj);
1995+
rb_obj_field_set(obj, next_shape_id, id);
1996+
}
1997+
19141998
RUBY_ASSERT(rb_shape_obj_has_id(obj));
19151999

19162000
if (RB_UNLIKELY(id2ref_tbl)) {
@@ -1936,14 +2020,14 @@ object_id(VALUE obj)
19362020
break;
19372021
}
19382022

1939-
if (UNLIKELY(rb_gc_multi_ractor_p() && rb_ractor_shareable_p(obj))) {
2023+
if (UNLIKELY(rb_gc_multi_ractor_p() && RB_OBJ_SHAREABLE_P(obj))) {
19402024
unsigned int lock_lev = RB_GC_VM_LOCK();
1941-
VALUE id = object_id0(obj);
2025+
VALUE id = object_id0(obj, true);
19422026
RB_GC_VM_UNLOCK(lock_lev);
19432027
return id;
19442028
}
19452029

1946-
return object_id0(obj);
2030+
return object_id0(obj, false);
19472031
}
19482032

19492033
static void
@@ -2045,8 +2129,18 @@ obj_free_object_id(VALUE obj)
20452129
break;
20462130
default: {
20472131
shape_id_t shape_id = RBASIC_SHAPE_ID(obj);
2048-
if (rb_shape_has_object_id(shape_id)) {
2132+
switch (rb_shape_has_object_id(shape_id)) {
2133+
case SHAPE_NO_OBJ_ID:
2134+
break;
2135+
case SHAPE_INLINE_OBJ_ID:
20492136
obj_id = object_id_get(obj, shape_id);
2137+
break;
2138+
case SHAPE_EXTERNAL_OBJ_ID:
2139+
// During shutdown the `obj_to_id_tbl` may be freed first.
2140+
if (obj_to_id_tbl) {
2141+
st_delete(obj_to_id_tbl, &obj, &obj_id);
2142+
}
2143+
break;
20502144
}
20512145
break;
20522146
}
@@ -2739,6 +2833,22 @@ rb_mark_set(st_table *tbl)
27392833
st_foreach(tbl, mark_key, (st_data_t)rb_gc_get_objspace());
27402834
}
27412835

2836+
static int
2837+
mark_value(st_data_t key, st_data_t value, st_data_t data)
2838+
{
2839+
gc_mark_internal((VALUE)value);
2840+
2841+
return ST_CONTINUE;
2842+
}
2843+
2844+
static void
2845+
mark_hash_values(st_table *tbl)
2846+
{
2847+
if (!tbl) return;
2848+
2849+
st_foreach(tbl, mark_value, 0);
2850+
}
2851+
27422852
static int
27432853
mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
27442854
{
@@ -3971,33 +4081,6 @@ vm_weak_table_gen_fields_foreach_too_complex_replace_i(st_data_t *_key, st_data_
39714081

39724082
struct st_table *rb_generic_fields_tbl_get(void);
39734083

3974-
static int
3975-
vm_weak_table_id2ref_foreach(st_data_t key, st_data_t value, st_data_t data, int error)
3976-
{
3977-
struct global_vm_table_foreach_data *iter_data = (struct global_vm_table_foreach_data *)data;
3978-
3979-
if (!iter_data->weak_only && !FIXNUM_P((VALUE)key)) {
3980-
int ret = iter_data->callback((VALUE)key, iter_data->data);
3981-
if (ret != ST_CONTINUE) return ret;
3982-
}
3983-
3984-
return iter_data->callback((VALUE)value, iter_data->data);
3985-
}
3986-
3987-
static int
3988-
vm_weak_table_id2ref_foreach_update(st_data_t *key, st_data_t *value, st_data_t data, int existing)
3989-
{
3990-
struct global_vm_table_foreach_data *iter_data = (struct global_vm_table_foreach_data *)data;
3991-
3992-
iter_data->update_callback((VALUE *)value, iter_data->data);
3993-
3994-
if (!iter_data->weak_only && !FIXNUM_P((VALUE)*key)) {
3995-
iter_data->update_callback((VALUE *)key, iter_data->data);
3996-
}
3997-
3998-
return ST_CONTINUE;
3999-
}
4000-
40014084
static int
40024085
vm_weak_table_gen_fields_foreach(st_data_t key, st_data_t value, st_data_t data)
40034086
{
@@ -4126,17 +4209,6 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback,
41264209
}
41274210
break;
41284211
}
4129-
case RB_GC_VM_ID2REF_TABLE: {
4130-
if (id2ref_tbl) {
4131-
st_foreach_with_replace(
4132-
id2ref_tbl,
4133-
vm_weak_table_id2ref_foreach,
4134-
vm_weak_table_id2ref_foreach_update,
4135-
(st_data_t)&foreach_data
4136-
);
4137-
}
4138-
break;
4139-
}
41404212
case RB_GC_VM_GENERIC_FIELDS_TABLE: {
41414213
st_table *generic_fields_tbl = rb_generic_fields_tbl_get();
41424214
if (generic_fields_tbl) {
@@ -5543,6 +5615,7 @@ Init_GC(void)
55435615
{
55445616
#undef rb_intern
55455617
rb_gc_register_address(&id2ref_value);
5618+
rb_gc_register_address(&obj_to_id_value);
55465619

55475620
malloc_offset = gc_compute_malloc_offset();
55485621

gc/gc.h

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ enum rb_gc_vm_weak_tables {
2828
RB_GC_VM_CI_TABLE,
2929
RB_GC_VM_OVERLOADED_CME_TABLE,
3030
RB_GC_VM_GLOBAL_SYMBOLS_TABLE,
31-
RB_GC_VM_ID2REF_TABLE,
3231
RB_GC_VM_GENERIC_FIELDS_TABLE,
3332
RB_GC_VM_FROZEN_STRINGS_TABLE,
3433
RB_GC_VM_CC_REFINEMENT_TABLE,
@@ -160,6 +159,33 @@ gc_ref_update_table_values_only(st_table *tbl)
160159
}
161160
}
162161

162+
static int
163+
hash_foreach_replace_key(st_data_t key, st_data_t value, st_data_t argp, int error)
164+
{
165+
if (rb_gc_location((VALUE)key) != (VALUE)key) {
166+
return ST_REPLACE;
167+
}
168+
return ST_CONTINUE;
169+
}
170+
171+
static int
172+
hash_replace_ref_key(st_data_t *key, st_data_t *value, st_data_t argp, int existing)
173+
{
174+
*key = rb_gc_location((VALUE)*key);
175+
176+
return ST_CONTINUE;
177+
}
178+
179+
static void
180+
gc_ref_update_table_keys_only(st_table *tbl)
181+
{
182+
if (!tbl || tbl->num_entries == 0) return;
183+
184+
if (st_foreach_with_replace(tbl, hash_foreach_replace_key, hash_replace_ref_key, 0)) {
185+
rb_raise(rb_eRuntimeError, "hash modified during iteration");
186+
}
187+
}
188+
163189
static int
164190
gc_mark_tbl_no_pin_i(st_data_t key, st_data_t value, st_data_t data)
165191
{

shape.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ remove_shape_recursive(rb_shape_t *shape, ID id, rb_shape_t **removed_shape)
709709
static inline shape_id_t transition_complex(shape_id_t shape_id);
710710

711711
static shape_id_t
712-
shape_transition_object_id(shape_id_t original_shape_id)
712+
shape_transition_object_id_inline(shape_id_t original_shape_id)
713713
{
714714
RUBY_ASSERT(!rb_shape_has_object_id(original_shape_id));
715715

@@ -720,17 +720,25 @@ shape_transition_object_id(shape_id_t original_shape_id)
720720
}
721721

722722
RUBY_ASSERT(shape);
723-
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_HAS_OBJECT_ID;
723+
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_OBJ_ID_INLINE;
724724
}
725725

726726
shape_id_t
727-
rb_shape_transition_object_id(VALUE obj)
727+
rb_shape_transition_object_id_inline(VALUE obj)
728728
{
729-
return shape_transition_object_id(RBASIC_SHAPE_ID(obj));
729+
return shape_transition_object_id_inline(RBASIC_SHAPE_ID(obj));
730730
}
731731

732732
shape_id_t
733-
rb_shape_object_id(shape_id_t original_shape_id)
733+
rb_shape_transition_object_id_external(VALUE obj)
734+
{
735+
shape_id_t original_shape_id = RBASIC_SHAPE_ID(obj);
736+
RUBY_ASSERT(!rb_shape_has_object_id(original_shape_id));
737+
return original_shape_id | SHAPE_ID_FL_OBJ_ID_EXTERNAL;
738+
}
739+
740+
shape_id_t
741+
rb_shape_object_id_inline(shape_id_t original_shape_id)
734742
{
735743
RUBY_ASSERT(rb_shape_has_object_id(original_shape_id));
736744

@@ -742,7 +750,7 @@ rb_shape_object_id(shape_id_t original_shape_id)
742750
shape = RSHAPE(shape->parent_id);
743751
}
744752

745-
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_HAS_OBJECT_ID;
753+
return shape_id(shape, original_shape_id) | SHAPE_ID_FL_OBJ_ID_INLINE;
746754
}
747755

748756
static inline shape_id_t
@@ -754,7 +762,7 @@ transition_complex(shape_id_t shape_id)
754762
if (heap_index) {
755763
next_shape_id = rb_shape_root(heap_index - 1) | SHAPE_ID_FL_TOO_COMPLEX;
756764
if (rb_shape_has_object_id(shape_id)) {
757-
next_shape_id = shape_transition_object_id(next_shape_id);
765+
next_shape_id = shape_transition_object_id_inline(next_shape_id);
758766
}
759767
}
760768
else {
@@ -1104,7 +1112,7 @@ rb_shape_rebuild(shape_id_t initial_shape_id, shape_id_t dest_shape_id)
11041112
return shape_id(next_shape, initial_shape_id);
11051113
}
11061114
else {
1107-
return transition_complex(initial_shape_id | (dest_shape_id & SHAPE_ID_FL_HAS_OBJECT_ID));
1115+
return transition_complex(initial_shape_id | (dest_shape_id & SHAPE_ID_FL_OBJ_ID_INLINE));
11081116
}
11091117
}
11101118

@@ -1223,7 +1231,7 @@ rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id)
12231231
shape = RSHAPE(shape->parent_id);
12241232
}
12251233

1226-
if (rb_shape_has_object_id(shape_id)) {
1234+
if (rb_shape_has_object_id(shape_id) == SHAPE_INLINE_OBJ_ID) {
12271235
if (!has_object_id) {
12281236
rb_p(obj);
12291237
rb_bug("shape_id claim having obj_id but doesn't shape_id=%u, obj=%s", shape_id, rb_obj_info(obj));

0 commit comments

Comments
 (0)
0