E573 gc: add CollectResult, stats fields, get_referrers, and fix count reset · RustPython/RustPython@764e061 · GitHub
[go: up one dir, main page]

Skip to content

Commit 764e061

Browse files
committed
gc: add CollectResult, stats fields, get_referrers, and fix count reset
- Add CollectResult struct with collected/uncollectable/candidates/duration
- Add candidates and duration fields to GcStats and gc.get_stats()
- Pass CollectResult to gc.callbacks info dict
- Reset generation counts for all collected generations (0..=N)
- Return 0 for third value in gc.get_threshold() (3.13+)
- Implement gc.get_referrers() by scanning all tracked objects
- Add DEBUG_COLLECTABLE output for collectable objects
- Update test_gc.py to expect candidates/duration in stats
1 parent 6f07745 commit 764e061

File tree

3 files changed

+138
-45
lines changed

3 files changed

+138
-45
lines changed

Lib/test/test_gc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ def test_get_stats(self):
822822
for st in stats:
823823
self.assertIsInstance(st, dict)
824824
self.assertEqual(set(st),
825-
{"collected", "collections", "uncollectable"})
825+
{"collected", "collections", "uncollectable", "candidates", "duration"})
826826
self.assertGreaterEqual(st["collected"], 0)
827827
self.assertGreaterEqual(st["collections"], 0)
828828
self.assertGreaterEqual(st["uncollectable"], 0)

crates/vm/src/gc_state.rs

Lines changed: 97 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//! Garbage Collection State and Algorithm
22
//!
3-
//! This module implements CPython-compatible generational garbage collection
4-
//! for RustPython, using an intrusive doubly-linked list approach.
3+
//! Generational garbage collection using an intrusive doubly-linked list.
54
65
use crate::common::linked_list::LinkedList;
76
use crate::common::lock::{PyMutex, PyRwLock};
@@ -28,12 +27,23 @@ bitflags::bitflags! {
2827
}
2928
}
3029

30+
/// Result from a single collection run
31+
#[derive(Debug, Default)]
32+
pub struct CollectResult {
33+
pub collected: usize,
34+
pub uncollectable: usize,
35+
pub candidates: usize,
36+
pub duration: f64,
37+
}
38+
3139
/// Statistics for a single generation (gc_generation_stats)
3240
#[derive(Debug, Default)]
3341
pub struct GcStats {
3442
pub collections: usize,
3543
pub collected: usize,
3644
pub uncollectable: usize,
45+
pub candidates: usize,
46+
pub duration: f64,
3747
}
3848

3949
/// A single GC generation with intrusive linked list
@@ -55,6 +65,8 @@ impl GcGeneration {
5565
collections: 0,
5666
collected: 0,
5767
uncollectable: 0,
68+
candidates: 0,
69+
duration: 0.0,
5870
}),
5971
}
6072
}
@@ -77,14 +89,24 @@ impl GcGeneration {
7789
collections: guard.collections,
7890
collected: guard.collected,
7991
uncollectable: guard.uncollectable,
92+
candidates: guard.candidates,
93+
duration: guard.duration,
8094
}
8195
}
8296

83-
pub fn update_stats(&self, collected: usize, uncollectable: usize) {
97+
pub fn update_stats(
98+
&self,
99+
collected: usize,
100+
uncollectable: usize,
101+
candidates: usize,
102+
duration: f64,
103+
) {
84104
let mut guard = self.stats.lock();
85105
guard.collections += 1;
86106
guard.collected += collected;
87107
guard.uncollectable += uncollectable;
108+
guard.candidates += candidates;
109+
guard.duration += duration;
88110
}
89111

90112
/// Reset the stats mutex to unlocked state after fork().
@@ -340,25 +362,27 @@ impl GcState {
340362
}
341363

342364
/// Perform garbage collection on the given generation
343-
pub fn collect(&self, generation: usize) -> (usize, usize) {
365+
pub fn collect(&self, generation: usize) -> CollectResult {
344366
self.collect_inner(generation, false)
345367
}
346368

347369
/// Force collection even if GC is disabled (for manual gc.collect() calls)
348-
pub fn collect_force(&self, generation: usize) -> (usize, usize) {
370+
pub fn collect_force(&self, generation: usize) -> CollectResult {
349371
self.collect_inner(generation, true)
350372
}
351373

352-
fn collect_inner(&self, generation: usize, force: bool) -> (usize, usize) {
374+
fn collect_inner(&self, generation: usize, force: bool) -> CollectResult {
353375
if !force && !self.is_enabled() {
354-
return (0, 0);
376+
return CollectResult::default();
355377
}
356378

357379
// Try to acquire the collecting lock
358380
let Some(_guard) = self.collecting.try_lock() else {
359-
return (0, 0);
381+
return CollectResult::default();
360382
};
361383

384+
let start_time = std::time::Instant::now();
385+
362386
// Memory barrier to ensure visibility of all reference count updates
363387
// from other threads before we start analyzing the object graph.
364388
core::sync::atomic::fence(Ordering::SeqCst);
@@ -386,11 +410,24 @@ impl GcState {
386410
}
387411

388412
if collecting.is_empty() {
389-
self.generations[0].count.store(0, Ordering::SeqCst);
390-
self.generations[generation].update_stats(0, 0);
391-
return (0, 0);
413+
// Reset counts for generations whose objects were promoted away.
414+
// For gen2 (oldest), survivors stay in-place so don't reset gen2 count.
415+
let reset_end = if generation >= 2 { 2 } else { generation + 1 };
416+
for i in 0..reset_end {
417+
self.generations[i].count.store(0, Ordering::SeqCst);
418+
}
419+
let duration = start_time.elapsed().as_secs_f64();
420+
self.generations[generation].update_stats(0, 0, 0, duration);
421+
return CollectResult {
422+
collected: 0,
423+
uncollectable: 0,
424+
candidates: 0,
425+
duration,
426+
};
392427
}
393428

429+
let candidates = collecting.len();
430+
394431
if debug.contains(GcDebugFlags::STATS) {
395432
eprintln!(
396433
"gc: collecting {} objects from generations 0..={}",
@@ -486,9 +523,17 @@ impl GcState {
486523
if unreachable.is_empty() {
487524
drop(gen_locks);
488525
self.promote_survivors(generation, &survivor_refs);
489-
self.generations[0].count.store(0, Ordering::SeqCst);
490-
self.generations[generation].update_stats(0, 0);
491-
return (0, 0);
526+
for i in 0..generation {
527+
self.generations[i].count.store(0, Ordering::SeqCst);
528+
}
529+
let duration = start_time.elapsed().as_secs_f64();
530+
self.generations[generation].update_stats(0, 0, candidates, duration);
531+
return CollectResult {
532+
collected: 0,
533+
uncollectable: 0,
534+
candidates,
535+
duration,
536+
};
492537
}
493538

494539
// Release read locks before finalization phase.
@@ -498,9 +543,17 @@ impl GcState {
498543

499544
if unreachable_refs.is_empty() {
500545
self.promote_survivors(generation, &survivor_refs);
501-
self.generations[0].count.store(0, Ordering::SeqCst);
502-
self.generations[generation].update_stats(0, 0);
503-
return (0, 0);
546+
for i in 0..generation {
547+
self.generations[i].count.store(0, Ordering::SeqCst);
548+
}
549+
let duration = start_time.elapsed().as_secs_f64();
550+
self.generations[generation].update_stats(0, 0, candidates, duration);
551+
return CollectResult {
552+
collected: 0,
553+
uncollectable: 0,
554+
candidates,
555+
duration,
556+
};
504557
}
505558

506559
// 6b: Record initial strong counts (for resurrection detection)
@@ -594,15 +647,25 @@ impl GcState {
594647
};
595648

596649
// Promote survivors to next generation BEFORE tp_clear.
597-
// This matches CPython's order (move_legacy_finalizer_reachable → delete_garbage)
598-
// and ensures survivor_refs are dropped before tp_clear, so reachable objects
599-
// (e.g. LateFin) aren't kept alive beyond the deferred-drop phase.
650+
// move_legacy_finalizer_reachable → delete_garbage order ensures
651+
// survivor_refs are dropped before tp_clear, so reachable objects
652+
// aren't kept alive beyond the deferred-drop phase.
600653
self.promote_survivors(generation, &survivor_refs);
601654
drop(survivor_refs);
602655

603656
// Resurrected objects stay tracked — just drop our references
604657
drop(resurrected);
605658

659+
if debug.contains(GcDebugFlags::COLLECTABLE) {
660+
for obj in &truly_dead {
661+
eprintln!(
662+
"gc: collectable <{} {:p}>",
663+
obj.class().name(),
664+
obj.as_ref()
665+
);
666+
}
667+
}
668+
606669
if debug.contains(GcDebugFlags::SAVEALL) {
607670
let mut garbage_guard = self.garbage.lock();
608671
for obj_ref in truly_dead.iter() {
@@ -624,12 +687,22 @@ impl GcState {
624687
});
625688
}
626689

627-
// Reset gen0 count
628-
self.generations[0].count.store(0, Ordering::SeqCst);
690+
// Reset counts for generations whose objects were promoted away.
691+
// For gen2 (oldest), survivors stay in-place so don't reset gen2 count.
692+
let reset_end = if generation >= 2 { 2 } else { generation + 1 };
693+
for i in 0..reset_end {
694+
self.generations[i].count.store(0, Ordering::SeqCst);
695+
}
629696

630-
self.generations[generation].update_stats(collected, 0);
697+
let duration = start_time.elapsed().as_secs_f64();
698+
self.generations[generation].update_stats(collected, 0, candidates, duration);
631699

632-
(collected, 0)
700+
CollectResult {
701+
collected,
702+
uncollectable: 0,
703+
candidates,
704+
duration,
705+
}
633706
}
634707

635708
/// Promote surviving objects to the next generation.

crates/vm/src/stdlib/gc.rs

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ mod gc {
5555
}
5656

5757
// Invoke callbacks with "start" phase
58-
invoke_callbacks(vm, "start", generation_num as usize, 0, 0);
58+
invoke_callbacks(vm, "start", generation_num as usize, &Default::default());
5959

6060
// Manual gc.collect() should run even if GC is disabled
6161
let gc = gc_state::gc_state();
62-
let (collected, uncollectable) = gc.collect_force(generation_num as usize);
62+
let result = gc.collect_force(generation_num as usize);
6363

6464
// Move objects from gc_state.garbage to vm.ctx.gc_garbage (for DEBUG_SAVEALL)
6565
{
@@ -74,26 +74,21 @@ mod gc {
7474
}
7575

7676
// Invoke callbacks with "stop" phase
77-
invoke_callbacks(
78-
vm,
79-
"stop",
80-
generation_num as usize,
81-
collected,
82-
uncollectable,
83-
);
77+
invoke_callbacks(vm, "stop", generation_num as usize, &result);
8478

85-
Ok(collected as i32)
79+
Ok((result.collected + result.uncollectable) as i32)
8680
}
8781

8882
/// Return the current collection thresholds as a tuple.
83+
/// The third value is always 0.
8984
#[pyfunction]
9085
fn get_threshold(vm: &VirtualMachine) -> PyObjectRef {
91-
let (t0, t1, t2) = gc_state::gc_state().get_threshold();
86+
let (t0, t1, _t2) = gc_state::gc_state().get_threshold();
9287
vm.ctx
9388
.new_tuple(vec![
9489
vm.ctx.new_int(t0).into(),
9590
vm.ctx.new_int(t1).into(),
96-
vm.ctx.new_int(t2).into(),
91+
vm.ctx.new_int(0).into(),
9792
])
9893
.into()
9994
}
@@ -148,6 +143,8 @@ mod gc {
148143
vm.ctx.new_int(stat.uncollectable).into(),
149144
vm,
150145
)?;
146+
dict.set_item("candidates", vm.ctx.new_int(stat.candidates).into(), vm)?;
147+
dict.set_item("duration", vm.ctx.new_float(stat.duration).into(), vm)?;
151148
result.push(dict.into());
152149
}
153150

@@ -189,10 +186,30 @@ mod gc {
189186
/// Return the list of objects that directly refer to any of the arguments.
190187
#[pyfunction]
191188
fn get_referrers(args: FuncArgs, vm: &VirtualMachine) -> PyListRef {
192-
// This is expensive: we need to scan all tracked objects
193-
// For now, return an empty list (would need full object tracking to implement)
194-
let _ = args;
195-
vm.ctx.new_list(vec![])
189+
use std::collections::HashSet;
190+
191+
// Build a set of target object pointers for fast lookup
192+
let targets: HashSet<usize> = args
193+
.args
194+
.iter()
195+
.map(|obj| obj.as_ref() as *const crate::PyObject as usize)
196+
.collect();
197+
198+
let mut result = Vec::new();
199+
200+
// Scan all tracked objects across all generations
201+
let all_objects = gc_state::gc_state().get_objects(None);
202+
for obj in all_objects {
203+
let referent_ptrs = unsafe { obj.gc_get_referent_ptrs() };
204+
for child_ptr in referent_ptrs {
205+
if targets.contains(&(child_ptr.as_ptr() as usize)) {
206+
result.push(obj.clone());
207+
break;
208+
}
209+
}
210+
}
211+
212+
vm.ctx.new_list(result)
196213
}
197214

198215
/// Return True if the object is tracked by the garbage collector.
@@ -243,8 +260,7 @@ mod gc {
243260
vm: &VirtualMachine,
244261
phase: &str,
245262
generation: usize,
246-
collected: usize,
247-
uncollectable: usize,
263+
result: &gc_state::CollectResult,
248264
) {
249265
let callbacks_list = &vm.ctx.gc_callbacks;
250266
let callbacks: Vec<PyObjectRef> = callbacks_list.borrow_vec().to_vec();
@@ -255,8 +271,12 @@ mod gc {
255271
let phase_str: PyObjectRef = vm.ctx.new_str(phase).into();
256272
let info = vm.ctx.new_dict();
257273
let _ = info.set_item("generation", vm.ctx.new_int(generation).into(), vm);
258-
let _ = info.set_item("collected", vm.ctx.new_int(collected).into(), vm);
259-
let _ = info.set_item("uncollectable", vm.ctx.new_int(uncollectable).into(), vm);
274+
let _ = info.set_item("collected", vm.ctx.new_int(result.collected).into(), vm);
275+
let _ = info.set_item(
276+
"uncollectable",
277+
vm.ctx.new_int(result.uncollectable).into(),
278+
vm,
279+
);
260280

261281
for callback in callbacks {
262282
let _ = callback.call((phase_str.clone(), info.clone()), vm);

0 commit comments

Comments
 (0)
0