8000 feat: implement cache invalidation logic for prebuilds · coder/coder@8977d49 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8977d49

Browse files
committed
feat: implement cache invalidation logic for prebuilds
1 parent d6c14f3 commit 8977d49

File tree

12 files changed

+1446
-985
lines changed

12 files changed

+1446
-985
lines changed

coderd/database/queries.sql.go

Lines changed: 16 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/prebuilds.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ SELECT
3535
tvp.id,
3636
tvp.name,
3737
tvp.desired_instances AS desired_instances,
38+
tvp.invalidate_after_secs AS invalidated_after_secs,
3839
tvp.prebuild_status,
3940
t.deleted,
4041
t.deprecated != '' AS deprecated

coderd/prebuilds/global_snapshot.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package prebuilds
22

33
import (
4+
"time"
5+
46
"github.com/google/uuid"
57
"golang.org/x/xerrors"
68

@@ -41,13 +43,17 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err
4143
return nil, xerrors.Errorf("no preset found with ID %q", presetID)
4244
}
4345

46+
// Only include running prebuilds whose current preset is this preset
4447
running := slice.Filter(s.RunningPrebuilds, func(prebuild database.GetRunningPrebuiltWorkspacesRow) bool {
4548
if !prebuild.CurrentPresetID.Valid {
4649
return false
4750
}
4851
return prebuild.CurrentPresetID.UUID == preset.ID
4952
})
5053

54+
// Separate running workspaces into non-expired and expired based on the preset's TTL
55+
nonExpired, expired := filterExpiredWorkspaces(preset, running)
56+
5157
inProgress := slice.Filter(s.PrebuildsInProgress, func(prebuild database.CountInProgressPrebuildsRow) bool {
5258
return prebuild.PresetID.UUID == preset.ID
5359
})
@@ -66,9 +72,33 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err
6672

6773
return &PresetSnapshot{
6874
Preset: preset,
69-
Running: running,
75+
Running: nonExpired,
76+
Expired: expired,
7077
InProgress: inProgress,
7178
Backoff: backoffPtr,
7279
IsHardLimited: isHardLimited,
7380
}, nil
7481
}
82+
83+
// filterExpiredWorkspaces splits running workspaces into expired and non-expired
84+
// based on the preset's InvalidatedAfterSecs TTL. If TTL is missing or zero,
85+
// all workspaces are considered non-expired.
86+
func filterExpiredWorkspaces(preset database.GetTemplatePresetsWithPrebuildsRow, runningWorkspaces []database.GetRunningPrebuiltWorkspacesRow) (nonExpired []database.GetRunningPrebuiltWorkspacesRow, expired []database.GetRunningPrebuiltWorkspacesRow) {
87+
if !preset.InvalidatedAfterSecs.Valid {
88+
return runningWorkspaces, expired
89+
}
90+
91+
ttl := time.Duration(preset.InvalidatedAfterSecs.Int32) * time.Second
92+
if ttl <= 0 {
93+
return runningWorkspaces, expired
94+
}
95+
96+
for _, prebuild := range runningWorkspaces {
97+
if time.Since(prebuild.CreatedAt) > ttl {
98+
expired = append(expired, prebuild)
99+
} else {
100+
nonExpired = append(nonExpired, prebuild)
101+
}
102+
}
103+
return nonExpired, expired
104+
}

coderd/prebuilds/preset_snapshot.go

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,14 @@ const (
3131
// PresetSnapshot is a filtered view of GlobalSnapshot focused on a single preset.
3232
// It contains the raw data needed to calculate the current state of a preset's prebuilds,
3333
// including running prebuilds, in-progress builds, and backoff information.
34+
// - Running: prebuilds running and non-expired
35+
// - Expired: prebuilds running and expired due to the preset's TTL
36+
// - InProgress: prebuilds currently in progress
37+
// - Backoff: holds failure info to decide if prebuild creation should be backed off
3438
type PresetSnapshot struct {
3539
Preset database.GetTemplatePresetsWithPrebuildsRow
3640
Running []database.GetRunningPrebuiltWorkspacesRow
41+
Expired []database.GetRunningPrebuiltWorkspacesRow
3742
InProgress []database.CountInProgressPrebuildsRow
3843
Backoff *database.GetPresetsBackoffRow
3944
IsHardLimited bool
@@ -43,10 +48,11 @@ type PresetSnapshot struct {
4348
// calculated from a PresetSnapshot. While PresetSnapshot contains raw data,
4449
// ReconciliationState contains derived metrics that are directly used to
4550
// determine what actions are needed (create, delete, or backoff).
46-
// For example, it calculates how many prebuilds are eligible, how many are
47-
// extraneous, and how many are in various transition states.
51+
// For example, it calculates how many prebuilds are expired, how many are eligible,
52+
// how many are extraneous, and how many are in various transition states.
4853
type ReconciliationState struct {
4954
Actual int32 // Number of currently running prebuilds
55+
Expired int32 // Number of currently running prebuilds that exceeded their allowed time-to-live (TTL)
5056
Desired int32 // Number of prebuilds desired as defined in the preset
5157
Eligible int32 // Number of prebuilds that are ready to be claimed
5258
Extraneous int32 // Number of extra running prebuilds beyond the desired count
@@ -78,7 +84,8 @@ func (ra *ReconciliationActions) IsNoop() bool {
7884
}
7985

8086
// CalculateState computes the current state of prebuilds for a preset, including:
81-
// - Actual: Number of currently running prebuilds
87+
// - Actual: Number of currently valid running prebuilds, i.e., non-expired prebuilds
88+
// - Expired: Number of currently running expired prebuilds
8289
// - Desired: Number of prebuilds desired as defined in the preset
8390
// - Eligible: Number of prebuilds that are ready to be claimed
8491
// - Extraneous: Number of extra running prebuilds beyond the desired count
@@ -92,13 +99,17 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
9299
var (
93100
actual int32
94101
desired int32
102+
expired int32
95103
eligible int32
96104
extraneous int32
97105
)
98106

99107
// #nosec G115 - Safe conversion as p.Running slice length is expected to be within int32 range
100108
actual = int32(len(p.Running))
101109

110+
// #nosec G115 - Safe conversion as p.Expired slice length is expected to be within int32 range
111+
expired = int32(len(p.Expired))
112+
102113
if p.isActive() {
103114
desired = p.Preset.DesiredInstances.Int32
104115
eligible = p.countEligible()
@@ -109,6 +120,7 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
109120

110121
return &ReconciliationState{
111122
Actual: actual,
123+
Expired: expired,
112124
Desired: desired,
113125
Eligible: eligible,
114126
Extraneous: extraneous,
@@ -125,15 +137,16 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
125137
// 2. If the preset is inactive (template version is not active), it will delete all running prebuilds
126138
// 3. For active presets, it calculates the number of prebuilds to create or delete based on:
127139
// - The desired number of instances
128-
// - Currently running prebuilds
140+
// - Currently running non-expired prebuilds
141+
// - Currently running expired prebuilds
129142
// - Prebuilds in transition states (starting/stopping/deleting)
130143
// - Any extraneous prebuilds that need to be removed
131144
//
132145
// The function returns a list of ReconciliationActions; each entry has exactly one action type set:
133146
// - ActionTypeBackoff: Only BackoffUntil is set, indicating when to retry
134147
// - ActionTypeCreate: Only Create is set, indicating how many prebuilds to create
135148
// - ActionTypeDelete: Only DeleteIDs is set, containing IDs of prebuilds to delete
136-
func (p PresetSnapshot) CalculateActions(clock quartz.Clock, backoffInterval time.Duration) (*ReconciliationActions, error) {
149+
func (p PresetSnapshot) CalculateActions(clock quartz.Clock, backoffInterval time.Duration) ([]*ReconciliationActions, error) {
137150
// TODO: align workspace states with how we represent them on the FE and the CLI
138151
// right now there's some slight differences which can lead to additional prebuilds being created
139152

@@ -158,45 +171,74 @@ func (p PresetSnapshot) isActive() bool {
158171
return p.Preset.UsingActiveVersion && !p.Preset.Deleted && !p.Preset.Deprecated
159172
}
160173

161-
// handleActiveTemplateVersion deletes excess prebuilds if there are too many,
162-
// otherwise creates new ones to reach the desired count.
163-
func (p PresetSnapshot) handleActiveTemplateVersion() (*ReconciliationActions, error) {
174+
// handleActiveTemplateVersion determines the reconciliation actions for a preset with an active template version.
175+
// It ensures the system moves towards the desired number of healthy prebuilds.
176+
//
177+
// The reconciliation follows this order:
178+
// 1. Delete expired prebuilds: These are no longer valid and must be removed first.
179+
// 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running prebuilds (excluding expired)
180+
// still exceeds the desired count, the oldest prebuilds are deleted to reduce excess.
181+
// 3. Create missing prebuilds: If the number of non-expired, non-starting prebuilds is still below the desired count,
182+
// create the necessary number of prebuilds to reach the target.
183+
//
184+
// The function returns a list of actions to be executed to achieve the desired state.
185+
func (p PresetSnapshot) handleActiveTemplateVersion() (actions []*ReconciliationActions, err error) {
164186
state := p.CalculateState()
165187

166-
// If we have more prebuilds than desired, delete the oldest ones
188+
// If we have expired prebuilds, delete them
189+
if state.Expired > 0 {
190+
var deleteIDs []uuid.UUID
191+
for _, expired := range p.Expired {
192+
deleteIDs = append(deleteIDs, expired.ID)
193+
}
194+
actions = append(actions,
195+
&ReconciliationActions{
196+
ActionType: ActionTypeDelete,
197+
DeleteIDs: deleteIDs,
198+
})
199+
}
200+
201+
// If we still have more prebuilds than desired, delete the oldest ones
167202
if state.Extraneous > 0 {
168-
return &ReconciliationActions{
169-
ActionType: ActionTypeDelete,
170-
DeleteIDs: p.getOldestPrebuildIDs(int(state.Extraneous)),
171-
}, nil
203+
actions = append(actions,
204+
&ReconciliationActions{
205+
ActionType: ActionTypeDelete,
206+
DeleteIDs: p.getOldestPrebuildIDs(int(state.Extraneous)),
207+
})
172208
}
173209

174210
// Calculate how many new prebuilds we need to create
175211
// We subtract starting prebuilds since they're already being created
176212
prebuildsToCreate := max(state.Desired-state.Actual-state.Starting, 0)
213+
if prebuildsToCreate > 0 {
214+
actions = append(actions,
215+
&ReconciliationActions{
216+
ActionType: ActionTypeCreate,
217+
Create: prebuildsToCreate,
218+
})
219+
}
177220

178-
return &ReconciliationActions{
179-
ActionType: ActionTypeCreate,
180-
Create: prebuildsToCreate,
181-
}, nil
221+
return actions, nil
182222
}
183223

184224
// handleInactiveTemplateVersion deletes all running prebuilds except those already being deleted
185225
// to avoid duplicate deletion attempts.
186-
func (p PresetSnapshot) handleInactiveTemplateVersion() (*ReconciliationActions, error) {
226+
func (p PresetSnapshot) handleInactiveTemplateVersion() ([]*ReconciliationActions, error) {
187227
prebuildsToDelete := len(p.Running)
188228
deleteIDs := p.getOldestPrebuildIDs(prebuildsToDelete)
189229

190-
return &ReconciliationActions{
191-
ActionType: ActionTypeDelete,
192-
DeleteIDs: deleteIDs,
230+
return []*ReconciliationActions{
231+
{
232+
ActionType: ActionTypeDelete,
233+
DeleteIDs: deleteIDs,
234+
},
193235
}, nil
194236
}
195237

196238
// needsBackoffPeriod checks if we should delay prebuild creation due to recent failures.
197239
// If there were failures, it calculates a backoff period based on the number of failures
198240
// and returns true if we're still within that period.
199-
func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval time.Duration) (*ReconciliationActions, bool) {
241+
func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval time.Duration) ([]*ReconciliationActions, bool) {
200242
if p.Backoff == nil || p.Backoff.NumFailed == 0 {
201243
return nil, false
202244
}
@@ -205,9 +247,11 @@ func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval t
205247
return nil, false
206248
}
207249

208-
return &ReconciliationActions{
209-
ActionType: ActionTypeBackoff,
210-
BackoffUntil: backoffUntil,
250+
return []*ReconciliationActions{
251+
{
252+
ActionType: ActionTypeBackoff,
253+
BackoffUntil: backoffUntil,
254+
},
211255
}, true
212256
}
213257

0 commit comments

Comments
 (0)
0