@@ -51,7 +51,17 @@ GROUP BY t.id, wpb.template_version_id, wpb.transition;
51
51
-- For each group, the query checks the last N jobs, where N equals the number of desired instances for the corresponding preset.
52
52
-- If at least one of the last N jobs has failed, we should backoff on the corresponding template version ID.
53
53
-- Query returns a list of template version IDs for which we should backoff.
54
- -- Only template versions with configured presets are considered.
54
+ -- Only active template versions with configured presets are considered.
55
+ --
56
+ -- NOTE:
57
+ -- We back off on the template version ID if at least one of the N latest workspace builds has failed.
58
+ -- However, we also return the number of failed workspace builds that occurred during the lookback period.
59
+ --
60
+ -- In other words:
61
+ -- - To **decide whether to back off**, we look at the N most recent builds (regardless of when they happened).
62
+ -- - To **calculate the number of failed builds**, we consider all builds within the defined lookback period.
63
+ --
64
+ -- The number of failed builds is used downstream to determine the backoff duration.
55
65
WITH filtered_builds AS (
56
66
-- Only select builds which are for prebuild creations
57
67
SELECT wlb.* , tvp .id AS preset_id, pj .job_status , tvpp .desired_instances
@@ -62,8 +72,8 @@ WITH filtered_builds AS (
62
72
JOIN templates t ON tv .template_id = t .id AND t .active_version_id = tv .id
63
73
JOIN template_version_preset_prebuilds tvpp ON tvpp .preset_id = tvp .id
64
74
WHERE wlb .transition = ' start' ::workspace_transition),
65
- latest_builds AS (
66
- -- Select only the latest build per template_version AND preset
75
+ time_sorted_builds AS (
76
+ -- Group builds by template version, then sort each group by created_at.
67
77
SELECT fb.* ,
68
78
ROW_NUMBER() OVER (PARTITION BY fb .template_version_preset_id ORDER BY fb .created_at DESC ) as rn
69
79
FROM filtered_builds fb),
@@ -74,16 +84,16 @@ WITH filtered_builds AS (
74
84
WHERE job_status = ' failed' ::provisioner_job_status
75
85
AND created_at >= @lookback::timestamptz
76
86
GROUP BY preset_id)
77
- SELECT lb .template_version_id ,
78
- lb .preset_id ,
79
- MAX ( lb .job_status ) ::provisioner_job_status AS latest_build_status,
80
- MAX ( COALESCE(fc .num_failed , 0 )) ::int AS num_failed,
81
- MAX ( lb .created_at ) ::timestamptz AS last_build_at
82
- FROM latest_builds lb
83
- LEFT JOIN failed_count fc ON fc .preset_id = lb .preset_id
84
- WHERE lb .rn <= lb .desired_instances -- Fetch the last N builds, where N is the number of desired instances; if any fail, we backoff
85
- AND lb .job_status = ' failed' ::provisioner_job_status
86
- GROUP BY lb .template_version_id , lb .preset_id , lb .job_status ;
87
+ SELECT tsb .template_version_id ,
88
+ tsb .preset_id ,
89
+ tsb .job_status ::provisioner_job_status AS latest_build_status,
90
+ COALESCE(fc .num_failed , 0 )::int AS num_failed,
91
+ tsb .created_at ::timestamptz AS last_build_at
92
+ FROM time_sorted_builds tsb
93
+ LEFT JOIN failed_count fc ON fc .preset_id = tsb .preset_id
94
+ WHERE tsb .rn <= tsb .desired_instances -- Fetch the last N builds, where N is the number of desired instances; if any fail, we backoff
95
+ AND tsb .job_status = ' failed' ::provisioner_job_status
96
+ GROUP BY tsb .template_version_id , tsb .preset_id , tsb .job_status , tsb . created_at , fc . num_failed ;
87
97
88
98
-- name: ClaimPrebuild :one
89
99
UPDATE workspaces w
0 commit comments