feat: track resource replacements when claiming a prebuilt workspace by dannykopping · Pull Request #17571 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat: track resource replacements when claiming a prebuilt workspace #17571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b32923a
feat: log resource replacements
dannykopping Apr 25, 2025
0b0830f
feat: show terraform state drift diff in build logs
dannykopping Apr 25, 2025
256395a
feat: only highlight lines which mention replacement
dannykopping Apr 25, 2025
61ef61a
feat: notify template admins when prebuild claim results in resource …
dannykopping Apr 25, 2025
a66559f
chore: appease linter
dannykopping Apr 25, 2025
222892b
chore: fix notifications test
dannykopping Apr 25, 2025
f34e011
fix: don't panic
dannykopping Apr 28, 2025
5168c01
fix: renaming type
dannykopping Apr 28, 2025
41e5e0c
chore: updating migration numbers
dannykopping May 6, 2025
b29e8fa
chore: minor touch-ups
dannykopping May 6, 2025
b31ed5e
feat: add resource replacements metric
dannykopping May 7, 2025
adf98d2
feat: add resource replacement notification
dannykopping May 7, 2025
f24aef0
make lint; make fmt
dannykopping May 7, 2025
70f9a53
chore: adding tests
dannykopping May 8, 2025
1e8385d
feat: pass flag to terraform provider when prebuilt workspace claimed
dannykopping May 9, 2025
d0f00ce
chore: update provider, add test for is_prebuild_claim
dannykopping May 12, 2025
11a2c5a
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
ce63b24
Merge branch 'dk/is-prebuild-claim' of github.com:/coder/coder into d…
dannykopping May 12, 2025
d2c5d43
chore: replace GetTemplatePresetsByID with GetPresetByID
dannykopping May 12, 2025
22d82a4
chore: correcting docs link
dannykopping May 12, 2025
5209aae
Merge branch 'main' of github.com:/coder/coder into dk/logreplacement
dannykopping May 12, 2025
39ce658
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
ac5655f
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
82c3f58
chore: note provisioner API change
dannykopping May 12, 2025
7577a90
chore: fixups
dannykopping May 13, 2025
a893b79
chore: adding note about immutable resources
dannykopping May 13, 2025
d9c906a
chore: review feedback
dannykopping May 13, 2025
471198a
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 13, 2025
7d694e6
chore: merge conflicts
dannykopping May 13, 2025
6b7a8b7
chore: fix 'is not iterable' bullshit
dannykopping May 13, 2025
5df2cb3
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 14, 2025
6d1c3ea
chore: rename migrations
dannykopping May 14, 2025
5f62702
chore: set notifications manager before enterprise server initializes…
dannykopping May 14, 2025
f74d799
chore: completing refactor since https://github.com/coder/coder/pull/…
dannykopping May 14, 2025
971f65c
chore: remove unnecessary atomicity since map is protected by mutex a…
dannykopping May 14, 2025
bc362b0
chore: appeasing linter's Very Important Suggestion
dannykopping May 14, 2025
4fbd356
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 14, 2025
b9eb8be
chore: remove old replacement logging
dannykopping May 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: add resource replacement notification
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
  • Loading branch information
dannykopping committed May 8, 2025
commit adf98d201e15a2e7891068f09eed1df56dd332bf
6 changes: 6 additions & 0 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -1763,6 +1763,11 @@ func (api *API) CreateInMemoryTaggedProvisionerDaemon(dialCtx context.Context, n
return nil, xerrors.Errorf("failed to create in-memory provisioner daemon: %w", err)
}

var prebuildsOrchestrator prebuilds.ReconciliationOrchestrator
if val := api.PrebuildsReconciler.Load(); val != nil {
prebuildsOrchestrator = *val
}

mux := drpcmux.New()
api.Logger.Debug(dialCtx, "starting in-memory provisioner daemon", slog.F("name", name))
logger := api.Logger.Named(fmt.Sprintf("inmem-provisionerd-%s", name))
Expand Down Expand Up @@ -1790,6 +1795,7 @@ func (api *API) CreateInMemoryTaggedProvisionerDaemon(dialCtx context.Context, n
Clock: api.Clock,
},
api.NotificationsEnqueuer,
prebuildsOrchestrator,
)
if err != nil {
return nil, err
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,13 @@ func (q *querier) GetTemplateParameterInsights(ctx context.Context, arg database
return q.db.GetTemplateParameterInsights(ctx, arg)
}

// GetTemplatePresetsByID returns the template version preset with the given ID.
// The caller must be authorized to read all template resources; otherwise the
// authorization error is returned along with a zero-value preset.
func (q *querier) GetTemplatePresetsByID(ctx context.Context, id uuid.UUID) (database.TemplateVersionPreset, error) {
	authErr := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate.All())
	if authErr != nil {
		var none database.TemplateVersionPreset
		return none, authErr
	}
	// Authorization passed: delegate to the wrapped store.
	return q.db.GetTemplatePresetsByID(ctx, id)
}

func (q *querier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]database.GetTemplatePresetsWithPrebuildsRow, error) {
// GetTemplatePresetsWithPrebuilds retrieves template versions with configured presets and prebuilds.
// Presets and prebuilds are part of the template, so if you can access templates - you can access them as well.
Expand Down
21 changes: 21 additions & 0 deletions coderd/database/dbauthz/dbauthz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4873,6 +4873,27 @@ func (s *MethodTestSuite) TestPrebuilds() {
Asserts(rbac.ResourceTemplate.All(), policy.ActionRead).
ErrorsWithInMemDB(dbmem.ErrUnimplemented)
}))
s.Run("GetTemplatePresetsByID", s.Subtest(func(db database.Store, check *expects) {
org := dbgen.Organization(s.T(), db, database.Organization{})
user := dbgen.User(s.T(), db, database.User{})
t := dbgen.Template(s.T(), db, database.Template{
OrganizationID: org.ID,
CreatedBy: user.ID,
})
tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: t.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
})
preset := dbgen.Preset(s.T(), db, database.InsertPresetParams{
TemplateVersionID: tv.ID,
Name: "my-preset",
DesiredInstances: sql.NullInt32{Int32: 1, Valid: true},
})
check.Args(preset.ID).
Asserts(rbac.ResourceTemplate.All(), policy.ActionRead).
ErrorsWithInMemDB(dbmem.ErrUnimplemented)
}))
s.Run("GetPresetByID", s.Subtest(func(db database.Store, check *expects) {
org := dbgen.Organization(s.T(), db, database.Organization{})
user := dbgen.User(s.T(), db, database.User{})
Expand Down
4 changes: 4 additions & 0 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -6072,6 +6072,10 @@ func (q *FakeQuerier) GetTemplateParameterInsights(ctx context.Context, arg data
return rows, nil
}

// GetTemplatePresetsByID is not implemented by the in-memory fake. It always
// returns a zero-value preset together with ErrUnimplemented.
//
// The receiver and parameters are intentionally unnamed/blank because the stub
// never reads them.
func (*FakeQuerier) GetTemplatePresetsByID(_ context.Context, _ uuid.UUID) (database.TemplateVersionPreset, error) {
	return database.TemplateVersionPreset{}, ErrUnimplemented
}

// GetTemplatePresetsWithPrebuilds is not implemented by the in-memory fake. It
// always returns a nil slice together with ErrUnimplemented.
func (*FakeQuerier) GetTemplatePresetsWithPrebuilds(_ context.Context, _ uuid.NullUUID) ([]database.GetTemplatePresetsWithPrebuildsRow, error) {
	var rows []database.GetTemplatePresetsWithPrebuildsRow // stays nil; nothing is ever looked up
	return rows, ErrUnimplemented
}
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbmetrics/querymetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ VALUES ('89d9745a-816e-4695-a17f-3d0a229e2b8d',
E'There might be a problem with a recently claimed prebuilt workspace',
$$
Workspace **{{.Labels.workspace}}** was claimed from a prebuilt workspace by **{{.Labels.claimant}}**.

During the claim, Terraform destroyed and recreated the following resources
because one or more immutable attributes changed:

Expand All @@ -16,12 +17,18 @@ When Terraform must change an immutable attribute, it replaces the entire resour
If you’re using prebuilds to speed up provisioning, unexpected replacements will slow down
workspace startup—even when claiming a prebuilt environment.

For tips on preventing replacements and improving claim performance, see [this guide](https://coder.com/docs/TODO).
For tips on preventing replacements and improving claim performance, see [this guide](https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces.md#preventing-resource-replacement).

NOTE: this prebuilt workspace used the **{{.Labels.preset}}** preset.
$$,
'Workspace Events',
'Template Events',
'[
{
"label": "View workspace build",
"url": "{{base_url}}/@{{.Labels.claimant}}/{{.Labels.workspace}}/builds/{{.Labels.workspace_build_num}}"
},
{
"label": "View template version",
"url": "{{base_url}}/templates/{{.Labels.org}}/{{.Labels.template}}/versions/{{.Labels.template_version}}"
}
]'::jsonb);
1 change: 1 addition & 0 deletions coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions coderd/database/queries/prebuilds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ FROM templates t
WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration.
AND (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL);

-- name: GetTemplatePresetsByID :one
-- GetTemplatePresetsByID selects every column of the template_version_presets
-- row whose id equals the first query argument.
SELECT *
FROM template_version_presets
WHERE id = $1;

-- name: GetRunningPrebuiltWorkspaces :many
SELECT
p.id,
Expand Down
4 changes: 2 additions & 2 deletions coderd/notifications/enqueuer.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ type StoreEnqueuer struct {
}

// NewStoreEnqueuer creates an Enqueuer implementation which can persist notification messages in the store.
func NewStoreEnqueuer(cfg codersdk.NotificationsConfig, store Store, helpers template.FuncMap, log slog.Logger, clock quartz.Clock) (*StoreEnqueuer, error) {
func NewStoreEnqueuer(cfg codersdk.NotificationsConfig, store Store, helpers template.FuncMap, log slog.Logger, clock quartz.Clock) (Enqueuer, error) {
var method database.NotificationMethod
// TODO(DanielleMaywood):
// Currently we do not want to allow setting `inbox` as the default notification method.
Expand Down Expand Up @@ -203,7 +203,7 @@ func (s *StoreEnqueuer) buildPayload(metadata database.FetchNewMessageMetadataRo
type NoopEnqueuer struct{}

// NewNoopEnqueuer builds a NoopEnqueuer which is used to fulfill the contract for enqueuing notifications, if ExperimentNotifications is not set.
func NewNoopEnqueuer() *NoopEnqueuer {
func NewNoopEnqueuer() Enqueuer {
return &NoopEnqueuer{}
}

Expand Down
2 changes: 1 addition & 1 deletion coderd/notifications/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ var (
TemplateWorkspaceManualBuildFailed = uuid.MustParse("2faeee0f-26cb-4e96-821c-85ccb9f71513")
TemplateWorkspaceOutOfMemory = uuid.MustParse("a9d027b4-ac49-4fb1-9f6d-45af15f64e7a")
TemplateWorkspaceOutOfDisk = uuid.MustParse("f047f6a3-5713-40f7-85aa-0394cce9fa3a")
TemplateWorkspaceResourceReplaced = uuid.MustParse("89d9745a-816e-4695-a17f-3d0a229e2b8d")
)

// Account-related events.
Expand All @@ -40,6 +39,7 @@ var (
TemplateTemplateDeprecated = uuid.MustParse("f40fae84-55a2-42cd-99fa-b41c1ca64894")

TemplateWorkspaceBuildsFailedReport = uuid.MustParse("34a20db2-e9cc-4a93-b0e4-8569699d7a00")
TemplateWorkspaceResourceReplaced = uuid.MustParse("89d9745a-816e-4695-a17f-3d0a229e2b8d")
)

// Notification-related events.
Expand Down
4 changes: 4 additions & 0 deletions coderd/notifications/notifications_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1235,8 +1235,12 @@ func TestNotificationTemplates_Golden(t *testing.T) {
UserEmail: "bobby@coder.com",
UserUsername: "bobby",
Labels: map[string]string{
"org": "cern",
"workspace": "my-workspace",
"workspace_build_num": "2",
"template": "docker",
"template_version": "angry_torvalds",
"preset": "particle-accelerator",
"claimant": "prebuilds-claimer",
},
Data: map[string]any{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Hi Bobby,

Workspace my-workspace was claimed from a prebuilt workspace by prebuilds-c=
laimer.

During the claim, Terraform destroyed and recreated the following resources
because one or more immutable attributes changed:

Expand All @@ -26,12 +27,18 @@ acements will slow down
workspace startup=E2=80=94even when claiming a prebuilt environment.

For tips on preventing replacements and improving claim performance, see th=
is guide (https://coder.com/docs/TODO).
is guide (https://coder.com/docs/admin/templates/extending-templates/prebui=
lt-workspaces.md#preventing-resource-replacement).

NOTE: this prebuilt workspace used the particle-accelerator preset.


View workspace build: http://test.com/@prebuilds-claimer/my-workspace/build=
s/2

View template version: http://test.com/templates/cern/docker/versions/angry=
_torvalds

--bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html; charset=UTF-8
Expand Down Expand Up @@ -63,9 +70,10 @@ argin: 8px 0 32px; line-height: 1.5;">
<div style=3D"line-height: 1.5;">
<p>Hi Bobby,</p>
<p>Workspace <strong>my-workspace</strong> was claimed from a prebu=
ilt workspace by <strong>prebuilds-claimer</strong>.<br>
During the claim, Terraform destroyed and recreated the following resources=
<br>
ilt workspace by <strong>prebuilds-claimer</strong>.</p>

<p>During the claim, Terraform destroyed and recreated the following resour=
ces<br>
because one or more immutable attributes changed:</p>

<ul>
Expand All @@ -81,7 +89,11 @@ acements will slow down<br>
workspace startup=E2=80=94even when claiming a prebuilt environment.</p>

<p>For tips on preventing replacements and improving claim performance, see=
<a href=3D"https://coder.com/docs/TODO">this guide</a>.</p>
<a href=3D"https://coder.com/docs/admin/templates/extending-templates/preb=
uilt-workspaces.md#preventing-resource-replacement">this guide</a>.</p>

<p>NOTE: this prebuilt workspace used the <strong>particle-accelerator</str=
ong> preset.</p>
</div>
<div style=3D"text-align: center; margin-top: 32px;">
=20
Expand All @@ -92,6 +104,13 @@ workspace startup=E2=80=94even when claiming a prebuilt environment.</p>
View workspace build
</a>
=20
<a href=3D"http://test.com/templates/cern/docker/versions/angry_tor=
valds" style=3D"display: inline-block; padding: 13px 24px; background-color=
: #020617; color: #f8fafc; text-decoration: none; border-radius: 8px; margi=
n: 0 4px;">
View template version
</a>
=20
</div>
<div style=3D"border-top: 1px solid #e2e8f0; color: #475569; font-siz=
e: 12px; margin-top: 64px; padding-top: 24px; line-height: 1.6;">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,18 @@
{
"label": "View workspace build",
"url": "http://test.com/@prebuilds-claimer/my-workspace/builds/2"
},
{
"label": "View template version",
"url": "http://test.com/templates/cern/docker/versions/angry_torvalds"
}
],
"labels": {
"claimant": "prebuilds-claimer",
"org": "cern",
"preset": "particle-accelerator",
"template": "docker",
"template_version": "angry_torvalds",
"workspace": "my-workspace",
"workspace_build_num": "2"
},
Expand All @@ -29,6 +37,6 @@
},
"title": "There might be a problem with a recently claimed prebuilt workspace",
"title_markdown": "There might be a problem with a recently claimed prebuilt workspace",
"body": "Workspace my-workspace was claimed from a prebuilt workspace by prebuilds-claimer.\nDuring the claim, Terraform destroyed and recreated the following resources\nbecause one or more immutable attributes changed:\n\ndocker_container[0] was replaced due to changes to env, hostname\n\nWhen Terraform must change an immutable attribute, it replaces the entire resource.\nIf you’re using prebuilds to speed up provisioning, unexpected replacements will slow down\nworkspace startup—even when claiming a prebuilt environment.\n\nFor tips on preventing replacements and improving claim performance, see this guide (https://coder.com/docs/TODO).",
"body_markdown": "\nWorkspace **my-workspace** was claimed from a prebuilt workspace by **prebuilds-claimer**.\nDuring the claim, Terraform destroyed and recreated the following resources\nbecause one or more immutable attributes changed:\n\n- _docker_container[0]_ was replaced due to changes to _env, hostname_\n\n\nWhen Terraform must change an immutable attribute, it replaces the entire resource.\nIf you’re using prebuilds to speed up provisioning, unexpected replacements will slow down\nworkspace startup—even when claiming a prebuilt environment.\n\nFor tips on preventing replacements and improving claim performance, see [this guide](https://coder.com/docs/TODO).\n"
"body": "Workspace my-workspace was claimed from a prebuilt workspace by prebuilds-claimer.\n\nDuring the claim, Terraform destroyed and recreated the following resources\nbecause one or more immutable attributes changed:\n\ndocker_container[0] was replaced due to changes to env, hostname\n\nWhen Terraform must change an immutable attribute, it replaces the entire resource.\nIf you’re using prebuilds to speed up provisioning, unexpected replacements will slow down\nworkspace startup—even when claiming a prebuilt environment.\n\nFor tips on preventing replacements and improving claim performance, see this guide (https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces.md#preventing-resource-replacement).\n\nNOTE: this prebuilt workspace used the particle-accelerator preset.",
"body_markdown": "\nWorkspace **my-workspace** was claimed from a prebuilt workspace by **prebuilds-claimer**.\n\nDuring the claim, Terraform destroyed and recreated the following resources\nbecause one or more immutable attributes changed:\n\n- _docker_container[0]_ was replaced due to changes to _env, hostname_\n\n\nWhen Terraform must change an immutable attribute, it replaces the entire resource.\nIf you’re using prebuilds to speed up provisioning, unexpected replacements will slow down\nworkspace startup—even when claiming a prebuilt environment.\n\nFor tips on preventing replacements and improving claim performance, see [this guide](https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces.md#preventing-resource-replacement).\n\nNOTE: this prebuilt workspace used the **particle-accelerator** preset.\n"
}
5 changes: 5 additions & 0 deletions coderd/prebuilds/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"golang.org/x/xerrors"

"github.com/coder/coder/v2/coderd/database"
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
)

var (
Expand All @@ -27,6 +28,10 @@ type ReconciliationOrchestrator interface {
// Stop gracefully shuts down the orchestrator with the given cause.
// The cause is used for logging and error reporting.
Stop(ctx context.Context, cause error)

// TrackResourceReplacement handles a pathological situation whereby a Terraform resource is replaced due to drift,
// which can defeat the whole point of pre-provisioning a prebuilt workspace.
TrackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement)
}

type Reconciler interface {
Expand Down
7 changes: 5 additions & 2 deletions coderd/prebuilds/noop.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ import (
"github.com/google/uuid"

"github.com/coder/coder/v2/coderd/database"
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
)

type NoopReconciler struct{}

func (NoopReconciler) Run(context.Context) {}
func (NoopReconciler) Stop(context.Context, error) {}
func (NoopReconciler) Run(context.Context) {}
func (NoopReconciler) Stop(context.Context, error) {}
// TrackResourceReplacement is a no-op: the noop reconciler ignores any
// reported resource replacements. Parameters are left unnamed because none of
// them are used.
func (NoopReconciler) TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
}
func (NoopReconciler) ReconcileAll(context.Context) error { return nil }
func (NoopReconciler) SnapshotState(context.Context, database.Store) (*GlobalSnapshot, error) {
return &GlobalSnapshot{}, nil
Expand Down
Loading
0