feat: track resource replacements when claiming a prebuilt workspace by dannykopping · Pull Request #17571 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat: track resource replacements when claiming a prebuilt workspace #17571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b32923a
feat: log resource replacements
dannykopping Apr 25, 2025
0b0830f
feat: show terraform state drift diff in build logs
dannykopping Apr 25, 2025
256395a
feat: only highlight lines which mention replacement
dannykopping Apr 25, 2025
61ef61a
feat: notify template admins when prebuild claim results in resource …
dannykopping Apr 25, 2025
a66559f
chore: appease linter
dannykopping Apr 25, 2025
222892b
chore: fix notifications test
dannykopping Apr 25, 2025
f34e011
fix: don't panic
dannykopping Apr 28, 2025
5168c01
fix: renaming type
dannykopping Apr 28, 2025
41e5e0c
chore: updating migration numbers
dannykopping May 6, 2025
b29e8fa
chore: minor touch-ups
dannykopping May 6, 2025
b31ed5e
feat: add resource replacements metric
dannykopping May 7, 2025
8000
adf98d2
feat: add resource replacement notification
dannykopping May 7, 2025
f24aef0
make lint; make fmt
dannykopping May 7, 2025
70f9a53
chore: adding tests
dannykopping May 8, 2025
1e8385d
feat: pass flag to terraform provider when prebuilt workspace claimed
dannykopping May 9, 2025
d0f00ce
chore: update provider, add test for is_prebuild_claim
dannykopping May 12, 2025
11a2c5a
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
ce63b24
Merge branch 'dk/is-prebuild-claim' of github.com:/coder/coder into d…
dannykopping May 12, 2025
d2c5d43
chore: replace GetTemplatePresetsByID with GetPresetByID
dannykopping May 12, 2025
22d82a4
chore: correcting docs link
dannykopping May 12, 2025
5209aae
Merge branch 'main' of github.com:/coder/coder into dk/logreplacement
dannykopping May 12, 2025
39ce658
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
ac5655f
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
82c3f58
chore: note provisioner API change
dannykopping May 12, 2025
7577a90
chore: fixups
dannykopping May 13, 2025
a893b79
chore: adding note about immutable resources
dannykopping May 13, 2025
d9c906a
chore: review feedback
dannykopping May 13, 2025
471198a
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 13, 2025
7d694e6
chore: merge conflicts
dannykopping May 13, 2025
6b7a8b7
chore: fix 'is not iterable' bullshit
dannykopping May 13, 2025
5df2cb3
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 14, 2025
6d1c3ea
chore: rename migrations
dannykopping May 14, 2025
5f62702
chore: set notifications manager before enterprise server initializes…
dannykopping May 14, 2025
f74d799
chore: completing refactor since https://github.com/coder/coder/pull/…
dannykopping May 14, 2025
971f65c
chore: remove unnecessary atomicity since map is protected by mutex a…
dannykopping May 14, 2025
bc362b0
chore: appeasing linter's Very Important Suggestion
dannykopping May 14, 2025
4fbd356
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 14, 2025
b9eb8be
chore: remove old replacement logging
dannykopping May 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
make lint; make fmt
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
  • Loading branch information
dannykopping committed May 8, 2025
commit f24aef0af834f22b7e2e295f2830afaedeb25bcb
2 changes: 1 addition & 1 deletion coderd/notifications/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ var (
TemplateTemplateDeprecated = uuid.MustParse("f40fae84-55a2-42cd-99fa-b41c1ca64894")

TemplateWorkspaceBuildsFailedReport = uuid.MustParse("34a20db2-e9cc-4a93-b0e4-8569699d7a00")
TemplateWorkspaceResourceReplaced = uuid.MustParse("89d9745a-816e-4695-a17f-3d0a229e2b8d")
TemplateWorkspaceResourceReplaced = uuid.MustParse("89d9745a-816e-4695-a17f-3d0a229e2b8d")
)

// Notification-related events.
Expand Down
2 changes: 1 addition & 1 deletion coderd/prebuilds/noop.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type NoopReconciler struct{}

func (NoopReconciler) Run(context.Context) {}
func (NoopReconciler) Stop(context.Context, error) {}
func (NoopReconciler) TrackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement) {
func (NoopReconciler) TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
}
func (NoopReconciler) ReconcileAll(context.Context) error { return nil }
func (NoopReconciler) SnapshotState(context.Context, database.Store) (*GlobalSnapshot, error) {
Expand Down
6 changes: 4 additions & 2 deletions enterprise/coderd/prebuilds/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ func (c *StoreReconciler) provision(

func (c *StoreReconciler) TrackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement) {
// Set authorization context since this may be called in the background (i.e. with a bare context).
// nolint:gocritic // Necessary to query all the required data.
ctx = dbauthz.AsSystemRestricted(ctx)
// Since this may be called in a fire-and-forget fashion, we need to give up at some point.
trackCtx, trackCancel := context.WithTimeout(ctx, time.Minute)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a best-effort attempt to warn operators of this situation; it's OK if it times out, since we'll still get a log to trace this with.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see this partially covers my earlier comment. I still think it'd be good to take shutdowns into consideration for these and define what that behavior should be (rather than undefined). Right now these routines will be rudely interrupted during shutdown rather than exiting cleanly. Likewise these can be left running even if a CompleteJob is interrupted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Right now these routines will be rudely interrupted during shutdown rather than exiting cleanly.

Good call! I've now passed in the provisionerdserver's lifecycle context.

> Likewise these can be left running even if a CompleteJob is interrupted.

I believe that's already the case?

Expand All @@ -642,6 +643,7 @@ func (c *StoreReconciler) TrackResourceReplacement(ctx context.Context, workspac
}
}

// nolint:revive // Shut up it's fine.
func (c *StoreReconciler) trackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement) error {
if err := ctx.Err(); err != nil {
return err
Expand Down Expand Up @@ -683,12 +685,12 @@ func (c *StoreReconciler) trackResourceReplacement(ctx context.Context, workspac
// Use the claiming build here (not prebuild) because both should be equivalent, and we might as well spot inconsistencies now.
templateVersion, err := c.store.GetTemplateVersionByID(ctx, build.TemplateVersionID)
if err != nil {
return xerrors.Errorf("fetch template version %q: %w", build.TemplateVersionID.String())
return xerrors.Errorf("fetch template version %q: %w", build.TemplateVersionID.String(), err)
}

org, err := c.store.GetOrganizationByID(ctx, workspace.OrganizationID)
if err != nil {
return xerrors.Errorf("fetch org %q: %w", workspace.OrganizationID.String())
return xerrors.Errorf("fetch org %q: %w", workspace.OrganizationID.String(), err)
}

// Track resource replacement in Prometheus metric.
Expand Down
382D
0