feat: track resource replacements when claiming a prebuilt workspace by dannykopping · Pull Request #17571 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat: track resource replacements when claiming a prebuilt workspace #17571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b32923a
feat: log resource replacements
dannykopping Apr 25, 2025
0b0830f
feat: show terraform state drift diff in build logs
dannykopping Apr 25, 2025
256395a
feat: only highlight lines which mention replacement
dannykopping Apr 25, 2025
61ef61a
feat: notify template admins when prebuild claim results in resource …
dannykopping Apr 25, 2025
a66559f
chore: appease linter
dannykopping Apr 25, 2025
222892b
chore: fix notifications test
dannykopping Apr 25, 2025
f34e011
fix: don't panic
dannykopping Apr 28, 2025
5168c01
fix: renaming type
dannykopping Apr 28, 2025
41e5e0c
chore: updating migration numbers
dannykopping May 6, 2025
b29e8fa
chore: minor touch-ups
dannykopping May 6, 2025
b31ed5e
feat: add resource replacements metric
dannykopping May 7, 2025
adf98d2
feat: add resource replacement notification
dannykopping May 7, 2025
f24aef0
make lint; make fmt
dannykopping May 7, 2025
70f9a53
chore: adding tests
dannykopping May 8, 2025
1e8385d
feat: pass flag to terraform provider when prebuilt workspace claimed
dannykopping May 9, 2025
d0f00ce
chore: update provider, add test for is_prebuild_claim
dannykopping May 12, 2025
11a2c5a
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
ce63b24
Merge branch 'dk/is-prebuild-claim' of github.com:/coder/coder into d…
dannykopping May 12, 2025
d2c5d43
chore: replace GetTemplatePresetsByID with GetPresetByID
dannykopping May 12, 2025
22d82a4
chore: correcting docs link
dannykopping May 12, 2025
5209aae
Merge branch 'main' of github.com:/coder/coder into dk/logreplacement
dannykopping May 12, 2025
39ce658
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
ac5655f
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 12, 2025
82c3f58
chore: note provisioner API change
dannykopping May 12, 2025
7577a90
chore: fixups
dannykopping May 13, 2025
a893b79
chore: adding note about immutable resources
dannykopping May 13, 2025
d9c906a
chore: review feedback
dannykopping May 13, 2025
471198a
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 13, 2025
7d694e6
chore: merge conflicts
dannykopping May 13, 2025
6b7a8b7
chore: fix 'is not iterable' bullshit
dannykopping May 13, 2025
5df2cb3
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 14, 2025
6d1c3ea
chore: rename migrations
dannykopping May 14, 2025
5f62702
chore: set notifications manager before enterprise server initializes…
dannykopping May 14, 2025
f74d799
chore: completing refactor since https://github.com/coder/coder/pull/…
dannykopping May 14, 2025
971f65c
chore: remove unnecessary atomicity since map is protected by mutex a…
dannykopping May 14, 2025
bc362b0
chore: appeasing linter's Very Important Suggestion
dannykopping May 14, 2025
4fbd356
Merge branch 'main' of github.com:/coder/coder into dk/logreplacements
dannykopping May 14, 2025
b9eb8be
chore: remove old replacement logging
dannykopping May 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/testdata/coder_provisioner_list_--output_json.golden
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"last_seen_at": "====[timestamp]=====",
"name": "test",
"version": "v0.0.0-devel",
"api_version": "1.4",
"api_version": "1.5",
"provisioners": [
"echo"
],
Expand Down
2 changes: 1 addition & 1 deletion coderd/prebuilds/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type ReconciliationOrchestrator interface {
// TrackResourceReplacement handles a pathological situation whereby a terraform resource is replaced due to drift,
// which can obviate the whole point of pre-provisioning a prebuilt workspace.
// See more detail at https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces.md#preventing-resource-replacement.
TrackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement)
TrackResourceReplacement(ctx context.Context, workspaceID, buildID uuid.UUID, replacements []*sdkproto.ResourceReplacement)
}

type Reconciler interface {
Expand Down
2 changes: 1 addition & 1 deletion coderd/prebuilds/noop.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type NoopReconciler struct{}

func (NoopReconciler) Run(context.Context) {}
func (NoopReconciler) Stop(context.Context, error) {}
func (NoopReconciler) TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
func (NoopReconciler) TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
}
func (NoopReconciler) ReconcileAll(context.Context) error { return nil }
func (NoopReconciler) SnapshotState(context.Context, database.Store) (*GlobalSnapshot, error) {
Expand Down
19 changes: 7 additions & 12 deletions coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,11 +620,6 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
}
}

var prebuildClaimedForUserID string
if input.PrebuildClaimedByUser != uuid.Nil {
prebuildClaimedForUserID = input.PrebuildClaimedByUser.String()
}

protoJob.Type = &proto.AcquiredJob_WorkspaceBuild_{
WorkspaceBuild: &proto.AcquiredJob_WorkspaceBuild{
WorkspaceBuildId: workspaceBuild.ID.String(),
Expand Down Expand Up @@ -654,7 +649,7 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
WorkspaceOwnerLoginType: string(owner.LoginType),
WorkspaceOwnerRbacRoles: ownerRbacRoles,
IsPrebuild: input.IsPrebuild,
PrebuildClaimForUserId: prebuildClaimedForUserID,
IsPrebuildClaim: input.IsPrebuildClaim,
},
LogLevel: input.LogLevel,
},
Expand Down Expand Up @@ -1736,7 +1731,7 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob)
orchestrator := s.PrebuildsOrchestrator.Load()
if resourceReplacements := completed.GetWorkspaceBuild().GetResourceReplacements(); orchestrator != nil && len(resourceReplacements) > 0 {
// Fire and forget.
go (*orchestrator).TrackResourceReplacement(context.Background(), workspace.ID, workspaceBuild.ID, input.PrebuildClaimedByUser, resourceReplacements)
go (*orchestrator).TrackResourceReplacement(context.Background(), workspace.ID, workspaceBuild.ID, resourceReplacements)
}
}

Expand Down Expand Up @@ -2489,11 +2484,11 @@ type TemplateVersionImportJob struct {

// WorkspaceProvisionJob is the payload for the "workspace_provision" job type.
type WorkspaceProvisionJob struct {
WorkspaceBuildID uuid.UUID `json:"workspace_build_id"`
DryRun bool `json:"dry_run"`
IsPrebuild bool `json:"is_prebuild,omitempty"`
PrebuildClaimedByUser uuid.UUID `json:"prebuild_claimed_by,omitempty"`
LogLevel string `json:"log_level,omitempty"`
WorkspaceBuildID uuid.UUID `json:"workspace_build_id"`
DryRun bool `json:"dry_run"`
IsPrebuild bool `json:"is_prebuild,omitempty"`
IsPrebuildClaim bool `json:"is_prebuild_claim,omitempty"`
LogLevel string `json:"log_level,omitempty"`
}

// TemplateVersionDryRunJob is the payload for the "template_version_dry_run" job type.
Expand Down
6 changes: 3 additions & 3 deletions coderd/provisionerdserver/provisionerdserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1798,9 +1798,9 @@ func TestCompleteJob(t *testing.T) {
Input: must(json.Marshal(provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: build.ID,

IsPrebuild: false,
// Mark the job as a prebuilt workspace claim.
PrebuildClaimedByUser: uuid.New(),
IsPrebuild: false,
IsPrebuildClaim: true,
})),
OrganizationID: pd.OrganizationID,
})
Expand Down Expand Up @@ -1847,7 +1847,7 @@ type mockPrebuildsOrchestrator struct {
done chan struct{}
}

func (m *mockPrebuildsOrchestrator) TrackResourceReplacement(_ context.Context, _, _, _ uuid.UUID, replacements []*sdkproto.ResourceReplacement) {
func (m *mockPrebuildsOrchestrator) TrackResourceReplacement(_ context.Context, _, _ uuid.UUID, replacements []*sdkproto.ResourceReplacement) {
m.replacements = replacements
m.done <- struct{}{}
}
Expand Down
2 changes: 1 addition & 1 deletion coderd/workspaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ func createWorkspace(
builder = builder.TemplateVersionPresetID(req.TemplateVersionPresetID)
}
if claimedWorkspace != nil {
builder = builder.MarkPrebuildClaimedBy(owner.ID)
builder = builder.MarkPrebuildClaim()
}

if req.EnableDynamicParameters && api.Experiments.Enabled(codersdk.ExperimentDynamicParameters) {
Expand Down
17 changes: 9 additions & 8 deletions coderd/wsbuilder/wsbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ type Builder struct {
parameterValues *[]string
templateVersionPresetParameterValues []database.TemplateVersionPresetParameter

prebuild bool
prebuildClaimedBy uuid.UUID
prebuild, prebuildClaim bool

verifyNoLegacyParametersOnce bool
}
Expand Down Expand Up @@ -174,15 +173,17 @@ func (b Builder) RichParameterValues(p []codersdk.WorkspaceBuildParameter) Build
return b
}

// MarkPrebuild indicates that a prebuilt workspace is being built.
func (b Builder) MarkPrebuild() Builder {
// nolint: revive
b.prebuild = true
return b
}

func (b Builder) MarkPrebuildClaimedBy(userID uuid.UUID) Builder {
// MarkPrebuildClaim indicates that a prebuilt workspace is being claimed.
func (b Builder) MarkPrebuildClaim() Builder {
// nolint: revive
b.prebuildClaimedBy = userID
b.prebuildClaim = true
return b
}

Expand Down Expand Up @@ -322,10 +323,10 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object

workspaceBuildID := uuid.New()
input, err := json.Marshal(provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: workspaceBuildID,
LogLevel: b.logLevel,
IsPrebuild: b.prebuild,
PrebuildClaimedByUser: b.prebuildClaimedBy,
WorkspaceBuildID: workspaceBuildID,
LogLevel: b.logLevel,
IsPrebuild: b.prebuild,
IsPrebuildClaim: b.prebuildClaim,
})
if err != nil {
return nil, nil, nil, BuildError{
Expand Down
10 changes: 5 additions & 5 deletions enterprise/coderd/prebuilds/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,21 +630,21 @@ func (c *StoreReconciler) provision(
return nil
}

func (c *StoreReconciler) TrackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement) {
func (c *StoreReconciler) TrackResourceReplacement(ctx context.Context, workspaceID, buildID uuid.UUID, replacements []*sdkproto.ResourceReplacement) {
// Set authorization context since this may be called in the background (i.e. with a bare context).
// nolint:gocritic // Necessary to query all the required data.
ctx = dbauthz.AsSystemRestricted(ctx)
// Since this may be called in a fire-and-forget fashion, we need to give up at some point.
trackCtx, trackCancel := context.WithTimeout(ctx, time.Minute)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a best-effort attempt to warn operators of this situation; it's ok if it times out, we'll get a log to trace this with.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see this partially covers my earlier comment. I still think it'd be good to take shutdowns into consideration for these and define what that behavior should be (rather than undefined). Right now these routines will be rudely interrupted during shutdown rather than exiting cleanly. Likewise these can be left running even if a CompleteJob is interrupted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Right now these routines will be rudely interrupted during shutdown rather than exiting cleanly.

Good call! I've now passed in the provisionerdserver's lifecycle context.

> Likewise these can be left running even if a CompleteJob is interrupted.

I believe that's already the case?

defer trackCancel()

if err := c.trackResourceReplacement(trackCtx, workspaceID, buildID, claimantID, replacements); err != nil {
if err := c.trackResourceReplacement(trackCtx, workspaceID, buildID, replacements); err != nil {
c.logger.Error(ctx, "failed to track resource replacement", slog.Error(err))
}
}

// nolint:revive // Shut up it's fine.
func (c *StoreReconciler) trackResourceReplacement(ctx context.Context, workspaceID, buildID, claimantID uuid.UUID, replacements []*sdkproto.ResourceReplacement) error {
func (c *StoreReconciler) trackResourceReplacement(ctx context.Context, workspaceID, buildID uuid.UUID, replacements []*sdkproto.ResourceReplacement) error {
if err := ctx.Err(); err != nil {
return err
}
Expand Down Expand Up @@ -677,9 +677,9 @@ func (c *StoreReconciler) trackResourceReplacement(ctx context.Context, workspac
return xerrors.Errorf("fetch template preset for template version ID %q: %w", prebuild.TemplateVersionID.String(), err)
}

claimant, err := c.store.GetUserByID(ctx, claimantID)
claimant, err := c.store.GetUserByID(ctx, workspace.OwnerID) // At this point, the workspace is owned by the new owner.
if err != nil {
return xerrors.Errorf("fetch claimant %q: %w", claimantID.String(), err)
return xerrors.Errorf("fetch claimant %q: %w", workspace.OwnerID.String(), err)
}

// Use the claiming build here (not prebuild) because both should be equivalent, and we might as well spot inconsistencies now.
Expand Down
2 changes: 1 addition & 1 deletion enterprise/coderd/prebuilds/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ func TestTrackResourceReplacement(t *testing.T) {
}))

// When: a claim occurred and resource replacements are detected (_how_ is out of scope of this test).
reconciler.TrackResourceReplacement(ctx, prebuiltWorkspace.ID, prebuild.ID, userID, []*sdkproto.ResourceReplacement{
reconciler.TrackResourceReplacement(ctx, prebuiltWorkspace.ID, prebuild.ID, []*sdkproto.ResourceReplacement{
{
Resource: "docker_container[0]",
Paths: []string{"env", "image"},
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ require (
github.com/coder/quartz v0.1.3
github.com/coder/retry v1.5.1
github.com/coder/serpent v0.10.0
github.com/coder/terraform-provider-coder/v2 v2.4.0
github.com/coder/terraform-provider-coder/v2 v2.4.1
github.com/coder/websocket v1.8.13
github.com/coder/wgtunnel v0.1.13-0.20240522110300-ade90dfb2da0
github.com/coreos/go-oidc/v3 v3.14.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -921,8 +921,8 @@ github.com/coder/tailscale v1.1.1-0.20250422090654-5090e715905e h1:nope/SZfoLB9M
github.com/coder/tailscale v1.1.1-0.20250422090654-5090e715905e/go.mod h1:1ggFFdHTRjPRu9Yc1yA7nVHBYB50w9Ce7VIXNqcW6Ko=
github.com/coder/terraform-config-inspect v0.0.0-20250107175719-6d06d90c630e h1:JNLPDi2P73laR1oAclY6jWzAbucf70ASAvf5mh2cME0=
github.com/coder/terraform-config-inspect v0.0.0-20250107175719-6d06d90c630e/go.mod h1:Gz/z9Hbn+4KSp8A2FBtNszfLSdT2Tn/uAKGuVqqWmDI=
github.com/coder/terraform-provider-coder/v2 v2.4.0 h1:uuFmF03IyahAZLXEukOdmvV9hGfUMJSESD8+G5wkTcM=
github.com/coder/terraform-provider-coder/v2 v2.4.0/go.mod h1:2kaBpn5k9ZWtgKq5k4JbkVZG9DzEqR4mJSmpdshcO+s=
github.com/coder/terraform-provider-coder/v2 v2.4.1 h1:+HxLJVENJ+kvGhibQ0jbr8Evi6M857d9691ytxNbv90=
github.com/coder/terraform-provider-coder/v2 v2.4.1/go.mod h1:2kaBpn5k9ZWtgKq5k4JbkVZG9DzEqR4mJSmpdshcO+s=
github.com/coder/trivy v0.0.0-20250409153844-e6b004bc465a h1:yryP7e+IQUAArlycH4hQrjXQ64eRNbxsV5/wuVXHgME=
github.com/coder/trivy v0.0.0-20250409153844-e6b004bc465a/go.mod h1:dDvq9axp3kZsT63gY2Znd1iwzfqDq3kXbQnccIrjRYY=
github.com/coder/websocket v1.8.13 h1:f3QZdXy7uGVz+4uCJy2nTZyM0yTBj8yANEHhqlXZ9FE=
Expand Down
2 changes: 1 addition & 1 deletion provisioner/terraform/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ func (e *executor) plan(ctx, killCtx context.Context, env, vars []string, logr l
// When a prebuild claim attempt is made, log a warning if a resource is due to be replaced, since this will obviate
// the point of prebuilding if the expensive resource is replaced once claimed!
var (
isPrebuildClaimAttempt = !destroy && metadata.GetPrebuildClaimForUserId() != ""
isPrebuildClaimAttempt = !destroy && metadata.GetIsPrebuildClaim()
resReps []*proto.ResourceReplacement
)
if repsFromPlan := findResourceReplacements(plan); len(repsFromPlan) > 0 {
Expand Down
3 changes: 3 additions & 0 deletions provisioner/terraform/provision.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ func provisionEnv(
if metadata.GetIsPrebuild() {
env = append(env, provider.IsPrebuildEnvironmentVariable()+"=true")
}
if metadata.GetIsPrebuildClaim() {
env = append(env, provider.IsPrebuildClaimEnvironmentVariable()+"=true")
}

for key, value := range provisionersdk.AgentScriptEnv() {
env = append(env, key+"="+value)
Expand Down
45 changes: 43 additions & 2 deletions provisioner/terraform/provision_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogtest"

"github.com/coder/coder/v2/codersdk/drpc"
"github.com/coder/coder/v2/provisioner/terraform"
"github.com/coder/coder/v2/provisionersdk"
Expand Down Expand Up @@ -977,7 +978,7 @@ func TestProvision(t *testing.T) {
required_providers {
coder = {
source = "coder/coder"
version = "2.3.0-pre2"
version = ">= 2.4.1"
}
}
}
Expand All @@ -994,7 +995,8 @@ func TestProvision(t *testing.T) {
},
Request: &proto.PlanRequest{
Metadata: &proto.Metadata{
IsPrebuild: true,
IsPrebuild: true,
IsPrebuildClaim: false,
},
},
Response: &proto.PlanComplete{
Expand All @@ -1008,6 +1010,45 @@ func TestProvision(t *testing.T) {
}},
},
},
{
Name: "is-prebuild-claim",
Files: map[string]string{
"main.tf": `terraform {
required_providers {
coder = {
source = "coder/coder"
version = ">= 2.4.1"
}
}
}
data "coder_workspace" "me" {}
resource "null_resource" "example" {}
resource "coder_metadata" "example" {
resource_id = null_resource.example.id
item {
key = "is_prebuild_claim"
value = data.coder_workspace.me.is_prebuild_claim
}
}
`,
},
Request: &proto.PlanRequest{
Metadata: &proto.Metadata{
IsPrebuild: false,
IsPrebuildClaim: true,
},
},
Response: &proto.PlanComplete{
Resources: []*proto.Resource{{
Name: "example",
Type: "null_resource",
Metadata: []*proto.Resource_Metadata{{
Key: "is_prebuild_claim",
Value: "true",
}},
}},
},
},
}

// Remove unused cache dirs before running tests.
Expand Down
7 changes: 6 additions & 1 deletion provisionerd/proto/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@ import "github.com/coder/coder/v2/apiversion"
//
// API v1.4:
// - Add new field named `devcontainers` in the Agent.
//
// API v1.5:
// - Add new field named `is_prebuild_claim` in the Metadata message.
const (
CurrentMajor = 1
CurrentMinor = 4
CurrentMinor = 5
)

// CurrentVersion is the current provisionerd API version.
// Breaking changes to the provisionerd API **MUST** increment
// CurrentMajor above.
// Non-breaking changes to the provisionerd API **MUST** increment
// CurrentMinor above.
var CurrentVersion = apiversion.New(CurrentMajor, CurrentMinor)
21 changes: 10 additions & 11 deletions provisionersdk/proto/provisioner.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions provisionersdk/proto/provisioner.proto
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,9 @@ message Metadata {
string workspace_build_id = 17;
string workspace_owner_login_type = 18;
repeated Role workspace_owner_rbac_roles = 19;
bool is_prebuild = 20;
string running_workspace_agent_token = 21;
string prebuild_claim_for_user_id = 22;
bool is_prebuild = 20; // Indicates that a prebuilt workspace is being built.
string running_workspace_agent_token = 21; // Preserves the running agent token of a prebuilt workspace so it can reinitialize.
bool is_prebuild_claim = 22; // Indicates that a prebuilt workspace is being claimed.
}

// Config represents execution configuration shared by all subsequent requests in the Session
Expand Down
Loading
0