8000 feat!: add ability to cancel pending workspace build by kacpersaw · Pull Request #18713 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat!: add ability to cancel pending workspace build #18713

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 8, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ba41ae8
add support for canceling workspace builds with expect_state param (l…
kacpersaw Jul 1, 2025
c4ee5b6
Use single transaction for canceling workspace build
kacpersaw Jul 2, 2025
c49c33e
Fix lint problem in ut
kacpersaw Jul 2, 2025
ba1dbf3
add cancel confirmation dialog for workspace builds and add expect_st…
kacpersaw Jul 2, 2025
acffda6
Fix lint
kacpersaw Jul 2, 2025
b672d76
Merge branch 'main' into kacpersaw/cancel-pending-provisioner-jobs
kacpersaw Jul 2, 2025
86a34df
Apply review suggestions
kacpersaw Jul 3, 2025
42170ab
Fix unit test
kacpersaw Jul 3, 2025
5db9d71
Apply review suggestions
kacpersaw Jul 3, 2025
1ede20c
Fix typo
kacpersaw Jul 3, 2025
2597615
Regenerate api types
kacpersaw Jul 3, 2025
6c2d0cf
Fix typo
kacpersaw Jul 3, 2025
c800494
Merge branch 'main' into kacpersaw/cancel-pending-provisioner-jobs
kacpersaw Jul 3, 2025
c5cb203
Apply a new authorization check for GetProvisionerJobByIDForUpdate
kacpersaw Jul 6, 2025
1de84cc
Apply a new authorization check for GetProvisionerJobByIDForUpdate
kacpersaw Jul 6, 2025
17fb6a3
Apply FE review suggestions
kacpersaw Jul 6, 2025
1b7b614
Apply review suggestions
kacpersaw Jul 7, 2025
634f556
Refactor cancelWorkspaceBuild parameter handling
kacpersaw Jul 7, 2025
4deace0
Fix lint
kacpersaw Jul 7, 2025
43430fa
Update coderd/workspacebuilds.go
kacpersaw Jul 7, 2025
6272d93
Extract cancel confirm dialog to a separate component
kacpersaw Jul 7, 2025
4d4a01d
Merge branch 'main' into kacpersaw/cancel-pending-provisioner-jobs
kacpersaw Jul 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/provisionerjobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func (r *RootCmd) provisionerJobsCancel() *serpent.Command {
err = client.CancelTemplateVersion(ctx, ptr.NilToEmpty(job.Input.TemplateVersionID))
case codersdk.ProvisionerJobTypeWorkspaceBuild:
_, _ = fmt.Fprintf(inv.Stdout, "Canceling workspace build job %s...\n", job.ID)
err = client.CancelWorkspaceBuild(ctx, ptr.NilToEmpty(job.Input.WorkspaceBuildID))
err = client.CancelWorkspaceBuild(ctx, ptr.NilToEmpty(job.Input.WorkspaceBuildID), codersdk.CancelWorkspaceBuildRequest{})
}
if err != nil {
return xerrors.Errorf("cancel provisioner job: %w", err)
Expand Down
10 changes: 10 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

133 changes: 81 additions & 52 deletions coderd/workspacebuilds.go
10000
Original file line number Diff line number Diff line change
Expand Up @@ -581,10 +581,12 @@ func (api *API) notifyWorkspaceUpdated(
// @Produce json
// @Tags Builds
// @Param workspacebuild path string true "Workspace build ID"
// @Param expect_status query string false "Expected status of the job" Enums(running, pending)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might pay to have a description field (idk if our API generation has something like that, you might need to check other routes) that explains in more detail what this actually does:

If expect_status is supplied, the request will be rejected with 499 Precondition Failed if the job doesn't match the state when performing the cancellation.

// @Success 200 {object} codersdk.Response
// @Router /workspacebuilds/{workspacebuild}/cancel [patch]
func (api *API) patchCancelWorkspaceBuild(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
expectStatus := r.URL.Query().Get("expect_status")
workspaceBuild := httpmw.WorkspaceBuildParam(r)
workspace, err := api.Database.GetWorkspaceByID(ctx, workspaceBuild.WorkspaceID)
if err != nil {
Expand All @@ -594,58 +596,85 @@ func (api *API) patchCancelWorkspaceBuild(rw http.ResponseWriter, r *http.Reques
return
}

valid, err := api.verifyUserCanCancelWorkspaceBuilds(ctx, httpmw.APIKey(r).UserID, workspace.TemplateID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error verifying permission to cancel workspace build.",
Detail: err.Error(),
})
return
}
if !valid {
httpapi.Write(ctx, rw, http.StatusForbidden, codersdk.Response{
Message: "User is not allowed to cancel workspace builds. Owner role is required.",
})
return
}
code := http.StatusOK
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably have a default status code of 500 and a generic error message, otherwise if the tx returns an error below (because the tx couldn't start) you'll be returning an empty 200 response.

resp := codersdk.Response{}
err = api.Database.InTx(func(store database.Store) error {
valid, err := verifyUserCanCancelWorkspaceBuilds(ctx, store, httpmw.APIKey(r).UserID, workspace.TemplateID)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We often just use the variable name db in these transaction closures

if err != nil {
code = http.StatusInternalServerError
resp.Message = "Internal error verifying permission to cancel workspace build."
resp.Detail = err.Error()

job, err := api.Database.GetProvisionerJobByID(ctx, workspaceBuild.JobID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error fetching provisioner job.",
Detail: err.Error(),
})
return
}
if job.CompletedAt.Valid {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Job has already completed!",
})
return
}
if job.CanceledAt.Valid {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Job has already been marked as canceled!",
return xerrors.Errorf("verify user can cancel workspace builds: %w", err)
}
if !valid {
code = http.StatusForbidden
resp.Message = "User is not allowed to cancel workspace builds. Owner role is required."

return xerrors.Errorf("user is not allowed to cancel workspace builds")
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should still allow for cancellations even if the template forbids them as long as the user specified expect_status=pending.


job, err := store.GetProvisionerJobByID(ctx, workspaceBuild.JobID)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably be a clone of this query with a SELECT ... FOR UPDATE query instead, otherwise there's nothing preventing this job from being picked up during the cancel operation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I forgot to change that query 🙅

if err != nil {
code = http.StatusInternalServerError
resp.Message = "Internal error fetching provisioner job."
resp.Detail = err.Error()

return xerrors.Errorf("get provisioner job: %w", err)
}
if job.CompletedAt.Valid {
code = http.StatusBadRequest
resp.Message = "Job has already completed!"

return xerrors.Errorf("job has already completed")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return xerrors.Errorf("job has already completed")
return xerrors.New("job has already completed")

}
if job.CanceledAt.Valid {
code = http.StatusBadRequest
resp.Message = "Job has already been marked as canceled!"

return xerrors.Errorf("job has already been marked as canceled")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return xerrors.Errorf("job has already been marked as canceled")
return xerrors.New("job has already been marked as canceled")

}

if expectStatus != "" {
if expectStatus != "running" && expectStatus != "pending" {
code = http.StatusBadRequest
resp.Message = "Invalid expect_status. Only 'running' or 'pending' are allowed."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
resp.Message = "Invalid expect_status. Only 'running' or 'pending' are allowed."
resp.Message = fmt.Sprintf("Invalid expect_status %q. Only 'running' or 'pending' are allowed.", expectStatus)


return xerrors.Errorf("invalid expect_status")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return xerrors.Errorf("invalid expect_status")
return xerrors.Errorf("invalid expect_status %q", expectStatus)

}

if job.JobStatus != database.ProvisionerJobStatus(expectStatus) {
code = http.StatusPreconditionFailed
resp.Message = "Job is not in the expected state."

return xerrors.Errorf("job is not in the expected state")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

F438
Suggested change
return xerrors.Errorf("job is not in the expected state")
return xerrors.Errorf("job is not in the expected state: expected %q, got %q", expectStatus, job.JobStatus)

}
}

err = store.UpdateProvisionerJobWithCancelByID(ctx, database.UpdateProvisionerJobWithCancelByIDParams{
ID: job.ID,
CanceledAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
// If the job is running, don't mark it completed!
Valid: !job.WorkerID.Valid,
},
})
return
}
err = api.Database.UpdateProvisionerJobWithCancelByID(ctx, database.UpdateProvisionerJobWithCancelByIDParams{
ID: job.ID,
CanceledAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
// If the job is running, don't mark it completed!
Valid: !job.WorkerID.Valid,
},
})
if err != nil {
code = http.StatusInternalServerError
resp.Message = "Internal error updating provisioner job."
resp.Detail = err.Error()

return xerrors.Errorf("update provisioner job: %w", err)
}

return nil
}, nil)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error updating provisioner job.",
Detail: err.Error(),
})
httpapi.Write(ctx, rw, code, resp)
return
}

Expand All @@ -659,8 +688,8 @@ func (api *API) patchCancelWorkspaceBuild(rw http.ResponseWriter, r *http.Reques
})
}

func (api *API) verifyUserCanCancelWorkspaceBuilds(ctx context.Context, userID uuid.UUID, templateID uuid.UUID) (bool, error) {
template, err := api.Database.GetTemplateByID(ctx, templateID)
func verifyUserCanCancelWorkspaceBuilds(ctx context.Context, store database.Store, userID uuid.UUID, templateID uuid.UUID) (bool, error) {
template, err := store.GetTemplateByID(ctx, templateID)
if err != nil {
return false, xerrors.New("no template exists for this workspace")
}
Expand All @@ -669,7 +698,7 @@ func (api *API) verifyUserCanCancelWorkspaceBuilds(ctx context.Context, userID u
return true, nil // all users can cancel workspace builds
}

user, err := api.Database.GetUserByID(ctx, userID)
user, err := store.GetUserByID(ctx, userID)
if err != nil {
return false, xerrors.New("user does not exist")
}
Expand Down
136 changes: 133 additions & 3 deletions coderd/workspacebuilds_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ func TestPatchCancelWorkspaceBuild(t *testing.T) {
build, err = client.WorkspaceBuild(ctx, workspace.LatestBuild.ID)
return assert.NoError(t, err) && build.Job.Status == codersdk.ProvisionerJobRunning
}, testutil.WaitShort, testutil.IntervalFast)
err := client.CancelWorkspaceBuild(ctx, build.ID)
err := client.CancelWorkspaceBuild(ctx, build.ID, codersdk.CancelWorkspaceBuildRequest{})
require.NoError(t, err)
require.Eventually(t, func() bool {
var err error
Expand Down Expand Up @@ -618,11 +618,141 @@ func TestPatchCancelWorkspaceBuild(t *testing.T) {
build, err = userClient.WorkspaceBuild(ctx, workspace.LatestBuild.ID)
return assert.NoError(t, err) && build.Job.Status == codersdk.ProvisionerJobRunning
}, testutil.WaitShort, testutil.IntervalFast)
err := userClient.CancelWorkspaceBuild(ctx, build.ID)
err := userClient.CancelWorkspaceBuild(ctx, build.ID, codersdk.CancelWorkspaceBuildRequest{})
var apiErr *codersdk.Error
require.ErrorAs(t, err, &apiErr)
require.Equal(t, http.StatusForbidden, apiErr.StatusCode())
})

t.Run("Cancel with expect_state=pending", func(t *testing.T) {
t.Parallel()
if !dbtestutil.WillUsePostgres() {
t.Skip("this test requires postgres")
}
// Given: a coderd instance with a provisioner daemon
store, ps, db := dbtestutil.NewDBWithSQLDB(t)
client, closeDaemon := coderdtest.NewWithProvisionerCloser(t, &coderdtest.Options{
Database: store,
Pubsub: ps,
IncludeProvisionerDaemon: true,
})
defer closeDaemon.Close()
// Given: a user, template, and workspace
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, template.ID)
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)

// Stop the provisioner daemon.
require.NoError(t, closeDaemon.Close())
ctx := testutil.Context(t, testutil.WaitLong)
// Given: no provisioner daemons exist.
_, err := db.ExecContext(ctx, `DELETE FROM provisioner_daemons;`)
require.NoError(t, err)

// When: a new workspace build is created
build, err := client.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{
TemplateVersionID: template.ActiveVersionID,
Transition: codersdk.WorkspaceTransitionStart,
})
// Then: the request should succeed.
require.NoError(t, err)
// Then: the provisioner job should remain pending.
require.Equal(t, codersdk.ProvisionerJobPending, build.Job.Status)

// Then: the response should indicate no provisioners are available.
if assert.NotNil(t, build.MatchedProvisioners) {
assert.Zero(t, build.MatchedProvisioners.Count)
assert.Zero(t, build.MatchedProvisioners.Available)
assert.Zero(t, build.MatchedProvisioners.MostRecentlySeen.Time)
assert.False(t, build.MatchedProvisioners.MostRecentlySeen.Valid)
}

// When: the workspace build is canceled
err = client.CancelWorkspaceBuild(ctx, build.ID, codersdk.CancelWorkspaceBuildRequest{
ExpectStatus: codersdk.CancelWorkspaceBuildStatusPending,
})
require.NoError(t, err)

// Then: the workspace build should be canceled.
build, err = client.WorkspaceBuild(ctx, build.ID)
require.NoError(t, err)
require.Equal(t, codersdk.ProvisionerJobCanceled, build.Job.Status)
})

t.Run("Cancel with expect_state=pending - should fail with 412", func(t *testing.T) {
t.Parallel()

client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionApply: []*proto.Response{{
Type: &proto.Response_Log{
Log: &proto.Log{},
},
}},
ProvisionPlan: echo.PlanComplete,
})
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, template.ID)
var build codersdk.WorkspaceBuild

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()

require.Eventually(t, func() bool {
var err error
build, err = client.WorkspaceBuild(ctx, workspace.LatestBuild.ID)
return assert.NoError(t, err) && build.Job.Status == codersdk.ProvisionerJobRunning
}, testutil.WaitShort, testutil.IntervalFast)

// When: a cancel request is made with expect_state=pending
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this have a chance to race against the job succeeding? The code for checking if the build is already done is before the code that checks the expect_status param

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my understanding - this job should stay in running state until cancel, because the test doesn't include ApplyComplete in ProvisionApply.
https://github.com/coder/coder/blob/main/provisioner/echo/serve.go#L194

err := client.CancelWorkspaceBuild(ctx, build.ID, codersdk.CancelWorkspaceBuildRequest{
ExpectStatus: codersdk.CancelWorkspaceBuildStatusPending,
})
// Then: the request should fail with 412.
require.Error(t, err)

var apiErr *codersdk.Error
require.ErrorAs(t, err, &apiErr)
require.Equal(t, http.StatusPreconditionFailed, apiErr.StatusCode())
})

t.Run("Cancel with expect_state - invalid status", func(t *testing.T) {
t.Parallel()

// Given: a coderd instance with a provisioner daemon
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionApply: []*proto.Response{{
Type: &proto.Response_Log{
Log: &proto.Log{},
},
}},
ProvisionPlan: echo.PlanComplete,
})
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, template.ID)

ctx := testutil.Context(t, testutil.WaitLong)

// When: a cancel request is made with invalid expect_state
err := client.CancelWorkspaceBuild(ctx, workspace.LatestBuild.ID, codersdk.CancelWorkspaceBuildRequest{
ExpectStatus: "invalid_status",
})
// Then: the request should fail with 400.
var apiErr *codersdk.Error
require.ErrorAs(t, err, &apiErr)
require.Equal(t, http.StatusBadRequest, apiErr.StatusCode())
require.Contains(t, apiErr.Message, "Invalid expect_status")
})
}

func TestWorkspaceBuildResources(t *testing.T) {
Expand Down Expand Up @@ -968,7 +1098,7 @@ func TestWorkspaceBuildStatus(t *testing.T) {
_ = closeDaemon.Close()
// after successful cancel is "canceled"
build = coderdtest.CreateWorkspaceBuild(t, client, workspace, database.WorkspaceTransitionStart)
err = client.CancelWorkspaceBuild(ctx, build.ID)
err = client.CancelWorkspaceBuild(ctx, build.ID, codersdk.CancelWorkspaceBuildRequest{})
require.NoError(t, err)

workspace, err = client.Workspace(ctx, workspace.ID)
Expand Down
2 changes: 1 addition & 1 deletion coderd/workspaces_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3245,7 +3245,7 @@ func TestWorkspaceWatcher(t *testing.T) {
closeFunc.Close()
build := coderdtest.CreateWorkspaceBuild(t, client, workspace, database.WorkspaceTransitionStart)
wait("first is for the workspace build itself", nil)
err = client.CancelWorkspaceBuild(ctx, build.ID)
err = client.CancelWorkspaceBuild(ctx, build.ID, codersdk.CancelWorkspaceBuildRequest{})
require.NoError(t, err)
wait("second is for the build cancel", nil)
}
Expand Down
Loading
Loading
0