diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 992fa06d94dc5..6ebd741a53187 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -43,7 +43,7 @@ type Executor struct { type Stats struct { Transitions map[uuid.UUID]database.WorkspaceTransition Elapsed time.Duration - Error error + Errors map[uuid.UUID]error } // New returns a new wsactions executor. @@ -83,9 +83,6 @@ func (e *Executor) Run() { return } stats := e.runOnce(t) - if stats.Error != nil { - e.log.Error(e.ctx, "error running once", slog.Error(stats.Error)) - } if e.statsCh != nil { select { case <-e.ctx.Done(): @@ -100,15 +97,14 @@ func (e *Executor) Run() { } func (e *Executor) runOnce(t time.Time) Stats { - var err error stats := Stats{ Transitions: make(map[uuid.UUID]database.WorkspaceTransition), + Errors: make(map[uuid.UUID]error), } // we build the map of transitions concurrently, so need a mutex to serialize writes to the map statsMu := sync.Mutex{} defer func() { stats.Elapsed = time.Since(t) - stats.Error = err }() currentTick := t.Truncate(time.Minute) @@ -139,152 +135,158 @@ func (e *Executor) runOnce(t time.Time) Stats { log := e.log.With(slog.F("workspace_id", wsID)) eg.Go(func() error { - var job *database.ProvisionerJob - var auditLog *auditParams - err := e.db.InTx(func(tx database.Store) error { - // Re-check eligibility since the first check was outside the - // transaction and the workspace settings may have changed. - ws, err := tx.GetWorkspaceByID(e.ctx, wsID) - if err != nil { - return xerrors.Errorf("get workspace by id: %w", err) - } + err := func() error { + var job *database.ProvisionerJob + var auditLog *auditParams + err := e.db.InTx(func(tx database.Store) error { + // Re-check eligibility since the first check was outside the + // transaction and the workspace settings may have changed. + ws, err := tx.GetWorkspaceByID(e.ctx, wsID) + if err != nil { + return xerrors.Errorf("get workspace by id: %w", err) + } - user, err := tx.GetUserByID(e.ctx, ws.OwnerID) - if err != nil { - return xerrors.Errorf("get user by id: %w", err) - } + user, err := tx.GetUserByID(e.ctx, ws.OwnerID) + if err != nil { + return xerrors.Errorf("get user by id: %w", err) + } - // Determine the workspace state based on its latest build. - latestBuild, err := tx.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) - if err != nil { - return xerrors.Errorf("get latest workspace build: %w", err) - } + // Determine the workspace state based on its latest build. + latestBuild, err := tx.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) + if err != nil { + return xerrors.Errorf("get latest workspace build: %w", err) + } - latestJob, err := tx.GetProvisionerJobByID(e.ctx, latestBuild.JobID) - if err != nil { - return xerrors.Errorf("get latest provisioner job: %w", err) - } + latestJob, err := tx.GetProvisionerJobByID(e.ctx, latestBuild.JobID) + if err != nil { + return xerrors.Errorf("get latest provisioner job: %w", err) + } - templateSchedule, err := (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) - if err != nil { - return xerrors.Errorf("get template scheduling options: %w", err) - } + templateSchedule, err := (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) + if err != nil { + return xerrors.Errorf("get template scheduling options: %w", err) + } - template, err := tx.GetTemplateByID(e.ctx, ws.TemplateID) - if err != nil { - return xerrors.Errorf("get template by ID: %w", err) - } + template, err := tx.GetTemplateByID(e.ctx, ws.TemplateID) + if err != nil { + return xerrors.Errorf("get template by ID: %w", err) + } - accessControl := (*(e.accessControlStore.Load())).GetTemplateAccessControl(template) + accessControl := (*(e.accessControlStore.Load())).GetTemplateAccessControl(template) - nextTransition, reason, err := getNextTransition(user, ws, latestBuild, latestJob, templateSchedule, currentTick) - if err != nil { - log.Debug(e.ctx, "skipping workspace", slog.Error(err)) - // err is used to indicate that a workspace is not eligible - // so returning nil here is ok although ultimately the distinction - // doesn't matter since the transaction is read-only up to - // this point. - return nil - } + nextTransition, reason, err := getNextTransition(user, ws, latestBuild, latestJob, templateSchedule, currentTick) + if err != nil { + log.Debug(e.ctx, "skipping workspace", slog.Error(err)) + // err is used to indicate that a workspace is not eligible + // so returning nil here is ok although ultimately the distinction + // doesn't matter since the transaction is read-only up to + // this point. + return nil + } - var build *database.WorkspaceBuild - if nextTransition != "" { - builder := wsbuilder.New(ws, nextTransition). - SetLastWorkspaceBuildInTx(&latestBuild). - SetLastWorkspaceBuildJobInTx(&latestJob). - Reason(reason) - log.Debug(e.ctx, "auto building workspace", slog.F("transition", nextTransition)) - if nextTransition == database.WorkspaceTransitionStart && - useActiveVersion(accessControl, ws) { - log.Debug(e.ctx, "autostarting with active version") - builder = builder.ActiveVersion() + var build *database.WorkspaceBuild + if nextTransition != "" { + builder := wsbuilder.New(ws, nextTransition). + SetLastWorkspaceBuildInTx(&latestBuild). + SetLastWorkspaceBuildJobInTx(&latestJob). + Reason(reason) + log.Debug(e.ctx, "auto building workspace", slog.F("transition", nextTransition)) + if nextTransition == database.WorkspaceTransitionStart && + useActiveVersion(accessControl, ws) { + log.Debug(e.ctx, "autostarting with active version") + builder = builder.ActiveVersion() + } + + build, job, err = builder.Build(e.ctx, tx, nil, audit.WorkspaceBuildBaggage{IP: "127.0.0.1"}) + if err != nil { + return xerrors.Errorf("build workspace with transition %q: %w", nextTransition, err) + } } - build, job, err = builder.Build(e.ctx, tx, nil, audit.WorkspaceBuildBaggage{IP: "127.0.0.1"}) - if err != nil { - log.Error(e.ctx, "unable to transition workspace", - slog.F("transition", nextTransition), - slog.Error(err), + // Transition the workspace to dormant if it has breached the template's + // threshold for inactivity. + if reason == database.BuildReasonAutolock { + wsOld := ws + ws, err = tx.UpdateWorkspaceDormantDeletingAt(e.ctx, database.UpdateWorkspaceDormantDeletingAtParams{ + ID: ws.ID, + DormantAt: sql.NullTime{ + Time: dbtime.Now(), + Valid: true, + }, + }) + + auditLog = &auditParams{ + Build: build, + Job: latestJob, + Reason: reason, + Old: wsOld, + New: ws, + } + if err != nil { + return xerrors.Errorf("update workspace dormant deleting at: %w", err) + } + + log.Info(e.ctx, "dormant workspace", + slog.F("last_used_at", ws.LastUsedAt), + slog.F("time_til_dormant", templateSchedule.TimeTilDormant), + slog.F("since_last_used_at", time.Since(ws.LastUsedAt)), ) - return xerrors.Errorf("build workspace: %w", err) } - } - // Transition the workspace to dormant if it has breached the template's - // threshold for inactivity. - if reason == database.BuildReasonAutolock { - wsOld := ws - ws, err = tx.UpdateWorkspaceDormantDeletingAt(e.ctx, database.UpdateWorkspaceDormantDeletingAtParams{ - ID: ws.ID, - DormantAt: sql.NullTime{ - Time: dbtime.Now(), - Valid: true, - }, - }) - - auditLog = &auditParams{ - Build: build, - Job: latestJob, - Reason: reason, - Old: wsOld, - New: ws, + if reason == database.BuildReasonAutodelete { + log.Info(e.ctx, "deleted workspace", + slog.F("dormant_at", ws.DormantAt.Time), + slog.F("time_til_dormant_autodelete", templateSchedule.TimeTilDormantAutoDelete), + ) } - if err != nil { - return xerrors.Errorf("update workspace dormant deleting at: %w", err) + + if nextTransition == "" { + return nil } - log.Info(e.ctx, "dormant workspace", - slog.F("last_used_at", ws.LastUsedAt), - slog.F("time_til_dormant", templateSchedule.TimeTilDormant), - slog.F("since_last_used_at", time.Since(ws.LastUsedAt)), - ) - } + statsMu.Lock() + stats.Transitions[ws.ID] = nextTransition + statsMu.Unlock() - if reason == database.BuildReasonAutodelete { - log.Info(e.ctx, "deleted workspace", - slog.F("dormant_at", ws.DormantAt.Time), - slog.F("time_til_dormant_autodelete", templateSchedule.TimeTilDormantAutoDelete), + log.Info(e.ctx, "scheduling workspace transition", + slog.F("transition", nextTransition), + slog.F("reason", reason), ) - } - if nextTransition == "" { return nil - } - - statsMu.Lock() - stats.Transitions[ws.ID] = nextTransition - statsMu.Unlock() - log.Info(e.ctx, "scheduling workspace transition", - slog.F("transition", nextTransition), - slog.F("reason", reason), - ) - - return nil - - // Run with RepeatableRead isolation so that the build process sees the same data - // as our calculation that determines whether an autobuild is necessary. - }, &sql.TxOptions{Isolation: sql.LevelRepeatableRead}) - if err != nil { - log.Error(e.ctx, "workspace scheduling failed", slog.Error(err)) - } - if auditLog != nil { - // If the transition didn't succeed then updating the workspace - // to indicate dormant didn't either. - auditLog.Success = err == nil - auditBuild(e.ctx, e.log, *e.auditor.Load(), *auditLog) - } - if job != nil && err == nil { - // Note that we can't refactor such that posting the job happens inside wsbuilder because it's called - // with an outer transaction like this, and we need to make sure the outer transaction commits before - // posting the job. If we post before the transaction commits, provisionerd might try to acquire the - // job, fail, and then sit idle instead of picking up the job. - err = provisionerjobs.PostJob(e.ps, *job) + // Run with RepeatableRead isolation so that the build process sees the same data + // as our calculation that determines whether an autobuild is necessary. + }, &sql.TxOptions{Isolation: sql.LevelRepeatableRead}) + if auditLog != nil { + // If the transition didn't succeed then updating the workspace + // to indicate dormant didn't either. + auditLog.Success = err == nil + auditBuild(e.ctx, e.log, *e.auditor.Load(), *auditLog) + } if err != nil { - // Client probably doesn't care about this error, so just log it. - log.Error(e.ctx, "failed to post provisioner job to pubsub", slog.Error(err)) + return xerrors.Errorf("transition workspace: %w", err) } + if job != nil { + // Note that we can't refactor such that posting the job happens inside wsbuilder because it's called + // with an outer transaction like this, and we need to make sure the outer transaction commits before + // posting the job. If we post before the transaction commits, provisionerd might try to acquire the + // job, fail, and then sit idle instead of picking up the job. + err = provisionerjobs.PostJob(e.ps, *job) + if err != nil { + return xerrors.Errorf("post provisioner job to pubsub: %w", err) + } + } + return nil + }() + if err != nil { + e.log.Error(e.ctx, "failed to transition workspace", slog.Error(err)) + statsMu.Lock() + stats.Errors[wsID] = err + statsMu.Unlock() } + // Even though we got an error we still return nil to avoid + // short-circuiting the evaluation loop. return nil }) } diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 366abdca7b2e8..f6d0c7caf4c21 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -54,7 +54,7 @@ func TestExecutorAutostartOK(t *testing.T) { // Then: the workspace should eventually be started stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 1) assert.Contains(t, stats.Transitions, workspace.ID) assert.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[workspace.ID]) @@ -172,13 +172,14 @@ func TestExecutorAutostartTemplateUpdated(t *testing.T) { }() stats := <-statsCh - assert.NoError(t, stats.Error) if !tc.expectStart { // Then: the workspace should not be started assert.Len(t, stats.Transitions, 0) + assert.Len(t, stats.Errors, 1) return } + assert.Len(t, stats.Errors, 0) // Then: the workspace should be started assert.Len(t, stats.Transitions, 1) assert.Contains(t, stats.Transitions, workspace.ID) @@ -226,7 +227,7 @@ func TestExecutorAutostartAlreadyRunning(t *testing.T) { // Then: the workspace should not be started. stats := <-statsCh - require.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 0) } @@ -261,7 +262,7 @@ func TestExecutorAutostartNotEnabled(t *testing.T) { // Then: the workspace should not be started. stats := <-statsCh - require.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 0) } @@ -305,7 +306,7 @@ func TestExecutorAutostartUserSuspended(t *testing.T) { // Then: nothing should happen stats := testutil.RequireRecvCtx(ctx, t, statsCh) - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -335,7 +336,7 @@ func TestExecutorAutostopOK(t *testing.T) { // Then: the workspace should be stopped stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 1) assert.Contains(t, stats.Transitions, workspace.ID) assert.Equal(t, database.WorkspaceTransitionStop, stats.Transitions[workspace.ID]) @@ -378,7 +379,7 @@ func TestExecutorAutostopExtend(t *testing.T) { // Then: nothing should happen and the workspace should stay running stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) // When: the autobuild executor ticks after the *new* deadline: @@ -389,7 +390,7 @@ func TestExecutorAutostopExtend(t *testing.T) { // Then: the workspace should be stopped stats = <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 1) assert.Contains(t, stats.Transitions, workspace.ID) assert.Equal(t, database.WorkspaceTransitionStop, stats.Transitions[workspace.ID]) @@ -423,7 +424,7 @@ func TestExecutorAutostopAlreadyStopped(t *testing.T) { // Then: the workspace should remain stopped and no build should happen. stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -461,7 +462,7 @@ func TestExecutorAutostopNotEnabled(t *testing.T) { // Then: the workspace should not be stopped. stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -494,7 +495,7 @@ func TestExecutorWorkspaceDeleted(t *testing.T) { // Then: nothing should happen stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -526,7 +527,7 @@ func TestExecutorWorkspaceAutostartTooEarly(t *testing.T) { // Then: nothing should happen stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -557,7 +558,7 @@ func TestExecutorWorkspaceAutostopBeforeDeadline(t *testing.T) { // Then: nothing should happen stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -592,7 +593,7 @@ func TestExecutorWorkspaceAutostopNoWaitChangedMyMind(t *testing.T) { // Then: the workspace should stop stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 1) assert.Equal(t, stats.Transitions[workspace.ID], database.WorkspaceTransitionStop) @@ -620,7 +621,7 @@ func TestExecutorWorkspaceAutostopNoWaitChangedMyMind(t *testing.T) { // Then: the workspace should not stop stats = <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -665,14 +666,14 @@ func TestExecutorAutostartMultipleOK(t *testing.T) { // Then: the workspace should eventually be started stats1 := <-statsCh1 - assert.NoError(t, stats1.Error) + assert.Len(t, stats1.Errors, 0) assert.Len(t, stats1.Transitions, 1) assert.Contains(t, stats1.Transitions, workspace.ID) assert.Equal(t, database.WorkspaceTransitionStart, stats1.Transitions[workspace.ID]) // Then: the other executor should not have done anything stats2 := <-statsCh2 - assert.NoError(t, stats2.Error) + assert.Len(t, stats2.Errors, 0) assert.Len(t, stats2.Transitions, 0) } @@ -728,7 +729,7 @@ func TestExecutorAutostartWithParameters(t *testing.T) { // Then: the workspace with parameters should eventually be started stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 1) assert.Contains(t, stats.Transitions, workspace.ID) assert.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[workspace.ID]) @@ -778,7 +779,7 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { // Then: nothing should happen stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) } @@ -821,7 +822,7 @@ func TestExecutorAutostopTemplateDisabled(t *testing.T) { // Then: nothing should happen stats := <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 0) // When: the autobuild executor ticks after the template setting: @@ -832,7 +833,7 @@ func TestExecutorAutostopTemplateDisabled(t *testing.T) { // Then: the workspace should be stopped stats = <-statsCh - assert.NoError(t, stats.Error) + assert.Len(t, stats.Errors, 0) assert.Len(t, stats.Transitions, 1) assert.Contains(t, stats.Transitions, workspace.ID) assert.Equal(t, database.WorkspaceTransitionStop, stats.Transitions[workspace.ID]) diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 9d0f499a70d63..c5bb986c0c8d9 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -227,6 +227,12 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can options.Database, options.Pubsub = dbtestutil.NewDB(t) } + accessControlStore := &atomic.Pointer[dbauthz.AccessControlStore]{} + var acs dbauthz.AccessControlStore = dbauthz.AGPLTemplateAccessControlStore{} + accessControlStore.Store(&acs) + + options.Database = dbauthz.New(options.Database, options.Authorizer, *options.Logger, accessControlStore) + // Some routes expect a deployment ID, so just make sure one exists. // Check first incase the caller already set up this database. // nolint:gocritic // Setting up unit test data inside test helper @@ -266,10 +272,6 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can t.Cleanup(closeBatcher) } - accessControlStore := &atomic.Pointer[dbauthz.AccessControlStore]{} - var acs dbauthz.AccessControlStore = dbauthz.AGPLTemplateAccessControlStore{} - accessControlStore.Store(&acs) - var templateScheduleStore atomic.Pointer[schedule.TemplateScheduleStore] if options.TemplateScheduleStore == nil { options.TemplateScheduleStore = schedule.NewAGPLTemplateScheduleStore() diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 9fb52099e9f00..176c92ca19c78 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -183,6 +183,7 @@ var ( rbac.ResourceTemplate.Type: {rbac.ActionRead, rbac.ActionUpdate}, rbac.ResourceWorkspace.Type: {rbac.ActionRead, rbac.ActionUpdate}, rbac.ResourceWorkspaceBuild.Type: {rbac.ActionRead, rbac.ActionUpdate, rbac.ActionDelete}, + rbac.ResourceUser.Type: {rbac.ActionRead}, }), Org: map[string][]rbac.Permission{}, User: []rbac.Permission{}, diff --git a/enterprise/coderd/workspaces_test.go b/enterprise/coderd/workspaces_test.go index ed6621b223a4b..b288e4a88f211 100644 --- a/enterprise/coderd/workspaces_test.go +++ b/enterprise/coderd/workspaces_test.go @@ -702,7 +702,7 @@ func TestWorkspaceAutobuild(t *testing.T) { // Assert that autostart works when the workspace isn't dormant.. tickCh <- sched.Next(ws.LatestBuild.CreatedAt) stats := <-statsCh - require.NoError(t, stats.Error) + require.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 1) require.Contains(t, stats.Transitions, ws.ID) require.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[ws.ID]) @@ -720,7 +720,7 @@ func TestWorkspaceAutobuild(t *testing.T) { // We should see the workspace get stopped now. tickCh <- ws.LastUsedAt.Add(inactiveTTL * 2) stats = <-statsCh - require.NoError(t, stats.Error) + require.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 1) require.Contains(t, stats.Transitions, ws.ID) require.Equal(t, database.WorkspaceTransitionStop, stats.Transitions[ws.ID]) @@ -878,7 +878,7 @@ func TestWorkspaceAutobuild(t *testing.T) { // Kick of an autostart build. tickCh <- sched.Next(ws.LatestBuild.CreatedAt) stats := <-statsCh - require.NoError(t, stats.Error) + require.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 1) require.Contains(t, stats.Transitions, ws.ID) require.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[ws.ID]) @@ -904,7 +904,7 @@ func TestWorkspaceAutobuild(t *testing.T) { // Force an autostart transition again. tickCh <- sched.Next(firstBuild.CreatedAt) stats = <-statsCh - require.NoError(t, stats.Error) + require.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 1) require.Contains(t, stats.Transitions, ws.ID) require.Equal(t, database.WorkspaceTransitionStart, stats.Transitions[ws.ID]) @@ -970,7 +970,7 @@ func TestExecutorAutostartBlocked(t *testing.T) { // Then: the workspace should not be started. stats := <-statsCh - require.NoError(t, stats.Error) + require.Len(t, stats.Errors, 0) require.Len(t, stats.Transitions, 0) }