From 7358ee0a23b665fe1add4794e8cc5dec1d675186 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 4 Jun 2025 10:38:04 +0000 Subject: [PATCH 01/18] feat(agent/agentcontainers): implement sub agent injection This change adds support for sub agent creation and injection into dev containers. Closes coder/internal#621 --- agent/agent.go | 6 +- agent/agentcontainers/api.go | 354 +++++++++++++++++++++++++++--- agent/agentcontainers/api_test.go | 241 ++++++++++++++++++++ agent/agentcontainers/subagent.go | 128 +++++++++++ agent/api.go | 4 +- 5 files changed, 698 insertions(+), 35 deletions(-) create mode 100644 agent/agentcontainers/subagent.go diff --git a/agent/agent.go b/agent/agent.go index 74cf305c9434a..17298e7aa5772 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1188,7 +1188,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, // createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates // the tailnet using the information in the manifest func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error { - return func(ctx context.Context, _ proto.DRPCAgentClient26) (retErr error) { + return func(ctx context.Context, aAPI proto.DRPCAgentClient26) (retErr error) { if err := manifestOK.wait(ctx); err != nil { return xerrors.Errorf("no manifest: %w", err) } @@ -1208,6 +1208,7 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co // agent API. network, err = a.createTailnet( a.gracefulCtx, + aAPI, manifest.AgentID, manifest.DERPMap, manifest.DERPForceWebSockets, @@ -1355,6 +1356,7 @@ func (a *agent) trackGoroutine(fn func()) error { func (a *agent) createTailnet( ctx context.Context, + aAPI proto.DRPCAgentClient26, agentID uuid.UUID, derpMap *tailcfg.DERPMap, derpForceWebSockets, disableDirectConnections bool, @@ -1487,7 +1489,7 @@ func (a *agent) createTailnet( }() if err = a.trackGoroutine(func() { defer apiListener.Close() - apiHandler, closeAPIHAndler := a.apiHandler() + apiHandler, closeAPIHAndler := a.apiHandler(aAPI) defer func() { _ = closeAPIHAndler() }() diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index d826cb23cbc1f..72f2a119c5e7d 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -5,7 +5,10 @@ import ( "errors" "fmt" "net/http" + "os" "path" + "path/filepath" + "runtime" "slices" "strings" "sync" @@ -26,8 +29,14 @@ import ( ) const ( - defaultUpdateInterval = 10 * time.Second - listContainersTimeout = 15 * time.Second + defaultUpdateInterval = 10 * time.Second + defaultOperationTimeout = 15 * time.Second + + // Destination path inside the container, we store it in a fixed location + // under /.coder-agent/coder to avoid conflicts and avoid being shadowed + // by tmpfs or other mounts. This assumes the container root filesystem is + // read-write, which seems sensible for dev containers. + coderPathInsideContainer = "/.coder-agent/coder" ) // API is responsible for container-related operations in the agent. @@ -47,6 +56,7 @@ type API struct { dccli DevcontainerCLI clock quartz.Clock scriptLogger func(logSourceID uuid.UUID) ScriptLogger + subAgentClient SubAgentClient mu sync.RWMutex closed bool @@ -57,11 +67,18 @@ type API struct { configFileModifiedTimes map[string]time.Time // By config file path. recreateSuccessTimes map[string]time.Time // By workspace folder. recreateErrorTimes map[string]time.Time // By workspace folder. - recreateWg sync.WaitGroup + injectedSubAgentProcs map[string]subAgentProcess // By container ID. + asyncWg sync.WaitGroup devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder. } +type subAgentProcess struct { + agent SubAgent + ctx context.Context + stop context.CancelFunc +} + // Option is a functional option for API. type Option func(*API) @@ -96,6 +113,14 @@ func WithDevcontainerCLI(dccli DevcontainerCLI) Option { } } +// WithSubAgentClient sets the SubAgentClient implementation to use. +// This is used to list, create and delete Dev Container agents. +func WithSubAgentClient(client SubAgentClient) Option { + return func(api *API) { + api.subAgentClient = client + } +} + // WithDevcontainers sets the known devcontainers for the API. This // allows the API to be aware of devcontainers defined in the workspace // agent manifest. @@ -174,12 +199,14 @@ func NewAPI(logger slog.Logger, options ...Option) *API { logger: logger, clock: quartz.NewReal(), execer: agentexec.DefaultExecer, + subAgentClient: noopSubAgentClient{}, devcontainerNames: make(map[string]bool), knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer), configFileModifiedTimes: make(map[string]time.Time), recreateSuccessTimes: make(map[string]time.Time), recreateErrorTimes: make(map[string]time.Time), scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, + injectedSubAgentProcs: make(map[string]subAgentProcess), } // The ctx and logger must be set before applying options to avoid // nil pointer dereference. @@ -254,6 +281,15 @@ func (api *API) updaterLoop() { defer api.logger.Debug(api.ctx, "updater loop stopped") api.logger.Debug(api.ctx, "updater loop started") + // Make sure we clean up any subagents not tracked by this process + // before starting the update loop and creating new ones. + api.logger.Debug(api.ctx, "cleaning up subagents") + if err := api.cleanupSubAgents(api.ctx); err != nil { + api.logger.Error(api.ctx, "cleanup subagents failed", slog.Error(err)) + } else { + api.logger.Debug(api.ctx, "subagent cleanup complete") + } + // Perform an initial update to populate the container list, this // gives us a guarantee that the API has loaded the initial state // before returning any responses. This is useful for both tests @@ -360,7 +396,7 @@ func (api *API) handleList(rw http.ResponseWriter, r *http.Request) { // updateContainers fetches the latest container list, processes it, and // updates the cache. It performs locking for updating shared API state. func (api *API) updateContainers(ctx context.Context) error { - listCtx, listCancel := context.WithTimeout(ctx, listContainersTimeout) + listCtx, listCancel := context.WithTimeout(ctx, defaultOperationTimeout) defer listCancel() updated, err := api.ccli.List(listCtx) @@ -432,20 +468,6 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code continue } - // NOTE(mafredri): This name impl. may change to accommodate devcontainer agents RFC. - // If not in our known list, add as a runtime detected entry. - name := path.Base(workspaceFolder) - if api.devcontainerNames[name] { - // Try to find a unique name by appending a number. - for i := 2; ; i++ { - newName := fmt.Sprintf("%s-%d", name, i) - if !api.devcontainerNames[newName] { - name = newName - break - } - } - } - api.devcontainerNames[name] = true if configFile != "" { if err := api.watcher.Add(configFile); err != nil { api.logger.Error(ctx, "watch devcontainer config file failed", slog.Error(err), slog.F("file", configFile)) @@ -454,7 +476,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code api.knownDevcontainers[workspaceFolder] = codersdk.WorkspaceAgentDevcontainer{ ID: uuid.New(), - Name: name, + Name: "", // Updated later based on container state. WorkspaceFolder: workspaceFolder, ConfigPath: configFile, Status: "", // Updated later based on container state. @@ -466,19 +488,23 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code // Iterate through all known devcontainers and update their status // based on the current state of the containers. for _, dc := range api.knownDevcontainers { + if dc.Container != nil { + if !api.devcontainerNames[dc.Name] { + // If the devcontainer name wasn't set via terraform, we + // use the containers friendly name as a fallback which + // will keep changing as the dev container is recreated. + // TODO(mafredri): Parse the container label (i.e. devcontainer.json) for customization. + dc.Name = safeFriendlyName(dc.Container.FriendlyName) + } + dc.Container.DevcontainerStatus = dc.Status + dc.Container.DevcontainerDirty = dc.Dirty + } + switch { case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting: - if dc.Container != nil { - dc.Container.DevcontainerStatus = dc.Status - dc.Container.DevcontainerDirty = dc.Dirty - } continue // This state is handled by the recreation routine. case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusError && (dc.Container == nil || dc.Container.CreatedAt.Before(api.recreateErrorTimes[dc.WorkspaceFolder])): - if dc.Container != nil { - dc.Container.DevcontainerStatus = dc.Status - dc.Container.DevcontainerDirty = dc.Dirty - } continue // The devcontainer needs to be recreated. case dc.Container != nil: @@ -494,7 +520,17 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code } dc.Container.DevcontainerDirty = dc.Dirty + if _, injected := api.injectedSubAgentProcs[dc.Container.ID]; !injected && dc.Status == codersdk.WorkspaceAgentDevcontainerStatusRunning { + err := api.injectSubAgentIntoContainerLocked(ctx, dc) + if err != nil { + api.logger.Error(ctx, "inject subagent into container failed", slog.Error(err)) + } + } + case dc.Container == nil: + if !api.devcontainerNames[dc.Name] { + dc.Name = "" + } dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopped dc.Dirty = false } @@ -507,6 +543,18 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code api.containersErr = nil } +// safeFriendlyName returns a API safe version of the container's +// friendly name. +// +// See provisioner/regexes.go for the regex used to validate +// the friendly name on the API side. +func safeFriendlyName(name string) string { + name = strings.ToLower(name) + name = strings.ReplaceAll(name, "_", "-") + + return name +} + // refreshContainers triggers an immediate update of the container list // and waits for it to complete. func (api *API) refreshContainers(ctx context.Context) (err error) { @@ -624,7 +672,7 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques dc.Container.DevcontainerStatus = dc.Status } api.knownDevcontainers[dc.WorkspaceFolder] = dc - api.recreateWg.Add(1) + api.asyncWg.Add(1) go api.recreateDevcontainer(dc, configPath) api.mu.Unlock() @@ -640,10 +688,10 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques // It updates the devcontainer status and logs the process. The configPath is // passed as a parameter for the odd chance that the container being recreated // has a different config file than the one stored in the devcontainer state. -// The devcontainer state must be set to starting and the recreateWg must be +// The devcontainer state must be set to starting and the asyncWg must be // incremented before calling this function. func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, configPath string) { - defer api.recreateWg.Done() + defer api.asyncWg.Done() var ( err error @@ -803,6 +851,242 @@ func (api *API) markDevcontainerDirty(configPath string, modifiedAt time.Time) { } } +// cleanupSubAgents removes subagents that are no longer managed by +// this agent. This is usually only run at startup to ensure a clean +// slate. This method has an internal timeout to prevent blocking +// indefinitely if something goes wrong with the subagent deletion. +func (api *API) cleanupSubAgents(ctx context.Context) error { + agents, err := api.subAgentClient.List(ctx) + if err != nil { + return xerrors.Errorf("list agents: %w", err) + } + + api.mu.Lock() + defer api.mu.Unlock() + + injected := make(map[uuid.UUID]bool, len(api.injectedSubAgentProcs)) + for _, proc := range api.injectedSubAgentProcs { + injected[proc.agent.ID] = true + } + + ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout) + defer cancel() + + for _, agent := range agents { + if injected[agent.ID] { + continue + } + err := api.subAgentClient.Delete(ctx, agent.ID) + if err != nil { + api.logger.Error(ctx, "failed to delete agent", + slog.Error(err), + slog.F("agent_id", agent.ID), + slog.F("agent_name", agent.Name), + ) + } + } + + return nil +} + +// injectSubAgentIntoContainerLocked injects a subagent into a dev +// container and starts the subagent process. This method assumes that +// api.mu is held. +// +// This method uses an internal timeout to prevent blocking indefinitely +// if something goes wrong with the injection. +func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer) (err error) { + ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout) + defer cancel() + + container := dc.Container + if container == nil { + return xerrors.New("container is nil, cannot inject subagent") + } + + // Skip if subagent already exists for this container. + if _, injected := api.injectedSubAgentProcs[container.ID]; injected || api.closed { + return nil + } + + // Mark subagent as being injected immediately with a placeholder. + subAgent := subAgentProcess{ + ctx: context.Background(), + stop: func() {}, + } + api.injectedSubAgentProcs[container.ID] = subAgent + + // This is used to track the goroutine that will run the subagent + // process inside the container. It will be decremented when the + // subagent process completes or if an error occurs before we can + // start the subagent. + api.asyncWg.Add(1) + ranSubAgent := false + + // Clean up if injection fails. + defer func() { + if !ranSubAgent { + api.asyncWg.Done() + } + if err != nil { + // Mutex is held (defer re-lock). + delete(api.injectedSubAgentProcs, container.ID) + } + }() + + // Unlock the mutex to allow other operations while we + // inject the subagent into the container. + api.mu.Unlock() + defer api.mu.Lock() // Re-lock. + + logger := api.logger.With( + slog.F("devcontainer_id", dc.ID), + slog.F("devcontainer_name", dc.Name), + slog.F("workspace_folder", dc.WorkspaceFolder), + slog.F("config_path", dc.ConfigPath), + ) + + arch, err := api.ccli.DetectArchitecture(ctx, container.ID) + if err != nil { + return xerrors.Errorf("detect architecture: %w", err) + } + + logger.Info(ctx, "detected container architecture", slog.F("architecture", arch)) + + // For now, only support injecting if the architecture matches the host. + hostArch := runtime.GOARCH + + // TODO(mafredri): Add support for downloading agents for supported architectures. + if arch != hostArch { + logger.Warn(ctx, "skipping subagent injection for unsupported architecture", + slog.F("container_arch", arch), + slog.F("host_arch", hostArch)) + return nil + } + agentBinaryPath, err := os.Executable() + if err != nil { + return xerrors.Errorf("get agent binary path: %w", err) + } + agentBinaryPath, err = filepath.EvalSymlinks(agentBinaryPath) + if err != nil { + return xerrors.Errorf("resolve agent binary path: %w", err) + } + + // If we scripted this as a `/bin/sh` script, we could reduce these + // steps to one instruction, speeding up the injection process. + // + // Note: We use `path` instead of `filepath` here because we are + // working with Unix-style paths inside the container. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "mkdir", "-p", path.Dir(coderPathInsideContainer)); err != nil { + return xerrors.Errorf("create agent directory in container: %w", err) + } + + if err := api.ccli.Copy(ctx, container.ID, agentBinaryPath, coderPathInsideContainer); err != nil { + return xerrors.Errorf("copy agent binary: %w", err) + } + + logger.Info(ctx, "copied agent binary to container") + + // Make sure the agent binary is executable so we can run it. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "+x", coderPathInsideContainer); err != nil { + return xerrors.Errorf("set agent binary executable: %w", err) + } + + // Attempt to add CAP_NET_ADMIN to the binary to improve network + // performance (optional, allow to fail). + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil { + logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err)) + } + + // The preparation of the subagent is done, now we can create the + // subagent record in the database to receive the auth token. + createdAgent, err := api.subAgentClient.Create(ctx, SubAgent{ + Name: dc.Name, + Directory: "/workspace", // TODO(mafredri): Use a more appropriate directory. + OperatingSystem: "linux", // Assuming Linux for dev containers. + Architecture: arch, + }) + if err != nil { + return xerrors.Errorf("create agent: %w", err) + } + + logger.Info(ctx, "created subagent record", slog.F("agent_id", createdAgent.ID)) + + // Start the subagent in the container in a new goroutine to avoid + // blocking. Note that we pass the api.ctx to the subagent process + // so that it isn't affected by the timeout. + go api.runSubAgentInContainer(api.ctx, dc, createdAgent, coderPathInsideContainer) + ranSubAgent = true + + return nil +} + +// runSubAgentInContainer runs the subagent process inside a dev +// container. The api.asyncWg must be incremented before calling this +// function, and it will be decremented when the subagent process +// completes or if an error occurs. +func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer, agent SubAgent, agentPath string) { + container := dc.Container // Must not be nil. + logger := api.logger.With( + slog.F("container_name", container.FriendlyName), + slog.F("agent_id", agent.ID), + ) + + agentCtx, agentStop := context.WithCancel(ctx) + defer func() { + agentStop() + + // Best effort cleanup of the agent record after the process + // completes. Note that we use the background context here + // because the api.ctx will be canceled when the API is closed. + // This may delay shutdown of the agent by the given timeout. + deleteCtx, cancel := context.WithTimeout(context.Background(), defaultOperationTimeout) + defer cancel() + err := api.subAgentClient.Delete(deleteCtx, agent.ID) + if err != nil { + logger.Error(deleteCtx, "failed to delete agent record after process completion", slog.Error(err)) + } + + api.mu.Lock() + delete(api.injectedSubAgentProcs, container.ID) + api.mu.Unlock() + + logger.Debug(ctx, "agent process cleanup complete") + api.asyncWg.Done() + }() + + api.mu.Lock() + if api.closed { + api.mu.Unlock() + // If the API is closed, we should not run the agent. + logger.Debug(ctx, "the API is closed, not running subagent in container") + return + } + // Update the placeholder with a valid subagent, context and stop. + api.injectedSubAgentProcs[container.ID] = subAgentProcess{ + agent: agent, + ctx: agentCtx, + stop: agentStop, + } + api.mu.Unlock() + + logger.Info(ctx, "starting subagent in dev container") + + err := api.dccli.Exec(agentCtx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"}, + WithContainerID(container.ID), + WithRemoteEnv( + // TODO(mafredri): Use the correct URL here. + "CODER_AGENT_URL="+"http://172.20.0.8:3000/", + "CODER_AGENT_TOKEN="+agent.AuthToken.String(), + ), + ) + if err != nil && !errors.Is(err, context.Canceled) { + logger.Error(ctx, "subagent process failed", slog.Error(err)) + } + + logger.Info(ctx, "subagent process finished") +} + func (api *API) Close() error { api.mu.Lock() if api.closed { @@ -811,6 +1095,12 @@ func (api *API) Close() error { } api.logger.Debug(api.ctx, "closing API") api.closed = true + + for _, proc := range api.injectedSubAgentProcs { + api.logger.Debug(api.ctx, "canceling subagent process", slog.F("agent_name", proc.agent.Name), slog.F("agent_id", proc.agent.ID)) + proc.stop() + } + api.cancel() // Interrupt all routines. api.mu.Unlock() // Release lock before waiting for goroutines. @@ -821,8 +1111,8 @@ func (api *API) Close() error { <-api.watcherDone <-api.updaterDone - // Wait for all devcontainer recreation tasks to complete. - api.recreateWg.Wait() + // Wait for all async tasks to complete. + api.asyncWg.Wait() api.logger.Debug(api.ctx, "closed API") return err diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index 313da6f9f615f..f8e40b3ca41b1 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -6,6 +6,8 @@ import ( "math/rand" "net/http" "net/http/httptest" + "os" + "runtime" "strings" "testing" "time" @@ -190,6 +192,80 @@ func (w *fakeWatcher) sendEventWaitNextCalled(ctx context.Context, event fsnotif w.waitNext(ctx) } +// fakeSubAgentClient implements SubAgentClient for testing purposes. +type fakeSubAgentClient struct { + agents map[uuid.UUID]agentcontainers.SubAgent + nextID int + + listErrC chan error // If set, send to return error, close to return nil. + created []agentcontainers.SubAgent + createErrC chan error // If set, send to return error, close to return nil. + deleted []uuid.UUID + deleteErrC chan error // If set, send to return error, close to return nil. +} + +func (m *fakeSubAgentClient) List(ctx context.Context) ([]agentcontainers.SubAgent, error) { + var listErr error + if m.listErrC != nil { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case err, ok := <-m.listErrC: + if ok { + listErr = err + } + } + } + var agents []agentcontainers.SubAgent + for _, agent := range m.agents { + agents = append(agents, agent) + } + return agents, listErr +} + +func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.SubAgent) (agentcontainers.SubAgent, error) { + var createErr error + if m.createErrC != nil { + select { + case <-ctx.Done(): + return agentcontainers.SubAgent{}, ctx.Err() + case err, ok := <-m.createErrC: + if ok { + createErr = err + } + } + } + m.nextID++ + agent.ID = uuid.New() + agent.AuthToken = uuid.New() + if m.agents == nil { + m.agents = make(map[uuid.UUID]agentcontainers.SubAgent) + } + m.agents[agent.ID] = agent + m.created = append(m.created, agent) + return agent, createErr +} + +func (m *fakeSubAgentClient) Delete(ctx context.Context, id uuid.UUID) error { + var deleteErr error + if m.deleteErrC != nil { + select { + case <-ctx.Done(): + return ctx.Err() + case err, ok := <-m.deleteErrC: + if ok { + deleteErr = err + } + } + } + if m.agents == nil { + m.agents = make(map[uuid.UUID]agentcontainers.SubAgent) + } + delete(m.agents, id) + m.deleted = append(m.deleted, id) + return deleteErr +} + func TestAPI(t *testing.T) { t.Parallel() @@ -226,6 +302,7 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(), nil}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes() + mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt), }, @@ -244,6 +321,7 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(), assert.AnError}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes() + mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt), }, @@ -260,6 +338,7 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(fakeCt), nil}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt2), nil).After(preReq).AnyTimes() + mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt2), }, @@ -415,6 +494,7 @@ func TestAPI(t *testing.T) { containers: codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{validContainer}, }, + arch: "", // Unsupported architecture, don't inject subagent. }, devcontainerCLI: &fakeDevcontainerCLI{ upErr: xerrors.New("devcontainer CLI error"), @@ -429,6 +509,7 @@ func TestAPI(t *testing.T) { containers: codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{validContainer}, }, + arch: "", // Unsupported architecture, don't inject subagent. }, devcontainerCLI: &fakeDevcontainerCLI{}, wantStatus: []int{http.StatusAccepted, http.StatusConflict}, @@ -1151,6 +1232,166 @@ func TestAPI(t *testing.T) { assert.False(t, response.Devcontainers[0].Container.DevcontainerDirty, "dirty flag should be cleared on the container after container recreation") }) + + t.Run("SubAgentLifecycle", func(t *testing.T) { + t.Parallel() + + var ( + ctx = testutil.Context(t, testutil.WaitMedium) + logger = slog.Make() + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ + createErrC: make(chan error, 1), + deleteErrC: make(chan error, 1), + } + fakeDCCLI = &fakeDevcontainerCLI{ + execErrC: make(chan error, 1), + } + + testContainer = codersdk.WorkspaceAgentContainer{ + ID: "test-container-id", + FriendlyName: "test-container", + Image: "test-image", + Running: true, + CreatedAt: time.Now(), + Labels: map[string]string{ + agentcontainers.DevcontainerLocalFolderLabel: "/workspace", + agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", + }, + } + ) + + defer close(fakeDCCLI.execErrC) + + coderBin, err := os.Executable() + require.NoError(t, err) + + mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + Containers: []codersdk.WorkspaceAgentContainer{testContainer}, + }, nil).AnyTimes() + gomock.InOrder( + mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil), + mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil), + ) + + mClock.Set(time.Now()).MustWait(ctx) + tickerTrap := mClock.Trap().TickerFunc("updaterLoop") + + api := agentcontainers.NewAPI(logger, + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithDevcontainerCLI(fakeDCCLI), + ) + defer api.Close() + + // Allow initial agent creation to succeed. + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + + // Make sure the ticker function has been registered + // before advancing the clock. + tickerTrap.MustWait(ctx).MustRelease(ctx) + tickerTrap.Close() + + // Pre-populate for next iteration (also verify previous consumption). + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + + // Ensure we only inject the agent once. + for i := range 3 { + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + + t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created)) + + // Verify agent was created. + require.Len(t, fakeSAC.created, 1) + assert.Equal(t, "test-container", fakeSAC.created[0].Name) + assert.Equal(t, "/workspace", fakeSAC.created[0].Directory) + assert.Len(t, fakeSAC.deleted, 0) + } + + // Expect the agent to be reinjected. + gomock.InOrder( + mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil), + mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil), + ) + + // Terminate the agent and verify it is deleted. + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, nil) + + // Allow cleanup to proceed. + testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, xerrors.New("test termination")) + + // Wait until the agent recreation is started. + for len(fakeSAC.createErrC) > 0 { + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + } + + // Verify agent was deleted. + require.Len(t, fakeSAC.deleted, 1) + assert.Equal(t, fakeSAC.created[0].ID, fakeSAC.deleted[0]) + + // Verify the agent recreated. + require.Len(t, fakeSAC.created, 2) + }) + + t.Run("SubAgentCleanup", func(t *testing.T) { + t.Parallel() + + var ( + existingAgentID = uuid.New() + existingAgentToken = uuid.New() + existingAgent = agentcontainers.SubAgent{ + ID: existingAgentID, + Name: "stopped-container", + Directory: "/tmp", + AuthToken: existingAgentToken, + } + + ctx = testutil.Context(t, testutil.WaitMedium) + logger = slog.Make() + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ + agents: map[uuid.UUID]agentcontainers.SubAgent{ + existingAgentID: existingAgent, + }, + } + ) + + mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + Containers: []codersdk.WorkspaceAgentContainer{}, + }, nil).AnyTimes() + + mClock.Set(time.Now()).MustWait(ctx) + tickerTrap := mClock.Trap().TickerFunc("updaterLoop") + + api := agentcontainers.NewAPI(logger, + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithDevcontainerCLI(&fakeDevcontainerCLI{}), + ) + defer api.Close() + + tickerTrap.MustWait(ctx).MustRelease(ctx) + tickerTrap.Close() + + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + + // Verify agent was deleted. + assert.Contains(t, fakeSAC.deleted, existingAgentID) + assert.Empty(t, fakeSAC.agents) + }) } // mustFindDevcontainerByPath returns the devcontainer with the given workspace diff --git a/agent/agentcontainers/subagent.go b/agent/agentcontainers/subagent.go new file mode 100644 index 0000000000000..70899fb96f70d --- /dev/null +++ b/agent/agentcontainers/subagent.go @@ -0,0 +1,128 @@ +package agentcontainers + +import ( + "context" + + "github.com/google/uuid" + "golang.org/x/xerrors" + + "cdr.dev/slog" + + agentproto "github.com/coder/coder/v2/agent/proto" +) + +// SubAgent represents an agent running in a dev container. +type SubAgent struct { + ID uuid.UUID + Name string + AuthToken uuid.UUID + Directory string + Architecture string + OperatingSystem string +} + +// SubAgentClient is an interface for managing sub agents and allows +// changing the implementation without having to deal with the +// agentproto package directly. +type SubAgentClient interface { + // List returns a list of all agents. + List(ctx context.Context) ([]SubAgent, error) + // Create adds a new agent. + Create(ctx context.Context, agent SubAgent) (SubAgent, error) + // Delete removes an agent by its ID. + Delete(ctx context.Context, id uuid.UUID) error +} + +// NewSubAgentClient returns a SubAgentClient that uses the provided +// agent API client. +type subAgentAPIClient struct { + logger slog.Logger + api agentproto.DRPCAgentClient26 +} + +var _ SubAgentClient = (*subAgentAPIClient)(nil) + +func NewSubAgentClientFromAPI(logger slog.Logger, agentAPI agentproto.DRPCAgentClient26) SubAgentClient { + if agentAPI == nil { + panic("developer error: agentAPI cannot be nil") + } + return &subAgentAPIClient{ + logger: logger.Named("subagentclient"), + api: agentAPI, + } +} + +func (a *subAgentAPIClient) List(ctx context.Context) ([]SubAgent, error) { + a.logger.Debug(ctx, "listing sub agents") + resp, err := a.api.ListSubAgents(ctx, &agentproto.ListSubAgentsRequest{}) + if err != nil { + return nil, err + } + + agents := make([]SubAgent, len(resp.Agents)) + for i, agent := range resp.Agents { + id, err := uuid.FromBytes(agent.GetId()) + if err != nil { + return nil, err + } + authToken, err := uuid.FromBytes(agent.GetAuthToken()) + if err != nil { + return nil, err + } + agents[i] = SubAgent{ + ID: id, + Name: agent.GetName(), + AuthToken: authToken, + } + } + return agents, nil +} + +func (a *subAgentAPIClient) Create(ctx context.Context, agent SubAgent) (SubAgent, error) { + a.logger.Debug(ctx, "creating sub agent", slog.F("name", agent.Name), slog.F("directory", agent.Directory)) + resp, err := a.api.CreateSubAgent(ctx, &agentproto.CreateSubAgentRequest{ + Name: agent.Name, + Directory: agent.Directory, + Architecture: agent.Architecture, + OperatingSystem: agent.OperatingSystem, + }) + if err != nil { + return SubAgent{}, err + } + + agent.Name = resp.Agent.Name + agent.ID, err = uuid.FromBytes(resp.Agent.Id) + if err != nil { + return agent, err + } + agent.AuthToken, err = uuid.FromBytes(resp.Agent.AuthToken) + if err != nil { + return agent, err + } + return agent, nil +} + +func (a *subAgentAPIClient) Delete(ctx context.Context, id uuid.UUID) error { + a.logger.Debug(ctx, "deleting sub agent", slog.F("id", id.String())) + _, err := a.api.DeleteSubAgent(ctx, &agentproto.DeleteSubAgentRequest{ + Id: id[:], + }) + return err +} + +// noopSubAgentClient is a SubAgentClient that does nothing. +type noopSubAgentClient struct{} + +var _ SubAgentClient = noopSubAgentClient{} + +func (noopSubAgentClient) List(_ context.Context) ([]SubAgent, error) { + return nil, nil +} + +func (noopSubAgentClient) Create(_ context.Context, _ SubAgent) (SubAgent, error) { + return SubAgent{}, xerrors.New("noopSubAgentClient does not support creating sub agents") +} + +func (noopSubAgentClient) Delete(_ context.Context, _ uuid.UUID) error { + return xerrors.New("noopSubAgentClient does not support deleting sub agents") +} diff --git a/agent/api.go b/agent/api.go index 2e15530adc608..1c9a707fbb338 100644 --- a/agent/api.go +++ b/agent/api.go @@ -10,11 +10,12 @@ import ( "github.com/google/uuid" "github.com/coder/coder/v2/agent/agentcontainers" + "github.com/coder/coder/v2/agent/proto" "github.com/coder/coder/v2/coderd/httpapi" "github.com/coder/coder/v2/codersdk" ) -func (a *agent) apiHandler() (http.Handler, func() error) { +func (a *agent) apiHandler(aAPI proto.DRPCAgentClient26) (http.Handler, func() error) { r := chi.NewRouter() r.Get("/", func(rw http.ResponseWriter, r *http.Request) { httpapi.Write(r.Context(), rw, http.StatusOK, codersdk.Response{ @@ -45,6 +46,7 @@ func (a *agent) apiHandler() (http.Handler, func() error) { agentcontainers.WithScriptLogger(func(logSourceID uuid.UUID) agentcontainers.ScriptLogger { return a.logSender.GetScriptLogger(logSourceID) }), + agentcontainers.WithSubAgentClient(agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)), } manifest := a.manifest.Load() if manifest != nil && len(manifest.Devcontainers) > 0 { From 34aa574754375e8079fa16e74f0ad3350d30197e Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 6 Jun 2025 12:06:09 +0000 Subject: [PATCH 02/18] implement sub agent url --- agent/agentcontainers/api.go | 12 ++++++++++-- agent/agentcontainers/api_test.go | 13 +++++++------ cli/agent.go | 4 ++++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 72f2a119c5e7d..109179d8017be 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -57,6 +57,7 @@ type API struct { clock quartz.Clock scriptLogger func(logSourceID uuid.UUID) ScriptLogger subAgentClient SubAgentClient + subAgentURL string mu sync.RWMutex closed bool @@ -121,6 +122,14 @@ func WithSubAgentClient(client SubAgentClient) Option { } } +// WithSubAgentURL sets the agent URL for the sub-agent for +// communicating with the control plane. +func WithSubAgentURL(url string) Option { + return func(api *API) { + api.subAgentURL = url + } +} + // WithDevcontainers sets the known devcontainers for the API. This // allows the API to be aware of devcontainers defined in the workspace // agent manifest. @@ -1075,8 +1084,7 @@ func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.Workspac err := api.dccli.Exec(agentCtx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"}, WithContainerID(container.ID), WithRemoteEnv( - // TODO(mafredri): Use the correct URL here. - "CODER_AGENT_URL="+"http://172.20.0.8:3000/", + "CODER_AGENT_URL="+api.subAgentURL, "CODER_AGENT_TOKEN="+agent.AuthToken.String(), ), ) diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index f8e40b3ca41b1..d7b69660ef410 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -1272,10 +1272,10 @@ func TestAPI(t *testing.T) { }, nil).AnyTimes() gomock.InOrder( mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), ) mClock.Set(time.Now()).MustWait(ctx) @@ -1285,6 +1285,7 @@ func TestAPI(t *testing.T) { agentcontainers.WithClock(mClock), agentcontainers.WithContainerCLI(mCCLI), agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithSubAgentURL("test-subagent-url"), agentcontainers.WithDevcontainerCLI(fakeDCCLI), ) defer api.Close() @@ -1317,10 +1318,10 @@ func TestAPI(t *testing.T) { // Expect the agent to be reinjected. gomock.InOrder( mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), ) // Terminate the agent and verify it is deleted. diff --git a/cli/agent.go b/cli/agent.go index deca447664337..5d6037f9930ec 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -28,6 +28,7 @@ import ( "github.com/coder/serpent" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agentexec" "github.com/coder/coder/v2/agent/agentssh" "github.com/coder/coder/v2/agent/reaper" @@ -362,6 +363,9 @@ func (r *RootCmd) workspaceAgent() *serpent.Command { BlockFileTransfer: blockFileTransfer, Execer: execer, ExperimentalDevcontainersEnabled: experimentalDevcontainersEnabled, + ContainerAPIOptions: []agentcontainers.Option{ + agentcontainers.WithSubAgentURL(r.agentURL.String()), + }, }) promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger) From 7a3c8a3e64c0134640165426adaec6025b76bff9 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 6 Jun 2025 13:18:03 +0000 Subject: [PATCH 03/18] improve doc on container workspace folder, add todo --- agent/agentcontainers/api.go | 12 +++++++++--- agent/agentcontainers/devcontainer.go | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 109179d8017be..5919874454ff5 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -1010,9 +1010,15 @@ func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc coders // The preparation of the subagent is done, now we can create the // subagent record in the database to receive the auth token. createdAgent, err := api.subAgentClient.Create(ctx, SubAgent{ - Name: dc.Name, - Directory: "/workspace", // TODO(mafredri): Use a more appropriate directory. - OperatingSystem: "linux", // Assuming Linux for dev containers. + Name: dc.Name, + // The default workspaceFolder for devcontainers is /workspaces. + // However, it can be changed by setting {"workspaceFolder": "/src"} + // in the devcontainer.json. This information is not encoded into + // the container labels, so we must rely on the values parsed from + // the devcontainer.json file on disk. + // TODO(mafredri): Support custom workspace folders in the future. + Directory: DevcontainerDefaultContainerWorkspaceFolder, + OperatingSystem: "linux", // Assuming Linux for dev containers. Architecture: arch, }) if err != nil { diff --git a/agent/agentcontainers/devcontainer.go b/agent/agentcontainers/devcontainer.go index 09d4837d4b27a..f13963d7b63d7 100644 --- a/agent/agentcontainers/devcontainer.go +++ b/agent/agentcontainers/devcontainer.go @@ -18,6 +18,8 @@ const ( // DevcontainerConfigFileLabel is the label that contains the path to // the devcontainer.json configuration file. DevcontainerConfigFileLabel = "devcontainer.config_file" + // The default workspace folder inside the devcontainer. + DevcontainerDefaultContainerWorkspaceFolder = "/workspaces" ) const devcontainerUpScriptTemplate = ` From eb29bba31d149e384f735a25b30ad7d042bec62c Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 6 Jun 2025 15:04:40 +0000 Subject: [PATCH 04/18] fix coderd and cli tests --- agent/agentcontainers/api.go | 3 +++ cli/open_test.go | 16 ++++++++------ coderd/workspaceagents_test.go | 39 ++++++++++++++++++---------------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 5919874454ff5..17886da53826a 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -869,6 +869,9 @@ func (api *API) cleanupSubAgents(ctx context.Context) error { if err != nil { return xerrors.Errorf("list agents: %w", err) } + if len(agents) == 0 { + return nil + } api.mu.Lock() defer api.mu.Unlock() diff --git a/cli/open_test.go b/cli/open_test.go index 4441e51e58c4b..0f757c99bfcf1 100644 --- a/cli/open_test.go +++ b/cli/open_test.go @@ -306,8 +306,8 @@ func TestOpenVSCodeDevContainer(t *testing.T) { containerFolder := "/workspace/coder" ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) - mcl.EXPECT().List(gomock.Any()).Return( + mccli := acmock.NewMockContainerCLI(ctrl) + mccli.EXPECT().List(gomock.Any()).Return( codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{ { @@ -327,6 +327,8 @@ func TestOpenVSCodeDevContainer(t *testing.T) { }, }, nil, ).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent { agents[0].Directory = agentDir @@ -337,7 +339,7 @@ func TestOpenVSCodeDevContainer(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli)) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() @@ -481,8 +483,8 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { containerFolder := "/workspace/coder" ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) - mcl.EXPECT().List(gomock.Any()).Return( + mccli := acmock.NewMockContainerCLI(ctrl) + mccli.EXPECT().List(gomock.Any()).Return( codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{ { @@ -502,6 +504,8 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { }, }, nil, ).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent { agents[0].Name = agentName @@ -511,7 +515,7 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli)) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index f32c7b1458ca2..6a18ec76cdde2 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -1397,14 +1397,15 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { agentcontainers.DevcontainerConfigFileLabel: configFile, } devContainer = codersdk.WorkspaceAgentContainer{ - ID: uuid.NewString(), - CreatedAt: dbtime.Now(), - FriendlyName: testutil.GetRandomName(t), - Image: "busybox:latest", - Labels: dcLabels, - Running: true, - Status: "running", - DevcontainerDirty: true, + ID: uuid.NewString(), + CreatedAt: dbtime.Now(), + FriendlyName: testutil.GetRandomName(t), + Image: "busybox:latest", + Labels: dcLabels, + Running: true, + Status: "running", + DevcontainerDirty: true, + DevcontainerStatus: codersdk.WorkspaceAgentDevcontainerStatusRunning, } plainContainer = codersdk.WorkspaceAgentContainer{ ID: uuid.NewString(), @@ -1419,29 +1420,31 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { for _, tc := range []struct { name string - setupMock func(*acmock.MockContainerCLI, *acmock.MockDevcontainerCLI) (status int) + setupMock func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) (status int) }{ { name: "Recreate", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{devContainer}, }, nil).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), devContainer.ID).Return("", nil).AnyTimes() mdccli.EXPECT().Up(gomock.Any(), workspaceFolder, configFile, gomock.Any()).Return("someid", nil).Times(1) return 0 }, }, { name: "Container does not exist", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{}, nil).AnyTimes() + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{}, nil).AnyTimes() return http.StatusNotFound }, }, { name: "Not a devcontainer", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{plainContainer}, }, nil).AnyTimes() return http.StatusNotFound @@ -1452,9 +1455,9 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { t.Parallel() ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) + mccli := acmock.NewMockContainerCLI(ctrl) mdccli := acmock.NewMockDevcontainerCLI(ctrl) - wantStatus := tc.setupMock(mcl, mdccli) + wantStatus := tc.setupMock(mccli, mdccli) logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{ Logger: &logger, @@ -1471,7 +1474,7 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { o.ExperimentalDevcontainersEnabled = true o.ContainerAPIOptions = append( o.ContainerAPIOptions, - agentcontainers.WithContainerCLI(mcl), + agentcontainers.WithContainerCLI(mccli), agentcontainers.WithDevcontainerCLI(mdccli), agentcontainers.WithWatcher(watcher.NewNoop()), ) From aa42ab8695d2e9c11fd4a23e575c49d155dc07ce Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 6 Jun 2025 16:11:16 +0000 Subject: [PATCH 05/18] fix --- agent/agentcontainers/api_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index d7b69660ef410..fdbb5c694ff63 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -426,7 +426,7 @@ func TestAPI(t *testing.T) { FriendlyName: "container-name", Running: true, Labels: map[string]string{ - agentcontainers.DevcontainerLocalFolderLabel: "/workspace", + agentcontainers.DevcontainerLocalFolderLabel: "/workspaces", agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", }, } @@ -1256,7 +1256,7 @@ func TestAPI(t *testing.T) { Running: true, CreatedAt: time.Now(), Labels: map[string]string{ - agentcontainers.DevcontainerLocalFolderLabel: "/workspace", + agentcontainers.DevcontainerLocalFolderLabel: "/workspaces", agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", }, } @@ -1311,7 +1311,7 @@ func TestAPI(t *testing.T) { // Verify agent was created. require.Len(t, fakeSAC.created, 1) assert.Equal(t, "test-container", fakeSAC.created[0].Name) - assert.Equal(t, "/workspace", fakeSAC.created[0].Directory) + assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory) assert.Len(t, fakeSAC.deleted, 0) } From fb4cdad38a984de036e5968d33d49905b41d8da6 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 08:49:11 +0000 Subject: [PATCH 06/18] skip test on win --- agent/agentcontainers/api_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index fdbb5c694ff63..845eae2e6d33c 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -1236,6 +1236,10 @@ func TestAPI(t *testing.T) { t.Run("SubAgentLifecycle", func(t *testing.T) { t.Parallel() + if runtime.GOOS == "windows" { + t.Skip("Dev Container tests are not supported on Windows (this test uses mocks but fails due to Windows paths)") + } + var ( ctx = testutil.Context(t, testutil.WaitMedium) logger = slog.Make() From cf17cd40420611629c209a8fd8acad6eb0e0d601 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 09:02:13 +0000 Subject: [PATCH 07/18] fix review comments --- agent/agentcontainers/api.go | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 17886da53826a..8ca549df02bb9 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -296,7 +296,7 @@ func (api *API) updaterLoop() { if err := api.cleanupSubAgents(api.ctx); err != nil { api.logger.Error(api.ctx, "cleanup subagents failed", slog.Error(err)) } else { - api.logger.Debug(api.ctx, "subagent cleanup complete") + api.logger.Debug(api.ctx, "cleanup subagents complete") } // Perform an initial update to populate the container list, this @@ -440,6 +440,20 @@ func (api *API) updateContainers(ctx context.Context) error { // on the latest list of containers. This method assumes that api.mu is // held. func (api *API) processUpdatedContainersLocked(ctx context.Context, updated codersdk.WorkspaceAgentListContainersResponse) { + dcFields := func(dc codersdk.WorkspaceAgentDevcontainer) []slog.Field { + f := []slog.Field{ + slog.F("devcontainer_id", dc.ID), + slog.F("devcontainer_name", dc.Name), + slog.F("workspace_folder", dc.WorkspaceFolder), + slog.F("config_path", dc.ConfigPath), + } + if dc.Container != nil { + f = append(f, slog.F("container_id", dc.Container.ID)) + f = append(f, slog.F("container_name", dc.Container.FriendlyName)) + } + return f + } + // Reset the container links in known devcontainers to detect if // they still exist. for _, dc := range api.knownDevcontainers { @@ -460,6 +474,13 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code continue } + logger := api.logger.With( + slog.F("container_id", updated.Containers[i].ID), + slog.F("container_name", updated.Containers[i].FriendlyName), + slog.F("workspace_folder", workspaceFolder), + slog.F("config_file", configFile), + ) + if dc, ok := api.knownDevcontainers[workspaceFolder]; ok { // If no config path is set, this devcontainer was defined // in Terraform without the optional config file. Assume the @@ -468,7 +489,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code if dc.ConfigPath == "" && configFile != "" { dc.ConfigPath = configFile if err := api.watcher.Add(configFile); err != nil { - api.logger.Error(ctx, "watch devcontainer config file failed", slog.Error(err), slog.F("file", configFile)) + logger.With(dcFields(dc)...).Error(ctx, "watch devcontainer config file failed", slog.Error(err)) } } @@ -477,13 +498,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code continue } - if configFile != "" { - if err := api.watcher.Add(configFile); err != nil { - api.logger.Error(ctx, "watch devcontainer config file failed", slog.Error(err), slog.F("file", configFile)) - } - } - - api.knownDevcontainers[workspaceFolder] = codersdk.WorkspaceAgentDevcontainer{ + dc := codersdk.WorkspaceAgentDevcontainer{ ID: uuid.New(), Name: "", // Updated later based on container state. WorkspaceFolder: workspaceFolder, @@ -492,11 +507,21 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code Dirty: false, // Updated later based on config file changes. Container: container, } + + if configFile != "" { + if err := api.watcher.Add(configFile); err != nil { + logger.With(dcFields(dc)...).Error(ctx, "watch devcontainer config file failed", slog.Error(err)) + } + } + + api.knownDevcontainers[workspaceFolder] = dc } // Iterate through all known devcontainers and update their status // based on the current state of the containers. for _, dc := range api.knownDevcontainers { + logger := api.logger.With(dcFields(dc)...) + if dc.Container != nil { if !api.devcontainerNames[dc.Name] { // If the devcontainer name wasn't set via terraform, we @@ -532,7 +557,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code if _, injected := api.injectedSubAgentProcs[dc.Container.ID]; !injected && dc.Status == codersdk.WorkspaceAgentDevcontainerStatusRunning { err := api.injectSubAgentIntoContainerLocked(ctx, dc) if err != nil { - api.logger.Error(ctx, "inject subagent into container failed", slog.Error(err)) + logger.Error(ctx, "inject subagent into container failed", slog.Error(err)) } } From 780483b847896ee013248c31bd81ba49838a0924 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 09:29:46 +0000 Subject: [PATCH 08/18] ensure agent binary permissions owner/o+rx --- agent/agentcontainers/api.go | 6 +++++- agent/agentcontainers/api_test.go | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 8ca549df02bb9..a3e0f35f4a672 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -1025,9 +1025,13 @@ func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc coders logger.Info(ctx, "copied agent binary to container") // Make sure the agent binary is executable so we can run it. - if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "+x", coderPathInsideContainer); err != nil { + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "0755", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil { return xerrors.Errorf("set agent binary executable: %w", err) } + // Set the owner of the agent binary to root:root (UID 0, GID 0). + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chown", "0:0", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil { + return xerrors.Errorf("set agent binary owner: %w", err) + } // Attempt to add CAP_NET_ADMIN to the binary to improve network // performance (optional, allow to fail). diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index 845eae2e6d33c..ab8c2a4a30b00 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -1278,7 +1278,8 @@ func TestAPI(t *testing.T) { mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "0755", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chown", "0:0", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), ) @@ -1324,7 +1325,8 @@ func TestAPI(t *testing.T) { mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), - mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "0755", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chown", "0:0", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), ) From 934a222355c9edc851f8c8dea73fc6d8ae37e970 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 11:14:59 +0000 Subject: [PATCH 09/18] update cap net admin comment --- agent/agentcontainers/api.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index a3e0f35f4a672..8d366f95b60f7 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -1034,7 +1034,7 @@ func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc coders } // Attempt to add CAP_NET_ADMIN to the binary to improve network - // performance (optional, allow to fail). + // performance (optional, allow to fail). See `bootstrap_linux.sh`. if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil { logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err)) } From 56c7cebb2ffd5723af91507130055d49706da62d Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 11:15:55 +0000 Subject: [PATCH 10/18] implement fake agent api sub agent methods --- agent/agenttest/client.go | 87 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/agent/agenttest/client.go b/agent/agenttest/client.go index a957c61000c70..4d32d24b40619 100644 --- a/agent/agenttest/client.go +++ b/agent/agenttest/client.go @@ -163,6 +163,10 @@ func (c *Client) GetConnectionReports() []*agentproto.ReportConnectionRequest { return c.fakeAgentAPI.GetConnectionReports() } +func (c *Client) GetSubAgents() []*agentproto.SubAgent { + return c.fakeAgentAPI.GetSubAgents() +} + type FakeAgentAPI struct { sync.Mutex t testing.TB @@ -177,6 +181,7 @@ type FakeAgentAPI struct { metadata map[string]agentsdk.Metadata timings []*agentproto.Timing connectionReports []*agentproto.ReportConnectionRequest + subAgents map[uuid.UUID]*agentproto.SubAgent getAnnouncementBannersFunc func() ([]codersdk.BannerConfig, error) getResourcesMonitoringConfigurationFunc func() (*agentproto.GetResourcesMonitoringConfigurationResponse, error) @@ -365,16 +370,86 @@ func (f *FakeAgentAPI) GetConnectionReports() []*agentproto.ReportConnectionRequ return slices.Clone(f.connectionReports) } -func (*FakeAgentAPI) CreateSubAgent(_ context.Context, _ *agentproto.CreateSubAgentRequest) (*agentproto.CreateSubAgentResponse, error) { - panic("unimplemented") +func (f *FakeAgentAPI) CreateSubAgent(ctx context.Context, req *agentproto.CreateSubAgentRequest) (*agentproto.CreateSubAgentResponse, error) { + f.Lock() + defer f.Unlock() + + f.logger.Debug(ctx, "create sub agent called", slog.F("req", req)) + + // Generate IDs for the new sub-agent. + subAgentID := uuid.New() + authToken := uuid.New() + + // Create the sub-agent proto object. + subAgent := &agentproto.SubAgent{ + Id: subAgentID[:], + Name: req.Name, + AuthToken: authToken[:], + } + + // Store the sub-agent in our map. + if f.subAgents == nil { + f.subAgents = make(map[uuid.UUID]*agentproto.SubAgent) + } + f.subAgents[subAgentID] = subAgent + + // For a fake implementation, we don't create workspace apps. + // Real implementations would handle req.Apps here. + return &agentproto.CreateSubAgentResponse{ + Agent: subAgent, + AppCreationErrors: nil, + }, nil +} + +func (f *FakeAgentAPI) DeleteSubAgent(ctx context.Context, req *agentproto.DeleteSubAgentRequest) (*agentproto.DeleteSubAgentResponse, error) { + f.Lock() + defer f.Unlock() + + f.logger.Debug(ctx, "delete sub agent called", slog.F("req", req)) + + subAgentID, err := uuid.FromBytes(req.Id) + if err != nil { + return nil, err + } + + // Remove the sub-agent from our map. + if f.subAgents != nil { + delete(f.subAgents, subAgentID) + } + + return &agentproto.DeleteSubAgentResponse{}, nil } -func (*FakeAgentAPI) DeleteSubAgent(_ context.Context, _ *agentproto.DeleteSubAgentRequest) (*agentproto.DeleteSubAgentResponse, error) { - panic("unimplemented") +func (f *FakeAgentAPI) ListSubAgents(ctx context.Context, req *agentproto.ListSubAgentsRequest) (*agentproto.ListSubAgentsResponse, error) { + f.Lock() + defer f.Unlock() + + f.logger.Debug(ctx, "list sub agents called", slog.F("req", req)) + + var agents []*agentproto.SubAgent + if f.subAgents != nil { + agents = make([]*agentproto.SubAgent, 0, len(f.subAgents)) + for _, agent := range f.subAgents { + agents = append(agents, agent) + } + } + + return &agentproto.ListSubAgentsResponse{ + Agents: agents, + }, nil } -func (*FakeAgentAPI) ListSubAgents(_ context.Context, _ *agentproto.ListSubAgentsRequest) (*agentproto.ListSubAgentsResponse, error) { - panic("unimplemented") +func (f *FakeAgentAPI) GetSubAgents() []*agentproto.SubAgent { + f.Lock() + defer f.Unlock() + var agents []*agentproto.SubAgent + if f.subAgents != nil { + agents = make([]*agentproto.SubAgent, 0, len(f.subAgents)) + for _, agent := range f.subAgents { + agents = append(agents, agent) + } + } + return agents } func NewFakeAgentAPI(t testing.TB, logger slog.Logger, manifest *agentproto.Manifest, statsCh chan *agentproto.Stats) *FakeAgentAPI { From 591a9bf0700814903b26e6cd5b3e572ac5d90d9d Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 11:27:03 +0000 Subject: [PATCH 11/18] do not set workspace folder if container id --- agent/agentcontainers/api.go | 2 +- agent/agentcontainers/devcontainercli.go | 20 ++++++++++++------- agent/agentcontainers/devcontainercli_test.go | 16 +++++++-------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 8d366f95b60f7..4015a96ed4b1f 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -1120,7 +1120,7 @@ func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.Workspac logger.Info(ctx, "starting subagent in dev container") err := api.dccli.Exec(agentCtx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"}, - WithContainerID(container.ID), + WithExecContainerID(container.ID), WithRemoteEnv( "CODER_AGENT_URL="+api.subAgentURL, "CODER_AGENT_TOKEN="+agent.AuthToken.String(), diff --git a/agent/agentcontainers/devcontainercli.go b/agent/agentcontainers/devcontainercli.go index 94b4de610a93b..6459d7cde03b3 100644 --- a/agent/agentcontainers/devcontainercli.go +++ b/agent/agentcontainers/devcontainercli.go @@ -52,9 +52,10 @@ func WithUpOutput(stdout, stderr io.Writer) DevcontainerCLIUpOptions { type DevcontainerCLIExecOptions func(*devcontainerCLIExecConfig) type devcontainerCLIExecConfig struct { - args []string // Additional arguments for the Exec command. - stdout io.Writer - stderr io.Writer + args []string // Additional arguments for the Exec command. + containerID string + stdout io.Writer + stderr io.Writer } // WithExecOutput sets additional stdout and stderr writers for logs @@ -66,10 +67,12 @@ func WithExecOutput(stdout, stderr io.Writer) DevcontainerCLIExecOptions { } } -// WithContainerID sets the container ID to target a specific container. -func WithContainerID(id string) DevcontainerCLIExecOptions { +// WithExecContainerID sets the container ID to target a specific +// container. Note that this option will unset the workspace folder to +// ensure properties from the container are inherited correctly. +func WithExecContainerID(id string) DevcontainerCLIExecOptions { return func(o *devcontainerCLIExecConfig) { - o.args = append(o.args, "--container-id", id) + o.containerID = id } } @@ -165,7 +168,10 @@ func (d *devcontainerCLI) Exec(ctx context.Context, workspaceFolder, configPath logger := d.logger.With(slog.F("workspace_folder", workspaceFolder), slog.F("config_path", configPath)) args := []string{"exec"} - if workspaceFolder != "" { + switch { + case conf.containerID != "": + args = append(args, "--container-id", conf.containerID) + case workspaceFolder != "": args = append(args, "--workspace-folder", workspaceFolder) } if configPath != "" { diff --git a/agent/agentcontainers/devcontainercli_test.go b/agent/agentcontainers/devcontainercli_test.go index 48325ab83fb21..dbb3c4dedbb85 100644 --- a/agent/agentcontainers/devcontainercli_test.go +++ b/agent/agentcontainers/devcontainercli_test.go @@ -182,8 +182,8 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { configPath: "", cmd: "echo", cmdArgs: []string{"hello"}, - opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithContainerID("test-container-123")}, - wantArgs: "exec --workspace-folder /test/workspace --container-id test-container-123 echo hello", + opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithExecContainerID("test-container-123")}, + wantArgs: "exec --container-id test-container-123 echo hello", wantError: false, }, { @@ -192,8 +192,8 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { configPath: "/test/config.json", cmd: "bash", cmdArgs: []string{"-c", "ls -la"}, - opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithContainerID("my-container")}, - wantArgs: "exec --workspace-folder /test/workspace --config /test/config.json --container-id my-container bash -c ls -la", + opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithExecContainerID("my-container")}, + wantArgs: "exec --container-id my-container --config /test/config.json bash -c ls -la", wantError: false, }, { @@ -203,9 +203,9 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { cmd: "cat", cmdArgs: []string{"/etc/hostname"}, opts: []agentcontainers.DevcontainerCLIExecOptions{ - agentcontainers.WithContainerID("test-container-789"), + agentcontainers.WithExecContainerID("test-container-789"), }, - wantArgs: "exec --workspace-folder /test/workspace --container-id test-container-789 cat /etc/hostname", + wantArgs: "exec --container-id test-container-789 cat /etc/hostname", wantError: false, }, } @@ -293,7 +293,7 @@ func TestDevcontainerCLI_WithOutput(t *testing.T) { errBuf := &bytes.Buffer{} // Simulate CLI execution for exec command with container ID. - wantArgs := "exec --workspace-folder /test/workspace --container-id test-container-456 echo hello" + wantArgs := "exec --container-id test-container-456 echo hello" testExecer := &testDevcontainerExecer{ testExePath: testExePath, wantArgs: wantArgs, @@ -306,7 +306,7 @@ func TestDevcontainerCLI_WithOutput(t *testing.T) { // Call Exec with WithExecOutput and WithContainerID to capture any command output. ctx := testutil.Context(t, testutil.WaitMedium) err = dccli.Exec(ctx, "/test/workspace", "", "echo", []string{"hello"}, - agentcontainers.WithContainerID("test-container-456"), + agentcontainers.WithExecContainerID("test-container-456"), agentcontainers.WithExecOutput(outBuf, errBuf), ) require.NoError(t, err, "Exec should succeed") From 9afa5ea9fa8307927e0bcea2de2a42d6ebf93573 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 12:42:13 +0000 Subject: [PATCH 12/18] add WithContainerLabelIncludeFilter --- agent/agentcontainers/api.go | 106 ++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 39 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 4015a96ed4b1f..204fc9e2ae068 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -42,22 +42,23 @@ const ( // API is responsible for container-related operations in the agent. // It provides methods to list and manage containers. type API struct { - ctx context.Context - cancel context.CancelFunc - watcherDone chan struct{} - updaterDone chan struct{} - initialUpdateDone chan struct{} // Closed after first update in updaterLoop. - updateTrigger chan chan error // Channel to trigger manual refresh. - updateInterval time.Duration // Interval for periodic container updates. - logger slog.Logger - watcher watcher.Watcher - execer agentexec.Execer - ccli ContainerCLI - dccli DevcontainerCLI - clock quartz.Clock - scriptLogger func(logSourceID uuid.UUID) ScriptLogger - subAgentClient SubAgentClient - subAgentURL string + ctx context.Context + cancel context.CancelFunc + watcherDone chan struct{} + updaterDone chan struct{} + initialUpdateDone chan struct{} // Closed after first update in updaterLoop. + updateTrigger chan chan error // Channel to trigger manual refresh. + updateInterval time.Duration // Interval for periodic container updates. + logger slog.Logger + watcher watcher.Watcher + execer agentexec.Execer + ccli ContainerCLI + containerLabelIncludeFilter map[string]string // Labels to filter containers by. + dccli DevcontainerCLI + clock quartz.Clock + scriptLogger func(logSourceID uuid.UUID) ScriptLogger + subAgentClient SubAgentClient + subAgentURL string mu sync.RWMutex closed bool @@ -106,6 +107,16 @@ func WithContainerCLI(ccli ContainerCLI) Option { } } +// WithContainerLabelIncludeFilter sets a label filter for containers. +// This option can be given multiple times to filter by multiple labels. +// The behavior is such that only containers matching one or more of the +// provided labels will be included. +func WithContainerLabelIncludeFilter(label, value string) Option { + return func(api *API) { + api.containerLabelIncludeFilter[label] = value + } +} + // WithDevcontainerCLI sets the DevcontainerCLI implementation to use. // This can be used in tests to modify @devcontainer/cli behavior. func WithDevcontainerCLI(dccli DevcontainerCLI) Option { @@ -198,24 +209,25 @@ func WithScriptLogger(scriptLogger func(logSourceID uuid.UUID) ScriptLogger) Opt func NewAPI(logger slog.Logger, options ...Option) *API { ctx, cancel := context.WithCancel(context.Background()) api := &API{ - ctx: ctx, - cancel: cancel, - watcherDone: make(chan struct{}), - updaterDone: make(chan struct{}), - initialUpdateDone: make(chan struct{}), - updateTrigger: make(chan chan error), - updateInterval: defaultUpdateInterval, - logger: logger, - clock: quartz.NewReal(), - execer: agentexec.DefaultExecer, - subAgentClient: noopSubAgentClient{}, - devcontainerNames: make(map[string]bool), - knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer), - configFileModifiedTimes: make(map[string]time.Time), - recreateSuccessTimes: make(map[string]time.Time), - recreateErrorTimes: make(map[string]time.Time), - scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, - injectedSubAgentProcs: make(map[string]subAgentProcess), + ctx: ctx, + cancel: cancel, + watcherDone: make(chan struct{}), + updaterDone: make(chan struct{}), + initialUpdateDone: make(chan struct{}), + updateTrigger: make(chan chan error), + updateInterval: defaultUpdateInterval, + logger: logger, + clock: quartz.NewReal(), + execer: agentexec.DefaultExecer, + subAgentClient: noopSubAgentClient{}, + containerLabelIncludeFilter: make(map[string]string), + devcontainerNames: make(map[string]bool), + knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer), + configFileModifiedTimes: make(map[string]time.Time), + recreateSuccessTimes: make(map[string]time.Time), + recreateErrorTimes: make(map[string]time.Time), + scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, + injectedSubAgentProcs: make(map[string]subAgentProcess), } // The ctx and logger must be set before applying options to avoid // nil pointer dereference. @@ -266,7 +278,7 @@ func (api *API) watcherLoop() { continue } - now := api.clock.Now("watcherLoop") + now := api.clock.Now("agentcontainers", "watcherLoop") switch { case event.Has(fsnotify.Create | fsnotify.Write): api.logger.Debug(api.ctx, "devcontainer config file changed", slog.F("file", event.Name)) @@ -333,9 +345,9 @@ func (api *API) updaterLoop() { } return nil // Always nil to keep the ticker going. - }, "updaterLoop") + }, "agentcontainers", "updaterLoop") defer func() { - if err := ticker.Wait("updaterLoop"); err != nil && !errors.Is(err, context.Canceled) { + if err := ticker.Wait("agentcontainers", "updaterLoop"); err != nil && !errors.Is(err, context.Canceled) { api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err)) } }() @@ -481,6 +493,22 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code slog.F("config_file", configFile), ) + if len(api.containerLabelIncludeFilter) > 0 { + var ok bool + for label, value := range api.containerLabelIncludeFilter { + if v, found := container.Labels[label]; found && v == value { + ok = true + } + } + // Verbose debug logging is fine here since typically filters + // are only used in development or testing environments. + if !ok { + logger.Debug(ctx, "container does not match include filter, ignoring dev container", slog.F("container_labels", container.Labels), slog.F("include_filter", api.containerLabelIncludeFilter)) + continue + } + logger.Debug(ctx, "container matches include filter, processing dev container", slog.F("container_labels", container.Labels), slog.F("include_filter", api.containerLabelIncludeFilter)) + } + if dc, ok := api.knownDevcontainers[workspaceFolder]; ok { // If no config path is set, this devcontainer was defined // in Terraform without the optional config file. Assume the @@ -781,7 +809,7 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con dc.Container.DevcontainerStatus = dc.Status } api.knownDevcontainers[dc.WorkspaceFolder] = dc - api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("recreate", "errorTimes") + api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "errorTimes") api.mu.Unlock() return } @@ -803,7 +831,7 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con dc.Container.DevcontainerStatus = dc.Status } dc.Dirty = false - api.recreateSuccessTimes[dc.WorkspaceFolder] = api.clock.Now("recreate", "successTimes") + api.recreateSuccessTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "successTimes") api.knownDevcontainers[dc.WorkspaceFolder] = dc api.mu.Unlock() From 050177bf56b2e5aa9f758762ca83fe778dd4957f Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 15:37:22 +0000 Subject: [PATCH 13/18] add sub agent env and revert container id change --- agent/agentcontainers/api.go | 18 ++++++++++++++---- agent/agentcontainers/devcontainercli.go | 15 ++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index 204fc9e2ae068..dcaaed09ff9c3 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -59,6 +59,7 @@ type API struct { scriptLogger func(logSourceID uuid.UUID) ScriptLogger subAgentClient SubAgentClient subAgentURL string + subAgentEnv []string mu sync.RWMutex closed bool @@ -141,6 +142,13 @@ func WithSubAgentURL(url string) Option { } } +// WithSubAgent sets the environment variables for the sub-agent. +func WithSubAgentEnv(env ...string) Option { + return func(api *API) { + api.subAgentEnv = env + } +} + // WithDevcontainers sets the known devcontainers for the API. This // allows the API to be aware of devcontainers defined in the workspace // agent manifest. @@ -1147,12 +1155,14 @@ func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.Workspac logger.Info(ctx, "starting subagent in dev container") + env := []string{ + "CODER_AGENT_URL=" + api.subAgentURL, + "CODER_AGENT_TOKEN=" + agent.AuthToken.String(), + } + env = append(env, api.subAgentEnv...) err := api.dccli.Exec(agentCtx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"}, WithExecContainerID(container.ID), - WithRemoteEnv( - "CODER_AGENT_URL="+api.subAgentURL, - "CODER_AGENT_TOKEN="+agent.AuthToken.String(), - ), + WithRemoteEnv(env...), ) if err != nil && !errors.Is(err, context.Canceled) { logger.Error(ctx, "subagent process failed", slog.Error(err)) diff --git a/agent/agentcontainers/devcontainercli.go b/agent/agentcontainers/devcontainercli.go index 6459d7cde03b3..6039dbf229198 100644 --- a/agent/agentcontainers/devcontainercli.go +++ b/agent/agentcontainers/devcontainercli.go @@ -68,11 +68,10 @@ func WithExecOutput(stdout, stderr io.Writer) DevcontainerCLIExecOptions { } // WithExecContainerID sets the container ID to target a specific -// container. Note that this option will unset the workspace folder to -// ensure properties from the container are inherited correctly. +// container. func WithExecContainerID(id string) DevcontainerCLIExecOptions { return func(o *devcontainerCLIExecConfig) { - o.containerID = id + o.args = append(o.args, "--container-id", id) } } @@ -168,10 +167,12 @@ func (d *devcontainerCLI) Exec(ctx context.Context, workspaceFolder, configPath logger := d.logger.With(slog.F("workspace_folder", workspaceFolder), slog.F("config_path", configPath)) args := []string{"exec"} - switch { - case conf.containerID != "": - args = append(args, "--container-id", conf.containerID) - case workspaceFolder != "": + // For now, always set workspace folder even if --container-id is provided. + // Otherwise the environment of exec will be incomplete, like `pwd` will be + // /home/coder instead of /workspaces/coder. The downside is that the local + // `devcontainer.json` config will overwrite settings serialized in the + // container label. + if workspaceFolder != "" { args = append(args, "--workspace-folder", workspaceFolder) } if configPath != "" { From d5eb3fcbf3757675d59af144b98ec1cdb91e42eb Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 15:54:20 +0000 Subject: [PATCH 14/18] add sub agent as part of autostart integration test --- agent/agent_test.go | 185 ++++++++++++++++++++++++++---- agent/agentcontainers/api_test.go | 4 +- cli/exp_rpty_test.go | 4 + cli/open_test.go | 14 ++- cli/ssh_test.go | 8 +- coderd/workspaceagents_test.go | 11 +- 6 files changed, 188 insertions(+), 38 deletions(-) diff --git a/agent/agent_test.go b/agent/agent_test.go index 3a2562237b603..4e86059266fe0 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -48,6 +48,7 @@ import ( "cdr.dev/slog/sloggers/slogtest" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agentssh" "github.com/coder/coder/v2/agent/agenttest" "github.com/coder/coder/v2/agent/proto" @@ -60,9 +61,16 @@ import ( "github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet/tailnettest" "github.com/coder/coder/v2/testutil" + "github.com/coder/quartz" ) func TestMain(m *testing.M) { + if os.Getenv("CODER_TEST_RUN_SUB_AGENT_MAIN") == "1" { + // If we're running as a subagent, we don't want to run the main tests. + // Instead, we just run the subagent tests. + exit := runSubAgentMain() + os.Exit(exit) + } goleak.VerifyTestMain(m, testutil.GoleakOptions...) } @@ -1930,6 +1938,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) { if os.Getenv("CODER_TEST_USE_DOCKER") != "1" { t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test") } + if _, err := exec.LookPath("devcontainer"); err != nil { + t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli") + } pool, err := dockertest.NewPool("") require.NoError(t, err, "Could not connect to docker") @@ -1955,6 +1966,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) { // nolint: dogsled conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) ctx := testutil.Context(t, testutil.WaitLong) ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) { @@ -1986,6 +2000,60 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) { require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF) } +type subAgentRequestPayload struct { + Token string `json:"token"` + Directory string `json:"directory"` +} + +// runSubAgentMain is the main function for the sub-agent that connects +// to the control plane. It reads the CODER_AGENT_URL and +// CODER_AGENT_TOKEN environment variables, sends the token, and exits +// with a status code based on the response. +func runSubAgentMain() int { + url := os.Getenv("CODER_AGENT_URL") + token := os.Getenv("CODER_AGENT_TOKEN") + if url == "" || token == "" { + _, _ = fmt.Fprintln(os.Stderr, "CODER_AGENT_URL and CODER_AGENT_TOKEN must be set") + return 10 + } + + dir, err := os.Getwd() + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to get current working directory: %v\n", err) + return 1 + } + payload := subAgentRequestPayload{ + Token: token, + Directory: dir, + } + b, err := json.Marshal(payload) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to marshal payload: %v\n", err) + return 1 + } + + req, err := http.NewRequest("POST", url, bytes.NewReader(b)) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to create request: %v\n", err) + return 1 + } + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + req = req.WithContext(ctx) + resp, err := http.DefaultClient.Do(req) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "agent connection failed: %v\n", err) + return 11 + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + _, _ = fmt.Fprintf(os.Stderr, "agent exiting with non-zero exit code %d\n", resp.StatusCode) + return 12 + } + _, _ = fmt.Println("sub-agent connected successfully") + return 0 +} + // This tests end-to-end functionality of auto-starting a devcontainer. // It runs "devcontainer up" which creates a real Docker container. As // such, it does not run by default in CI. @@ -1999,6 +2067,56 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { if os.Getenv("CODER_TEST_USE_DOCKER") != "1" { t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test") } + if _, err := exec.LookPath("devcontainer"); err != nil { + t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli") + } + + // This HTTP handler handles requests from runSubAgentMain which + // acts as a fake sub-agent. We want to verify that the sub-agent + // connects and sends its token. We use a channel to signal + // that the sub-agent has connected successfully and then we wait + // until we receive another signal to return from the handler. This + // keeps the agent "alive" for as long as we want. + subAgentConnected := make(chan subAgentRequestPayload, 1) + subAgentReady := make(chan struct{}, 1) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Logf("Sub-agent request received: %s %s", r.Method, r.URL.Path) + + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + // Read the token from the request body. + var payload subAgentRequestPayload + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + http.Error(w, "Failed to read token", http.StatusBadRequest) + t.Logf("Failed to read token: %v", err) + return + } + defer r.Body.Close() + + t.Logf("Sub-agent request payload received: %+v", payload) + + // Signal that the sub-agent has connected successfully. + select { + case <-t.Context().Done(): + t.Logf("Test context done, not processing sub-agent request") + return + case subAgentConnected <- payload: + } + + // Wait for the signal to return from the handler. + select { + case <-t.Context().Done(): + t.Logf("Test context done, not waiting for sub-agent ready") + return + case <-subAgentReady: + } + + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() pool, err := dockertest.NewPool("") require.NoError(t, err, "Could not connect to docker") @@ -2016,9 +2134,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { require.NoError(t, err, "create devcontainer directory") devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json") err = os.WriteFile(devcontainerFile, []byte(`{ - "name": "mywork", - "image": "busybox:latest", - "cmd": ["sleep", "infinity"] + "name": "mywork", + "image": "ubuntu:latest", + "cmd": ["sleep", "infinity"], + "runArgs": ["--network=host"] }`), 0o600) require.NoError(t, err, "write devcontainer.json") @@ -2043,9 +2162,24 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { }, }, } + mClock := quartz.NewMock(t) + mClock.Set(time.Now()) + tickerFuncTrap := mClock.Trap().TickerFunc("agentcontainers") + //nolint:dogsled - conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { + _, agentClient, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append( + o.ContainerAPIOptions, + // Only match this specific dev container. + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", tempWorkspaceFolder), + agentcontainers.WithSubAgentURL(srv.URL), + // The agent will copy "itself", but in the case of this test, the + // agent is actually this test binary. So we'll tell the test binary + // to execute the sub-agent main function via this env. + agentcontainers.WithSubAgentEnv("CODER_TEST_RUN_SUB_AGENT_MAIN=1"), + ) }) t.Logf("Waiting for container with label: devcontainer.local_folder=%s", tempWorkspaceFolder) @@ -2089,32 +2223,30 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { ctx := testutil.Context(t, testutil.WaitLong) - ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) { - opts.Container = container.ID - }) - require.NoError(t, err, "failed to create ReconnectingPTY") - defer ac.Close() - - // Use terminal reader so we can see output in case somethin goes wrong. - tr := testutil.NewTerminalReader(t, ac) + // Ensure the container update routine runs. + tickerFuncTrap.MustWait(ctx).MustRelease(ctx) + tickerFuncTrap.Close() + _, next := mClock.AdvanceNext() + next.MustWait(ctx) - require.NoError(t, tr.ReadUntil(ctx, func(line string) bool { - return strings.Contains(line, "#") || strings.Contains(line, "$") - }), "find prompt") + // Verify that a subagent was created. + subAgents := agentClient.GetSubAgents() + require.Len(t, subAgents, 1, "expected one sub agent") - wantFileName := "file-from-devcontainer" - wantFile := filepath.Join(tempWorkspaceFolder, wantFileName) + subAgent := subAgents[0] + subAgentID, err := uuid.FromBytes(subAgent.GetId()) + require.NoError(t, err, "failed to parse sub-agent ID") + t.Logf("Connecting to sub-agent: %s (ID: %s)", subAgent.Name, subAgentID) - require.NoError(t, json.NewEncoder(ac).Encode(workspacesdk.ReconnectingPTYRequest{ - // NOTE(mafredri): We must use absolute path here for some reason. - Data: fmt.Sprintf("touch /workspaces/mywork/%s; exit\r", wantFileName), - }), "create file inside devcontainer") + subAgentToken, err := uuid.FromBytes(subAgent.GetAuthToken()) + require.NoError(t, err, "failed to parse sub-agent token") - // Wait for the connection to close to ensure the touch was executed. - require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF) + payload := testutil.RequireReceive(ctx, t, subAgentConnected) + require.Equal(t, subAgentToken.String(), payload.Token, "sub-agent token should match") + require.Equal(t, "/workspaces/mywork", payload.Directory, "sub-agent directory should match") - _, err = os.Stat(wantFile) - require.NoError(t, err, "file should exist outside devcontainer") + // Allow the subagent to exit. + close(subAgentReady) } // TestAgent_DevcontainerRecreate tests that RecreateDevcontainer @@ -2173,6 +2305,9 @@ func TestAgent_DevcontainerRecreate(t *testing.T) { //nolint:dogsled conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", workspaceFolder), + ) }) ctx := testutil.Context(t, testutil.WaitLong) diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index ab8c2a4a30b00..2580b069655bc 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -302,7 +302,6 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(), nil}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes() - mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt), }, @@ -321,7 +320,6 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(), assert.AnError}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes() - mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt), }, @@ -338,7 +336,6 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(fakeCt), nil}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt2), nil).After(preReq).AnyTimes() - mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt2), }, @@ -365,6 +362,7 @@ func TestAPI(t *testing.T) { api := agentcontainers.NewAPI(logger, agentcontainers.WithClock(mClock), agentcontainers.WithContainerCLI(mLister), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), ) defer api.Close() r.Mount("/", api.Routes()) diff --git a/cli/exp_rpty_test.go b/cli/exp_rpty_test.go index 355cc1741b5a9..923bf09bb0e15 100644 --- a/cli/exp_rpty_test.go +++ b/cli/exp_rpty_test.go @@ -9,6 +9,7 @@ import ( "github.com/ory/dockertest/v3/docker" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agenttest" "github.com/coder/coder/v2/cli/clitest" "github.com/coder/coder/v2/coderd/coderdtest" @@ -111,6 +112,9 @@ func TestExpRpty(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/cli/open_test.go b/cli/open_test.go index 0f757c99bfcf1..f7180ab260fbd 100644 --- a/cli/open_test.go +++ b/cli/open_test.go @@ -327,8 +327,6 @@ func TestOpenVSCodeDevContainer(t *testing.T) { }, }, nil, ).AnyTimes() - // DetectArchitecture always returns "" for this test to disable agent injection. - mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent { agents[0].Directory = agentDir @@ -339,7 +337,10 @@ func TestOpenVSCodeDevContainer(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mccli), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() @@ -504,8 +505,6 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { }, }, nil, ).AnyTimes() - // DetectArchitecture always returns "" for this test to disable agent injection. - mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent { agents[0].Name = agentName @@ -515,7 +514,10 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mccli), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/cli/ssh_test.go b/cli/ssh_test.go index 1774d8d131a9d..bee075283c083 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -2032,6 +2032,9 @@ func TestSSH_Container(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() @@ -2069,7 +2072,10 @@ func TestSSH_Container(t *testing.T) { }, nil).AnyTimes() _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mLister)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mLister), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 6a18ec76cdde2..1cb7e8d4b657b 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -1252,6 +1252,9 @@ func TestWorkspaceAgentContainers(t *testing.T) { }).Do() _ = agenttest.New(t, client.URL, r.AgentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() require.Len(t, resources, 1, "expected one resource") @@ -1358,7 +1361,10 @@ func TestWorkspaceAgentContainers(t *testing.T) { _ = agenttest.New(t, client.URL, r.AgentToken, func(o *agent.Options) { o.Logger = logger.Named("agent") o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mcl), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() require.Len(t, resources, 1, "expected one resource") @@ -1428,8 +1434,6 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{devContainer}, }, nil).AnyTimes() - // DetectArchitecture always returns "" for this test to disable agent injection. - mccli.EXPECT().DetectArchitecture(gomock.Any(), devContainer.ID).Return("", nil).AnyTimes() mdccli.EXPECT().Up(gomock.Any(), workspaceFolder, configFile, gomock.Any()).Return("someid", nil).Times(1) return 0 }, @@ -1477,6 +1481,7 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { agentcontainers.WithContainerCLI(mccli), agentcontainers.WithDevcontainerCLI(mdccli), agentcontainers.WithWatcher(watcher.NewNoop()), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() From 1629bee5cfaf6543644fd04bcd4710f3cb33240a Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 16:05:12 +0000 Subject: [PATCH 15/18] fixup! add sub agent env and revert container id change --- agent/agentcontainers/devcontainercli.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/agent/agentcontainers/devcontainercli.go b/agent/agentcontainers/devcontainercli.go index 6039dbf229198..4e1ad93a715dc 100644 --- a/agent/agentcontainers/devcontainercli.go +++ b/agent/agentcontainers/devcontainercli.go @@ -52,10 +52,9 @@ func WithUpOutput(stdout, stderr io.Writer) DevcontainerCLIUpOptions { type DevcontainerCLIExecOptions func(*devcontainerCLIExecConfig) type devcontainerCLIExecConfig struct { - args []string // Additional arguments for the Exec command. - containerID string - stdout io.Writer - stderr io.Writer + args []string // Additional arguments for the Exec command. + stdout io.Writer + stderr io.Writer } // WithExecOutput sets additional stdout and stderr writers for logs From 67ee0c59ac1fb3edc178738b54ef3209d96fb8c9 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 16:27:35 +0000 Subject: [PATCH 16/18] fixup! add sub agent as part of autostart integration test --- coderd/workspaceagents_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 1cb7e8d4b657b..ec0b692886918 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -1434,6 +1434,8 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{devContainer}, }, nil).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), devContainer.ID).Return("", nil).AnyTimes() mdccli.EXPECT().Up(gomock.Any(), workspaceFolder, configFile, gomock.Any()).Return("someid", nil).Times(1) return 0 }, @@ -1481,7 +1483,7 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { agentcontainers.WithContainerCLI(mccli), agentcontainers.WithDevcontainerCLI(mdccli), agentcontainers.WithWatcher(watcher.NewNoop()), - agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + agentcontainers.WithContainerLabelIncludeFilter(agentcontainers.DevcontainerLocalFolderLabel, workspaceFolder), ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() From 757dc85818896fa3d62540860162e4a8ecaa3a5a Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 9 Jun 2025 16:34:42 +0000 Subject: [PATCH 17/18] fixup! fixup! add sub agent env and revert container id change --- agent/agentcontainers/devcontainercli_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/agentcontainers/devcontainercli_test.go b/agent/agentcontainers/devcontainercli_test.go index dbb3c4dedbb85..b8b4120d2e8ab 100644 --- a/agent/agentcontainers/devcontainercli_test.go +++ b/agent/agentcontainers/devcontainercli_test.go @@ -183,7 +183,7 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { cmd: "echo", cmdArgs: []string{"hello"}, opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithExecContainerID("test-container-123")}, - wantArgs: "exec --container-id test-container-123 echo hello", + wantArgs: "exec --workspace-folder /test/workspace --container-id test-container-123 echo hello", wantError: false, }, { @@ -193,7 +193,7 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { cmd: "bash", cmdArgs: []string{"-c", "ls -la"}, opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithExecContainerID("my-container")}, - wantArgs: "exec --container-id my-container --config /test/config.json bash -c ls -la", + wantArgs: "exec --workspace-folder /test/workspace --config /test/config.json --container-id my-container bash -c ls -la", wantError: false, }, { @@ -205,7 +205,7 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { opts: []agentcontainers.DevcontainerCLIExecOptions{ agentcontainers.WithExecContainerID("test-container-789"), }, - wantArgs: "exec --container-id test-container-789 cat /etc/hostname", + wantArgs: "exec --workspace-folder /test/workspace --container-id test-container-789 cat /etc/hostname", wantError: false, }, } @@ -293,7 +293,7 @@ func TestDevcontainerCLI_WithOutput(t *testing.T) { errBuf := &bytes.Buffer{} // Simulate CLI execution for exec command with container ID. - wantArgs := "exec --container-id test-container-456 echo hello" + wantArgs := "exec --workspace-folder /test/workspace --container-id test-container-456 echo hello" testExecer := &testDevcontainerExecer{ testExePath: testExePath, wantArgs: wantArgs, From dc7f7c36b35409cf75823754e38fcbab235de5f2 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 10 Jun 2025 09:10:25 +0000 Subject: [PATCH 18/18] use the correct directory for the sub agent --- agent/agent_test.go | 4 ++ agent/agentcontainers/api.go | 35 +++++++++++----- agent/agentcontainers/api_test.go | 66 ++++++++++++++++++++++--------- agent/agenttest/client.go | 25 ++++++++++++ 4 files changed, 101 insertions(+), 29 deletions(-) diff --git a/agent/agent_test.go b/agent/agent_test.go index 4e86059266fe0..3ef9e4f4c75ba 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -2238,6 +2238,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { require.NoError(t, err, "failed to parse sub-agent ID") t.Logf("Connecting to sub-agent: %s (ID: %s)", subAgent.Name, subAgentID) + gotDir, err := agentClient.GetSubAgentDirectory(subAgentID) + require.NoError(t, err, "failed to get sub-agent directory") + require.Equal(t, "/workspaces/mywork", gotDir, "sub-agent directory should match") + subAgentToken, err := uuid.FromBytes(subAgent.GetAuthToken()) require.NoError(t, err, "failed to parse sub-agent token") diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index dcaaed09ff9c3..301553c651048 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -1,9 +1,11 @@ package agentcontainers import ( + "bytes" "context" "errors" "fmt" + "io" "net/http" "os" "path" @@ -1075,17 +1077,30 @@ func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc coders logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err)) } + // Detect workspace folder by executing `pwd` in the container. + // NOTE(mafredri): This is a quick and dirty way to detect the + // workspace folder inside the container. In the future we will + // rely more on `devcontainer read-configuration`. + var pwdBuf bytes.Buffer + err = api.dccli.Exec(ctx, dc.WorkspaceFolder, dc.ConfigPath, "pwd", []string{}, + WithExecOutput(&pwdBuf, io.Discard), + WithExecContainerID(container.ID), + ) + if err != nil { + return xerrors.Errorf("check workspace folder in container: %w", err) + } + directory := strings.TrimSpace(pwdBuf.String()) + if directory == "" { + logger.Warn(ctx, "detected workspace folder is empty, using default workspace folder", + slog.F("default_workspace_folder", DevcontainerDefaultContainerWorkspaceFolder)) + directory = DevcontainerDefaultContainerWorkspaceFolder + } + // The preparation of the subagent is done, now we can create the // subagent record in the database to receive the auth token. createdAgent, err := api.subAgentClient.Create(ctx, SubAgent{ - Name: dc.Name, - // The default workspaceFolder for devcontainers is /workspaces. - // However, it can be changed by setting {"workspaceFolder": "/src"} - // in the devcontainer.json. This information is not encoded into - // the container labels, so we must rely on the values parsed from - // the devcontainer.json file on disk. - // TODO(mafredri): Support custom workspace folders in the future. - Directory: DevcontainerDefaultContainerWorkspaceFolder, + Name: dc.Name, + Directory: directory, OperatingSystem: "linux", // Assuming Linux for dev containers. Architecture: arch, }) @@ -1166,9 +1181,9 @@ func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.Workspac ) if err != nil && !errors.Is(err, context.Canceled) { logger.Error(ctx, "subagent process failed", slog.Error(err)) + } else { + logger.Info(ctx, "subagent process finished") } - - logger.Info(ctx, "subagent process finished") } func (api *API) Close() error { diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index 2580b069655bc..59b0461c7948a 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -64,7 +64,7 @@ type fakeDevcontainerCLI struct { upErr error upErrC chan error // If set, send to return err, close to return upErr. execErr error - execErrC chan error // If set, send to return err, close to return execErr. + execErrC chan func(cmd string, args ...string) error // If set, send fn to return err, nil or close to return execErr. } func (f *fakeDevcontainerCLI) Up(ctx context.Context, _, _ string, _ ...agentcontainers.DevcontainerCLIUpOptions) (string, error) { @@ -81,14 +81,14 @@ func (f *fakeDevcontainerCLI) Up(ctx context.Context, _, _ string, _ ...agentcon return f.upID, f.upErr } -func (f *fakeDevcontainerCLI) Exec(ctx context.Context, _, _ string, _ string, _ []string, _ ...agentcontainers.DevcontainerCLIExecOptions) error { +func (f *fakeDevcontainerCLI) Exec(ctx context.Context, _, _ string, cmd string, args []string, _ ...agentcontainers.DevcontainerCLIExecOptions) error { if f.execErrC != nil { select { case <-ctx.Done(): return ctx.Err() - case err, ok := <-f.execErrC: - if ok { - return err + case fn, ok := <-f.execErrC: + if ok && fn != nil { + return fn(cmd, args...) } } } @@ -1239,16 +1239,17 @@ func TestAPI(t *testing.T) { } var ( - ctx = testutil.Context(t, testutil.WaitMedium) - logger = slog.Make() - mClock = quartz.NewMock(t) - mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) - fakeSAC = &fakeSubAgentClient{ + ctx = testutil.Context(t, testutil.WaitMedium) + errTestTermination = xerrors.New("test termination") + logger = slogtest.Make(t, &slogtest.Options{IgnoredErrorIs: []error{errTestTermination}}).Leveled(slog.LevelDebug) + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ createErrC: make(chan error, 1), deleteErrC: make(chan error, 1), } fakeDCCLI = &fakeDevcontainerCLI{ - execErrC: make(chan error, 1), + execErrC: make(chan func(cmd string, args ...string) error, 1), } testContainer = codersdk.WorkspaceAgentContainer{ @@ -1264,8 +1265,6 @@ func TestAPI(t *testing.T) { } ) - defer close(fakeDCCLI.execErrC) - coderBin, err := os.Executable() require.NoError(t, err) @@ -1287,23 +1286,31 @@ func TestAPI(t *testing.T) { api := agentcontainers.NewAPI(logger, agentcontainers.WithClock(mClock), agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithWatcher(watcher.NewNoop()), agentcontainers.WithSubAgentClient(fakeSAC), agentcontainers.WithSubAgentURL("test-subagent-url"), agentcontainers.WithDevcontainerCLI(fakeDCCLI), ) defer api.Close() - // Allow initial agent creation to succeed. + // Close before api.Close() defer to avoid deadlock after test. + defer close(fakeSAC.createErrC) + defer close(fakeSAC.deleteErrC) + defer close(fakeDCCLI.execErrC) + + // Allow initial agent creation and injection to succeed. testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(cmd string, args ...string) error { + assert.Equal(t, "pwd", cmd) + assert.Empty(t, args) + return nil + }) // Exec pwd. // Make sure the ticker function has been registered // before advancing the clock. tickerTrap.MustWait(ctx).MustRelease(ctx) tickerTrap.Close() - // Pre-populate for next iteration (also verify previous consumption). - testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) - // Ensure we only inject the agent once. for i := range 3 { _, aw := mClock.AdvanceNext() @@ -1318,6 +1325,8 @@ func TestAPI(t *testing.T) { assert.Len(t, fakeSAC.deleted, 0) } + t.Log("Agent injected successfully, now testing cleanup and reinjection...") + // Expect the agent to be reinjected. gomock.InOrder( mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), @@ -1329,10 +1338,27 @@ func TestAPI(t *testing.T) { ) // Terminate the agent and verify it is deleted. - testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, nil) + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(_ string, args ...string) error { + if len(args) > 0 { + assert.Equal(t, "agent", args[0]) + } else { + assert.Fail(t, `want "agent" command argument`) + } + return errTestTermination + }) // Allow cleanup to proceed. - testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, xerrors.New("test termination")) + testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, nil) + + t.Log("Waiting for agent recreation...") + + // Allow agent recreation and reinjection to succeed. + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(cmd string, args ...string) error { + assert.Equal(t, "pwd", cmd) + assert.Empty(t, args) + return nil + }) // Exec pwd. // Wait until the agent recreation is started. for len(fakeSAC.createErrC) > 0 { @@ -1340,6 +1366,8 @@ func TestAPI(t *testing.T) { aw.MustWait(ctx) } + t.Log("Agent recreated successfully.") + // Verify agent was deleted. require.Len(t, fakeSAC.deleted, 1) assert.Equal(t, fakeSAC.created[0].ID, fakeSAC.deleted[0]) diff --git a/agent/agenttest/client.go b/agent/agenttest/client.go index 4d32d24b40619..0a2df141ff3d4 100644 --- a/agent/agenttest/client.go +++ b/agent/agenttest/client.go @@ -167,6 +167,10 @@ func (c *Client) GetSubAgents() []*agentproto.SubAgent { return c.fakeAgentAPI.GetSubAgents() } +func (c *Client) GetSubAgentDirectory(id uuid.UUID) (string, error) { + return c.fakeAgentAPI.GetSubAgentDirectory(id) +} + type FakeAgentAPI struct { sync.Mutex t testing.TB @@ -182,6 +186,7 @@ type FakeAgentAPI struct { timings []*agentproto.Timing connectionReports []*agentproto.ReportConnectionRequest subAgents map[uuid.UUID]*agentproto.SubAgent + subAgentDirs map[uuid.UUID]string getAnnouncementBannersFunc func() ([]codersdk.BannerConfig, error) getResourcesMonitoringConfigurationFunc func() (*agentproto.GetResourcesMonitoringConfigurationResponse, error) @@ -392,6 +397,10 @@ func (f *FakeAgentAPI) CreateSubAgent(ctx context.Context, req *agentproto.Creat f.subAgents = make(map[uuid.UUID]*agentproto.SubAgent) } f.subAgents[subAgentID] = subAgent + if f.subAgentDirs == nil { + f.subAgentDirs = make(map[uuid.UUID]string) + } + f.subAgentDirs[subAgentID] = req.GetDirectory() // For a fake implementation, we don't create workspace apps. // Real implementations would handle req.Apps here. @@ -452,6 +461,22 @@ func (f *FakeAgentAPI) GetSubAgents() []*agentproto.SubAgent { return agents } +func (f *FakeAgentAPI) GetSubAgentDirectory(id uuid.UUID) (string, error) { + f.Lock() + defer f.Unlock() + + if f.subAgentDirs == nil { + return "", xerrors.New("no sub-agent directories available") + } + + dir, ok := f.subAgentDirs[id] + if !ok { + return "", xerrors.New("sub-agent directory not found") + } + + return dir, nil +} + func NewFakeAgentAPI(t testing.TB, logger slog.Logger, manifest *agentproto.Manifest, statsCh chan *agentproto.Stats) *FakeAgentAPI { return &FakeAgentAPI{ t: t,