8000 feat(agent/agentcontainers): implement sub agent injection by mafredri · Pull Request #18245 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat(agent/agentcontainers): implement sub agent injection #18245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 10, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add sub agent as part of autostart integration test
  • Loading branch information
mafredri committed Jun 9, 2025
commit d5eb3fcbf3757675d59af144b98ec1cdb91e42eb
185 changes: 160 additions & 25 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"cdr.dev/slog/sloggers/slogtest"

"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agentcontainers"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/agent/proto"
Expand All @@ -60,9 +61,16 @@ import (
"github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/tailnet/tailnettest"
"github.com/coder/coder/v2/testutil"
"github.com/coder/quartz"
)

func TestMain(m *testing.M) {
if os.Getenv("CODER_TEST_RUN_SUB_AGENT_MAIN") == "1" {
// If we're running as a subagent, we don't want to run the main tests.
// Instead, we just run the subagent tests.
exit := runSubAgentMain()
os.Exit(exit)
}
goleak.VerifyTestMain(m, testutil.GoleakOptions...)
}

Expand Down Expand Up @@ -1930,6 +1938,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}
if _, err := exec.LookPath("devcontainer"); err != nil {
t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli")
}

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")
Expand All @@ -1955,6 +1966,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
// nolint: dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
ctx := testutil.Context(t, testutil.WaitLong)
ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) {
Expand Down Expand Up @@ -1986,6 +2000,60 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
}

type subAgentRequestPayload struct {
Token string `json:"token"`
Directory string `json:"directory"`
}

// runSubAgentMain is the main function for the sub-agent that connects
// to the control plane. It reads the CODER_AGENT_URL and
// CODER_AGENT_TOKEN environment variables, sends the token, and exits
// with a status code based on the response.
func runSubAgentMain() int {
url := os.Getenv("CODER_AGENT_URL")
token := os.Getenv("CODER_AGENT_TOKEN")
if url == "" || token == "" {
_, _ = fmt.Fprintln(os.Stderr, "CODER_AGENT_URL and CODER_AGENT_TOKEN must be set")
return 10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we name these specific status codes as something more meaningful to human eyes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They don't really have a meaning, just something to differentiate the states and started at 10 since I got tired of bumping everything as I added more stuff 😅, the println should hopefully be helpful here.

}

dir, err := os.Getwd()
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to get current working directory: %v\n", err)
return 1
}
payload := subAgentRequestPayload{
Token: token,
Directory: dir,
}
b, err := json.Marshal(payload)
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to marshal payload: %v\n", err)
return 1
}

req, err := http.NewRequest("POST", url, bytes.NewReader(b))
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to create request: %v\n", err)
return 1
}
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
req = req.WithContext(ctx)
resp, err := http.DefaultClient.Do(req)
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "agent connection failed: %v\n", err)
return 11
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
_, _ = fmt.Fprintf(os.Stderr, "agent exiting with non-zero exit code %d\n", resp.StatusCode)
return 12
}
_, _ = fmt.Println("sub-agent connected successfully")
return 0
}

// This tests end-to-end functionality of auto-starting a devcontainer.
// It runs "devcontainer up" which creates a real Docker container. As
// such, it does not run by default in CI.
Expand All @@ -1999,6 +2067,56 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}
if _, err := exec.LookPath("devcontainer"); err != nil {
t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli")
}

// This HTTP handler handles requests from runSubAgentMain which
// acts as a fake sub-agent. We want to verify that the sub-agent
// connects and sends its token. We use a channel to signal
// that the sub-agent has connected successfully and then we wait
// until we receive another signal to return from the handler. This
// keeps the agent "alive" for as long as we want.
subAgentConnected := make(chan subAgentRequestPayload, 1)
subAgentReady := make(chan struct{}, 1)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Logf("Sub-agent request received: %s %s", r.Method, r.URL.Path)

if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}

// Read the token from the request body.
var payload subAgentRequestPayload
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
http.Error(w, "Failed to read token", http.StatusBadRequest)
t.Logf("Failed to read token: %v", err)
return
}
defer r.Body.Close()

t.Logf("Sub-agent request payload received: %+v", payload)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: do we perhaps want to allow the caller to run some function against the paylaod?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We send it on the channel and do some verification already 👍🏻


// Signal that the sub-agent has connected successfully.
select {
case <-t.Context().Done():
t.Logf("Test context done, not processing sub-agent request")
return
case subAgentConnected <- payload:
}

// Wait for the signal to return from the handler.
select {
case <-t.Context().Done():
t.Logf("Test context done, not waiting for sub-agent ready")
return
case <-subAgentReady:
}

w.WriteHeader(http.StatusOK)
}))
defer srv.Close()

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")
Expand All @@ -2016,9 +2134,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
require.NoError(t, err, "create devcontainer directory")
devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json")
err = os.WriteFile(devcontainerFile, []byte(`{
"name": "mywork",
"image": "busybox:latest",
"cmd": ["sleep", "infinity"]
"name": "mywork",
"image": "ubuntu:latest",
"cmd": ["sleep", "infinity"],
"runArgs": ["--network=host"]
}`), 0o600)
require.NoError(t, err, "write devcontainer.json")

Expand All @@ -2043,9 +2162,24 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
},
},
}
mClock := quartz.NewMock(t)
mClock.Set(time.Now())
tickerFuncTrap := mClock.Trap().TickerFunc("agentcontainers")

//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
_, agentClient, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(
o.ContainerAPIOptions,
// Only match this specific dev container.
agentcontainers.WithClock(mClock),
agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", tempWorkspaceFolder),
agentcontainers.WithSubAgentURL(srv.URL),
// The agent will copy "itself", but in the case of this test, the
// agent is actually this test binary. So we'll tell the test binary
// to execute the sub-agent main function via this env.
agentcontainers.WithSubAgentEnv("CODER_TEST_RUN_SUB_AGENT_MAIN=1"),
Comment on lines +2178 to +2181
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image

)
})

t.Logf("Waiting for container with label: devcontainer.local_folder=%s", tempWorkspaceFolder)
Expand Down Expand Up @@ -2089,32 +2223,30 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {

ctx := testutil.Context(t, testutil.WaitLong)

ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) {
opts.Container = container.ID
})
require.NoError(t, err, "failed to create ReconnectingPTY")
defer ac.Close()

// Use terminal reader so we can see output in case somethin goes wrong.
tr := testutil.NewTerminalReader(t, ac)
// Ensure the container update routine runs.
tickerFuncTrap.MustWait(ctx).MustRelease(ctx)
tickerFuncTrap.Close()
_, next := mClock.AdvanceNext()
next.MustWait(ctx)

require.NoError(t, tr.ReadUntil(ctx, func(line string) bool {
return strings.Contains(line, "#") || strings.Contains(line, "$")
}), "find prompt")
// Verify that a subagent was created.
subAgents := agentClient.GetSubAgents()
require.Len(t, subAgents, 1, "expected one sub agent")

wantFileName := "file-from-devcontainer"
wantFile := filepath.Join(tempWorkspaceFolder, wantFileName)
subAgent := subAgents[0]
subAgentID, err := uuid.FromBytes(subAgent.GetId())
require.NoError(t, err, "failed to parse sub-agent ID")
t.Logf("Connecting to sub-agent: %s (ID: %s)", subAgent.Name, subAgentID)

require.NoError(t, json.NewEncoder(ac).Encode(workspacesdk.ReconnectingPTYRequest{
// NOTE(mafredri): We must use absolute path here for some reason.
Data: fmt.Sprintf("touch /workspaces/mywork/%s; exit\r", wantFileName),
}), "create file inside devcontainer")
subAgentToken, err := uuid.FromBytes(subAgent.GetAuthToken())
require.NoError(t, err, "failed to parse sub-agent token")

// Wait for the connection to close to ensure the touch was executed.
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
payload := testutil.RequireReceive(ctx, t, subAgentConnected)
require.Equal(t, subAgentToken.String(), payload.Token, "sub-agent token should match")
require.Equal(t, "/workspaces/mywork", payload.Directory, "sub-agent directory should match")

_, err = os.Stat(wantFile)
require.NoError(t, err, "file should exist outside devcontainer")
// Allow the subagent to exit.
close(subAgentReady)
}

// TestAgent_DevcontainerRecreate tests that RecreateDevcontainer
Expand Down Expand Up @@ -2173,6 +2305,9 @@ func TestAgent_DevcontainerRecreate(t *testing.T) {
//nolint:dogsled
conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", workspaceFolder),
)
})

ctx := testutil.Context(t, testutil.WaitLong)
Expand Down
4 changes: 1 addition & 3 deletions agent/agentcontainers/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,6 @@ func TestAPI(t *testing.T) {
initialData: initialDataPayload{makeResponse(), nil},
setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) {
mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes()
mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("<none>", nil).AnyTimes()
},
expected: makeResponse(fakeCt),
},
Expand All @@ -321,7 +320,6 @@ func TestAPI(t *testing.T) {
initialData: initialDataPayload{makeResponse(), assert.AnError},
setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) {
mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes()
mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("<none>", nil).AnyTimes()
},
expected: makeResponse(fakeCt),
},
Expand All @@ -338,7 +336,6 @@ func TestAPI(t *testing.T) {
initialData: initialDataPayload{makeResponse(fakeCt), nil},
setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) {
mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt2), nil).After(preReq).AnyTimes()
mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("<none>", nil).AnyTimes()
},
expected: makeResponse(fakeCt2),
},
Expand All @@ -365,6 +362,7 @@ func TestAPI(t *testing.T) {
api := agentcontainers.NewAPI(logger,
agentcontainers.WithClock(mClock),
agentcontainers.WithContainerCLI(mLister),
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
defer api.Close()
r.Mount("/", api.Routes())
Expand Down
4 changes: 4 additions & 0 deletions cli/exp_rpty_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/ory/dockertest/v3/docker"

"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agentcontainers"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/cli/clitest"
"github.com/coder/coder/v2/coderd/coderdtest"
Expand Down Expand Up @@ -111,6 +112,9 @@ func TestExpRpty(t *testing.T) {

_ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
_ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait()

Expand Down
14 changes: 8 additions & 6 deletions cli/open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,6 @@ func TestOpenVSCodeDevContainer(t *testing.T) {
},
}, nil,
).AnyTimes()
// DetectArchitecture always returns "<none>" for this test to disable agent injection.
mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("<none>", nil).AnyTimes()

client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent {
agents[0].Directory = agentDir
Expand All @@ -339,7 +337,10 @@ func TestOpenVSCodeDevContainer(t *testing.T) {

_ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli))
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerCLI(mccli),
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
_ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait()

Expand Down Expand Up @@ -504,8 +505,6 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) {
},
}, nil,
).AnyTimes()
// DetectArchitecture always returns "<none>" for this test to disable agent injection.
mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("<none>", nil).AnyTimes()

client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent {
agents[0].Name = agentName
Expand All @@ -515,7 +514,10 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) {

_ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli))
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerCLI(mccli),
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
_ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait()

Expand Down
8 changes: 7 additions & 1 deletion cli/ssh_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2032,6 +2032,9 @@ func TestSSH_Container(t *testing.T) {

_ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
_ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait()

Expand Down Expand Up @@ -2069,7 +2072,10 @@ func TestSSH_Container(t *testing.T) {
}, nil).AnyTimes()
_ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mLister))
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerCLI(mLister),
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
_ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait()

Expand Down
Loading
Loading
0