10000 feat(agent/agentcontainers): implement sub agent injection by mafredri · Pull Request #18245 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat(agent/agentcontainers): implement sub agent injection #18245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
// createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates
// the tailnet using the information in the manifest
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error {
return func(ctx context.Context, _ proto.DRPCAgentClient26) (retErr error) {
return func(ctx context.Context, aAPI proto.DRPCAgentClient26) (retErr error) {
if err := manifestOK.wait(ctx); err != nil {
return xerrors.Errorf("no manifest: %w", err)
}
Expand All @@ -1208,6 +1208,7 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
// agent API.
network, err = a.createTailnet(
a.gracefulCtx,
aAPI,
manifest.AgentID,
manifest.DERPMap,
manifest.DERPForceWebSockets,
Expand Down Expand Up @@ -1355,6 +1356,7 @@ func (a *agent) trackGoroutine(fn func()) error {

func (a *agent) createTailnet(
ctx context.Context,
aAPI proto.DRPCAgentClient26,
agentID uuid.UUID,
derpMap *tailcfg.DERPMap,
derpForceWebSockets, disableDirectConnections bool,
Expand Down Expand Up @@ -1487,7 +1489,7 @@ func (a *agent) createTailnet(
}()
if err = a.trackGoroutine(func() {
defer apiListener.Close()
apiHandler, closeAPIHAndler := a.apiHandler()
apiHandler, closeAPIHAndler := a.apiHandler(aAPI)
defer func() {
_ = closeAPIHAndler()
}()
Expand Down
187 changes: 163 additions & 24 deletions agent/agent_test.go
)
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"cdr.dev/slog/sloggers/slogtest"

"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agentcontainers"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/agent/proto"
Expand All @@ -60,9 +61,16 @@ import (
"github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/tailnet/tailnettest"
"github.com/coder/coder/v2/testutil"
"github.com/coder/quartz"
)

func TestMain(m *testing.M) {
if os.Getenv("CODER_TEST_RUN_SUB_AGENT_MAIN") == "1" {
// If we're running as a subagent, we don't want to run the main tests.
// Instead, we just run the subagent tests.
exit := runSubAgentMain()
os.Exit(exit)
}
goleak.VerifyTestMain(m, testutil.GoleakOptions...)
}

Expand Down Expand Up @@ -1930,6 +1938,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}
if _, err := exec.LookPath("devcontainer"); err != nil {
t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli")
}

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")
Expand All @@ -1955,6 +1966,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
// nolint: dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
)
})
ctx := testutil.Context(t, testutil.WaitLong)
ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) {
Expand Down Expand Up @@ -1986,6 +2000,60 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
}

type subAgentRequestPayload struct {
Token string `json:"token"`
Directory string `json:"directory"`
}

// runSubAgentMain is the main function for the sub-agent that connects
// to the control plane. It reads the CODER_AGENT_URL and
// CODER_AGENT_TOKEN environment variables, sends the token, and exits
// with a status code based on the response.
func runSubAgentMain() int {
url := os.Getenv("CODER_AGENT_URL")
token := os.Getenv("CODER_AGENT_TOKEN")
if url == "" || token == "" {
_, _ = fmt.Fprintln(os.Stderr, "CODER_AGENT_URL and CODER_AGENT_TOKEN must be set")
return 10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we name these specific status codes as something more meaningful to human eyes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They don't really have a meaning, just something to differentiate the states and started at 10 since I got tired of bumping everything as I added more stuff 😅, the println should hopefully be helpful here.

}

dir, err := os.Getwd()
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to get current working directory: %v\n", err)
return 1
}
payload := subAgentRequestPayload{
Token: token,
Directory: dir,
}
b, err := json.Marshal(payload)
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to marshal payload: %v\n", err)
return 1
}

req, err := http.NewRequest("POST", url, bytes.NewReader(b))
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "failed to create request: %v\n", err)
return 1
}
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
req = req.WithContext(ctx)
resp, err := http.DefaultClient.Do(req)
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "agent connection failed: %v\n", err)
return 11
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
_, _ = fmt.Fprintf(os.Stderr, "agent exiting with non-zero exit code %d\n", resp.StatusCode)
return 12
}
_, _ = fmt.Println("sub-agent connected successfully")
return 0
}

// This tests end-to-end functionality of auto-starting a devcontainer.
// It runs "devcontainer up" which creates a real Docker container. As
// such, it does not run by default in CI.
Expand All @@ -1999,6 +2067,56 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}
if _, err := exec.LookPath("devcontainer"); err != nil {
t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli")
}

// This HTTP handler handles requests from runSubAgentMain which
// acts as a fake sub-agent. We want to verify that the sub-agent
// connects and sends its token. We use a channel to signal
// that the sub-agent has connected successfully and then we wait
// until we receive another signal to return from the handler. This
// keeps the agent "alive" for as long as we want.
subAgentConnected := make(chan subAgentRequestPayload, 1)
subAgentReady := make(chan struct{}, 1)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Logf("Sub-agent request received: %s %s", r.Method, r.URL.Path)

if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}

// Read the token from the request body.
var payload subAgentRequestPayload
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
http.Error(w, "Failed to read token", http.StatusBadRequest)
t.Logf("Failed to read token: %v", err)
return
}
defer r.Body.Close()

t.Logf("Sub-agent request payload received: %+v", payload)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: do we perhaps want to allow the caller to run some function against the paylaod?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We send it on the channel and do some verification already 👍🏻


// Signal that the sub-agent has connected successfully.
select {
case <-t.Context().Done():
t.Logf("Test context done, not processing sub-agent request")
return
case subAgentConnected <- payload:
}

// Wait for the signal to return from the handler.
select {
case <-t.Context().Done():
t.Logf("Test context done, not waiting for sub-agent ready")
return
case <-subAgentReady:
}

w.WriteHeader(http.StatusOK)
}))
defer srv.Close()

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")
Expand All @@ -2016,9 +2134,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
require.NoError(t, err, "create devcontainer directory")
devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json")
err = os.WriteFile(devcontainerFile, []byte(`{
"name": "mywork",
"image": "busybox:latest",
"cmd": ["sleep", "infinity"]
"name": "mywork",
"image": "ubuntu:latest",
"cmd": ["sleep", "infinity"],
"runArgs": ["--network=host"]
}`), 0o600)
require.NoError(t, err, "write devcontainer.json")

Expand All @@ -2043,9 +2162,24 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
},
},
}
mClock := quartz.NewMock(t)
mClock.Set(time.Now())
tickerFuncTrap := mClock.Trap().TickerFunc("agentcontainers")

//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
_, agentClient, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(
o.ContainerAPIOptions,
// Only match this specific dev container.
agentcontainers.WithClock(mClock),
agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", tempWorkspaceFolder),
agentcontainers.WithSubAgentURL(srv.URL),
// The agent will copy "itself", but in the case of this test, the
// agent is actually this test binary. So we'll tell the test binary
// to execute the sub-agent main function via this env.
agentcontainers.WithSubAgentEnv("CODER_TEST_RUN_SUB_AGENT_MAIN=1"),
Comment on lines +2178 to +2181
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image

})

t.Logf("Waiting for container with label: devcontainer.local_folder=%s", tempWorkspaceFolder)
Expand Down Expand Up @@ -2089,32 +2223,34 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {

ctx := testutil.Context(t, testutil.WaitLong)

ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) {
opts.Container = container.ID
})
require.NoError(t, err, "failed to create ReconnectingPTY")
defer ac.Close()
// Ensure the container update routine runs.
tickerFuncTrap.MustWait(ctx).MustRelease(ctx)
tickerFuncTrap.Close()
_, next := mClock.AdvanceNext()
next.MustWait(ctx)

// Use terminal reader so we can see output in case somethin goes wrong.
tr := testutil.NewTerminalReader(t, ac)
// Verify that a subagent was created.
subAgents := agentClient.GetSubAgents()
require.Len(t, subAgents, 1, "expected one sub agent")

require.NoError(t, tr.ReadUntil(ctx, func(line string) bool {
return strings.Contains(line, "#") || strings.Contains(line, "$")
}), "find prompt")
subAgent := subAgents[0]
subAgentID, err := uuid.FromBytes(subAgent.GetId())
require.NoError(t, err, "failed to parse sub-agent ID")
t.Logf("Connecting to sub-agent: %s (ID: %s)", subAgent.Name, subAgentID)

wantFileName := "file-from-devcontainer"
wantFile := filepath.Join(tempWorkspaceFolder, wantFileName)
gotDir, err := agentClient.GetSubAgentDirectory(subAgentID)
require.NoError(t, err, "failed to get sub-agent directory")
require.Equal(t, "/workspaces/mywork", gotDir, "sub-agent directory should match")

require.NoError(t, json.NewEncoder(ac).Encode(workspacesdk.ReconnectingPTYRequest{
// NOTE(mafredri): We must use absolute path here for some reason.
Data: fmt.Sprintf("touch /workspaces/mywork/%s; exit\r", wantFileName),
}), "create file inside devcontainer")
subAgentToken, err := uuid.FromBytes(subAgent.GetAuthToken())
require.NoError(t, err, "failed to parse sub-agent token")

// Wait for the connection to close to ensure the touch was executed.
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
payload := testutil.RequireReceive(ctx, t, subAgentConnected)
require.Equal(t, subAgentToken.String(), payload.Token, "sub-agent token should match")
require.Equal(t, "/workspaces/mywork", payload.Directory, "sub-agent directory should match")

_, err = os.Stat(wantFile)
require.NoError(t, err, "file should exist outside devcontainer")
// Allow the subagent to exit.
close(subAgentReady)
}

// TestAgent_DevcontainerRecreate tests that RecreateDevcontainer
Expand Down Expand Up @@ -2173,6 +2309,9 @@ func TestAgent_DevcontainerRecreate(t *testing.T) {
//nolint:dogsled
conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", workspaceFolder),
)
})

ctx := testutil.Context(t, testutil.WaitLong)
Expand Down
Loading
Loading
0