feat: make trace provider in loadtest, add tracing to sdk (#4939) · coder/coder@d82364b · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit d82364b

Browse files
authored
feat: make trace provider in loadtest, add tracing to sdk (#4939)
1 parent fa844d0 commit d82364b

24 files changed

+754
-203
lines changed

agent/agent_test.go

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,12 @@ func TestAgent(t *testing.T) {
5858

5959
t.Run("SSH", func(t *testing.T) {
6060
t.Parallel()
61+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
62+
defer cancel()
63+
6164
conn, stats := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
6265

63-
sshClient, err := conn.SSHClient()
66+
sshClient, err := conn.SSHClient(ctx)
6467
require.NoError(t, err)
6568
defer sshClient.Close()
6669
session, err := sshClient.NewSession()
@@ -75,9 +78,12 @@ func TestAgent(t *testing.T) {
7578
t.Run("ReconnectingPTY", func(t *testing.T) {
7679
t.Parallel()
7780

81+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
82+
defer cancel()
83+
7884
conn, stats := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
7985

80-
ptyConn, err := conn.ReconnectingPTY(uuid.NewString(), 128, 128, "/bin/bash")
86+
ptyConn, err := conn.ReconnectingPTY(ctx, uuid.NewString(), 128, 128, "/bin/bash")
8187
require.NoError(t, err)
8288
defer ptyConn.Close()
8389

@@ -217,14 +223,16 @@ func TestAgent(t *testing.T) {
217223

218224
t.Run("SFTP", func(t *testing.T) {
219225
t.Parallel()
226+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
227+
defer cancel()
220228
u, err := user.Current()
221229
require.NoError(t, err, "get current user")
222230
home := u.HomeDir
223231
if runtime.GOOS == "windows" {
224232
home = "/" + strings.ReplaceAll(home, "\\", "/")
225233
}
226234
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
227-
sshClient, err := conn.SSHClient()
235+
sshClient, err := conn.SSHClient(ctx)
228236
require.NoError(t, err)
229237
defer sshClient.Close()
230238
client, err := sftp.NewClient(sshClient)
@@ -250,8 +258,11 @@ func TestAgent(t *testing.T) {
250258
t.Run("SCP", func(t *testing.T) {
251259
t.Parallel()
252260

261+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
262+
defer cancel()
263+
253264
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
254-
sshClient, err := conn.SSHClient()
265+
sshClient, err := conn.SSHClient(ctx)
255266
require.NoError(t, err)
256267
defer sshClient.Close()
257268
scpClient, err := scp.NewClientBySSH(sshClient)
@@ -386,9 +397,12 @@ func TestAgent(t *testing.T) {
386397
t.Skip("ConPTY appears to be inconsistent on Windows.")
387398
}
388399

400+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
401+
defer cancel()
402+
389403
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
390404
id := uuid.NewString()
391-
netConn, err := conn.ReconnectingPTY(id, 100, 100, "/bin/bash")
405+
netConn, err := conn.ReconnectingPTY(ctx, id, 100, 100, "/bin/bash")
392406
require.NoError(t, err)
393407
bufRead := bufio.NewReader(netConn)
394408

@@ -426,7 +440,7 @@ func TestAgent(t *testing.T) {
426440
expectLine(matchEchoOutput)
427441

428442
_ = netConn.Close()
429-
netConn, err = conn.ReconnectingPTY(id, 100, 100, "/bin/bash")
443+
netConn, err = conn.ReconnectingPTY(ctx, id, 100, 100, "/bin/bash")
430444
require.NoError(t, err)
431445
bufRead = bufio.NewReader(netConn)
432446

@@ -504,12 +518,14 @@ func TestAgent(t *testing.T) {
504518
t.Run("Speedtest", func(t *testing.T) {
505519
t.Parallel()
506520
t.Skip("This test is relatively flakey because of Tailscale's speedtest code...")
521+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
522+
defer cancel()
507523
derpMap := tailnettest.RunDERPAndSTUN(t)
508524
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{
509525
DERPMap: derpMap,
510526
}, 0)
511527
defer conn.Close()
512-
res, err := conn.Speedtest(speedtest.Upload, 250*time.Millisecond)
528+
res, err := conn.Speedtest(ctx, speedtest.Upload, 250*time.Millisecond)
513529
require.NoError(t, err)
514530
t.Logf("%.2f MBits/s", res[len(res)-1].MBitsPerSecond())
515531
})
@@ -599,7 +615,10 @@ func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) *exe
599615
if err != nil {
600616
return
601617
}
602-
ssh, err := agentConn.SSH()
618+
619+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
620+
ssh, err := agentConn.SSH(ctx)
621+
cancel()
603622
if err != nil {
604623
_ = conn.Close()
605624
return
@@ -626,8 +645,10 @@ func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) *exe
626645
}
627646

628647
func setupSSHSession(t *testing.T, options codersdk.WorkspaceAgentMetadata) *ssh.Session {
648+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
649+
defer cancel()
629650
conn, _ := setupAgent(t, options, 0)
630-
sshClient, err := conn.SSHClient()
651+
sshClient, err := conn.SSHClient(ctx)
631652
require.NoError(t, err)
632653
t.Cleanup(func() {
633654
_ = sshClient.Close()

cli/agent_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ func TestWorkspaceAgent(t *testing.T) {
198198
return err == nil
199199
}, testutil.WaitMedium, testutil.IntervalFast)
200200

201-
sshClient, err := dialer.SSHClient()
201+
sshClient, err := dialer.SSHClient(ctx)
202202
require.NoError(t, err)
203203
defer sshClient.Close()
204204
session, err := sshClient.NewSession()

cli/configssh_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"github.com/coder/coder/provisioner/echo"
2929
"github.com/coder/coder/provisionersdk/proto"
3030
"github.com/coder/coder/pty/ptytest"
31+
"github.com/coder/coder/testutil"
3132
)
3233

3334
func sshConfigFileName(t *testing.T) (sshConfig string) {
@@ -131,7 +132,9 @@ func TestConfigSSH(t *testing.T) {
131132
if err != nil {
132133
break
133134
}
134-
ssh, err := agentConn.SSH()
135+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
136+
ssh, err := agentConn.SSH(ctx)
137+
cancel()
135138
assert.NoError(t, err)
136139
wg.Add(2)
137140
go func() {

cli/loadtest.go

Lines changed: 124 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,30 @@ import (
88
"os"
99
"strconv"
1010
"strings"
11+
"sync"
1112
"time"
1213

1314
"github.com/spf13/cobra"
15+
"go.opentelemetry.io/otel/trace"
1416
"golang.org/x/xerrors"
1517

1618
"github.com/coder/coder/cli/cliflag"
19+
"github.com/coder/coder/coderd/tracing"
1720
"github.com/coder/coder/codersdk"
1821
"github.com/coder/coder/loadtest/harness"
1922
)
2023

24+
// loadtestTracerName is the name the loadtest command hands to the tracing
// package when it configures tracing and obtains its tracer.
const loadtestTracerName = "coder_loadtest"
25+
2126
func loadtest() *cobra.Command {
2227
var (
2328
configPath string
2429
outputSpecs []string
30+
31+
traceEnable bool
32+
traceCoder bool
33+
traceHoneycombAPIKey string
34+
tracePropagate bool
2535
)
2636
cmd := &cobra.Command{
2737
Use: "loadtest --config <path> [--output json[:path]] [--output text[:path]]]",
@@ -53,6 +63,8 @@ func loadtest() *cobra.Command {
5363
Hidden: true,
5464
Args: cobra.ExactArgs(0),
5565
RunE: func(cmd *cobra.Command, args []string) error {
66+
ctx := tracing.SetTracerName(cmd.Context(), loadtestTracerName)
67+
5668
config, err := loadLoadTestConfigFile(configPath, cmd.InOrStdin())
5769
if err != nil {
5870
return err
@@ -67,7 +79,7 @@ func loadtest() *cobra.Command {
6779
return err
6880
}
6981

70-
me, err := client.User(cmd.Context(), codersdk.Me)
82+
me, err := client.User(ctx, codersdk.Me)
7183
if err != nil {
7284
return xerrors.Errorf("fetch current user: %w", err)
7385
}
@@ -84,11 +96,43 @@ func loadtest() *cobra.Command {
8496
}
8597
}
8698
if !ok {
87-
return xerrors.Errorf("Not logged in as site owner. Load testing is only available to site owners.")
99+
return xerrors.Errorf("Not logged in as a site owner. Load testing is only available to site owners.")
100+
}
101+
102+
// Setup tracing and start a span.
103+
var (
104+
shouldTrace = traceEnable || traceCoder || traceHoneycombAPIKey != ""
105+
tracerProvider trace.TracerProvider = trace.NewNoopTracerProvider()
106+
closeTracingOnce sync.Once
107+
closeTracing = func(_ context.Context) error {
108+
return nil
109+
}
110+
)
111+
if shouldTrace {
112+
tracerProvider, closeTracing, err = tracing.TracerProvider(ctx, loadtestTracerName, tracing.TracerOpts{
113+
Default: traceEnable,
114+
Coder: traceCoder,
115+
Honeycomb: traceHoneycombAPIKey,
116+
})
117+
if err != nil {
118+
return xerrors.Errorf("initialize tracing: %w", err)
119+
}
120+
defer func() {
121+
closeTracingOnce.Do(func() {
122+
// Allow time for traces to flush even if command
123+
// context is canceled.
124+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
125+
defer cancel()
126+
_ = closeTracing(ctx)
127+
})
128+
}()
88129
}
130+
tracer := tracerProvider.Tracer(loadtestTracerName)
89131

90-
// Disable ratelimits for future requests.
132+
// Disable ratelimits and propagate tracing spans for future
133+
// requests. Individual tests will setup their own loggers.
91134
client.BypassRatelimits = true
135+
client.PropagateTracing = tracePropagate
92136

93137
// Prepare the test.
94138
strategy := config.Strategy.ExecutionStrategy()
@@ -99,18 +143,22 @@ func loadtest() *cobra.Command {
99143

100144
for j := 0; j < t.Count; j++ {
101145
id := strconv.Itoa(j)
102-
runner, err := t.NewRunner(client)
146+
runner, err := t.NewRunner(client.Clone())
103147
if err != nil {
104148
return xerrors.Errorf("create %q runner for %s/%s: %w", t.Type, name, id, err)
105149
}
106150

107-
th.AddRun(name, id, runner)
151+
th.AddRun(name, id, &runnableTraceWrapper{
152+
tracer: tracer,
153+
spanName: fmt.Sprintf("%s/%s", name, id),
154+
runner: runner,
155+
})
108156
}
109157
}
110158

111159
_, _ = fmt.Fprintln(cmd.ErrOrStderr(), "Running load test...")
112160

113-
testCtx := cmd.Context()
161+
testCtx := ctx
114162
if config.Timeout > 0 {
115163
var cancel func()
116164
testCtx, cancel = context.WithTimeout(testCtx, time.Duration(config.Timeout))
@@ -158,11 +206,24 @@ func loadtest() *cobra.Command {
158206

159207
// Cleanup.
160208
_, _ = fmt.Fprintln(cmd.ErrOrStderr(), "\nCleaning up...")
161-
err = th.Cleanup(cmd.Context())
209+
err = th.Cleanup(ctx)
162210
if err != nil {
163211
return xerrors.Errorf("cleanup tests: %w", err)
164212
}
165213

214+
// Upload traces.
215+
if shouldTrace {
216+
_, _ = fmt.Fprintln(cmd.ErrOrStderr(), "\nUploading traces...")
217+
closeTracingOnce.Do(func() {
218+
ctx, cancel := context.WithTimeout(ctx, 1*time.Minute)
219+
defer cancel()
220+
err := closeTracing(ctx)
221+
if err != nil {
222+
_, _ = fmt.Fprintf(cmd.ErrOrStderr(), "\nError uploading traces: %+v\n", err)
223+
}
224+
})
225+
}
226+
166227
if res.TotalFail > 0 {
167228
return xerrors.New("load test failed, see above for more details")
168229
}
@@ -173,6 +234,12 @@ func loadtest() *cobra.Command {
173234

174235
cliflag.StringVarP(cmd.Flags(), &configPath, "config", "", "CODER_LOADTEST_CONFIG_PATH", "", "Path to the load test configuration file, or - to read from stdin.")
175236
cliflag.StringArrayVarP(cmd.Flags(), &outputSpecs, "output", "", "CODER_LOADTEST_OUTPUTS", []string{"text"}, "Output formats, see usage for more information.")
237+
238+
cliflag.BoolVarP(cmd.Flags(), &traceEnable, "trace", "", "CODER_LOADTEST_TRACE", false, "Whether application tracing data is collected. It exports to a backend configured by environment variables. See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md")
239+
cliflag.BoolVarP(cmd.Flags(), &traceCoder, "trace-coder", "", "CODER_LOADTEST_TRACE_CODER", false, "Whether opentelemetry traces are sent to Coder. We recommend keeping this disabled unless we advise you to enable it.")
240+
cliflag.StringVarP(cmd.Flags(), &traceHoneycombAPIKey, "trace-honeycomb-api-key", "", "CODER_LOADTEST_TRACE_HONEYCOMB_API_KEY", "", "Enables trace exporting to Honeycomb.io using the provided API key.")
241+
cliflag.BoolVarP(cmd.Flags(), &tracePropagate, "trace-propagate", "", "CODER_LOADTEST_TRACE_PROPAGATE", false, "Enables trace propagation to the Coder backend, which will be used to correlate server-side spans with client-side spans. Only enable this if the server is configured with the exact same tracing configuration as the client.")
242+
176243
return cmd
177244
}
178245

@@ -271,3 +338,53 @@ func parseLoadTestOutputs(outputs []string) ([]loadTestOutput, error) {
271338

272339
return out, nil
273340
}
341+
342+
type runnableTraceWrapper struct {
343+
tracer trace.Tracer
344+
spanName string
345+
runner harness.Runnable
346+
347+
span trace.Span
348+
}
349+
350+
var _ harness.Runnable = &runnableTraceWrapper{}
351+
var _ harness.Cleanable = &runnableTraceWrapper{}
352+
353+
func (r *runnableTraceWrapper) Run(ctx context.Context, id string, logs io.Writer) error {
354+
ctx, span := r.tracer.Start(ctx, r.spanName, trace.WithNewRoot())
355+
defer span.End()
356+
r.span = span
357+
358+
traceID := "unknown trace ID"
359+
spanID := "unknown span ID"
360+
if span.SpanContext().HasTraceID() {
361+
traceID = span.SpanContext().TraceID().String()
362+
}
363+
if span.SpanContext().HasSpanID() {
364+
spanID = span.SpanContext().SpanID().String()
365+
}
366+
_, _ = fmt.Fprintf(logs, "Trace ID: %s\n", traceID)
367+
_, _ = fmt.Fprintf(logs, "Span ID: %s\n\n", spanID)
368+
369+
// Make a separate span for the run itself so the sub-spans are grouped
370+
// neatly. The cleanup span is also a child of the above span so this is
371+
// important for readability.
372+
ctx2, span2 := r.tracer.Start(ctx, r.spanName+" run")
373+
defer span2.End()
374+
return r.runner.Run(ctx2, id, logs)
375+
}
376+
377+
func (r *runnableTraceWrapper) Cleanup(ctx context.Context, id string) error {
378+
c, ok := r.runner.(harness.Cleanable)
379+
if !ok {
380+
return nil
381+
}
382+
383+
if r.span != nil {
384+
ctx = trace.ContextWithSpanContext(ctx, r.span.SpanContext())
385+
}
386+
ctx, span := r.tracer.Start(ctx, r.spanName+" cleanup")
387+
defer span.End()
388+
389+
return c.Cleanup(ctx, id)
390+
}

cli/loadtest_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ func TestLoadTest(t *testing.T) {
277277
require.NoError(t, err, msg)
278278
}
279279

280+
t.Logf("output %d:\n\n%s", i, string(b))
281+
280282
switch output.format {
281283
case "text":
282284
require.Contains(t, string(b), "Test results:", msg)

cli/server.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,9 @@ func Server(vip *viper.Viper, newAPI func(context.Context, *coderd.Options) (*co
128128

129129
if cfg.Trace.Enable.Value || shouldCoderTrace {
130130
sdkTracerProvider, closeTracing, err := tracing.TracerProvider(ctx, "coderd", tracing.TracerOpts{
131-
Default: cfg.Trace.Enable.Value,
132-
Coder: shouldCoderTrace,
131+
Default: cfg.Trace.Enable.Value,
132+
Coder: shouldCoderTrace,
133+
Honeycomb: cfg.Trace.HoneycombAPIKey.Value,
133134
})
134135
if err != nil {
135136
logger.Warn(ctx, "start telemetry exporter", slog.Error(err))

cli/speedtest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ func speedtest() *cobra.Command {
9595
dir = tsspeedtest.Upload
9696
}
9797
cmd.Printf("Starting a %ds %s test...\n", int(duration.Seconds()), dir)
98-
results, err := conn.Speedtest(dir, duration)
98+
results, err := conn.Speedtest(ctx, dir, duration)
9999
if err != nil {
100100
return err
101101
}

0 commit comments

Comments
 (0)
0