8000 [FSSDK-9705] feat: add OpenTelemetry tracing (#400) · optimizely/agent@369fe72 · GitHub
[go: up one dir, main page]

Skip to content

Commit 369fe72

Browse files
committed
[FSSDK-9705] feat: add OpenTelemetry tracing (#400)
* add a basic tracing pipeline * add tracing config * update tracing config * use context * update handler with middleware * support both http & grpc protocol * refactor config * add unit test * update unit test * refactor code * make tracing disabled by default * add config doc * fix typo * collect more attributes * add common http attributes * collect http status code attribute * collect sdk key attribute * support distributed tracing * add unit test for trace id generator * update unit test * fix typo * fix typo in config file * update config.yaml * make trace_id configurable for distributed tracing
1 parent 97997ff commit 369fe72

File tree

15 files changed

+681
-68
lines changed

15 files changed

+681
-68
lines changed

cmd/optimizely/main.go

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package main
1818
import (
1919
"bytes"
2020
"context"
21+
"errors"
22+
"fmt"
2123
"os"
2224
"os/signal"
2325
"runtime"
@@ -32,6 +34,7 @@ import (
3234

3335
"github.com/optimizely/agent/config"
3436
"github.com/optimizely/agent/pkg/metrics"
37+
"github.com/optimizely/agent/pkg/middleware"
3538
"github.com/optimizely/agent/pkg/optimizely"
3639
"github.com/optimizely/agent/pkg/routers"
3740
"github.com/optimizely/agent/pkg/server"
@@ -42,6 +45,14 @@ import (
4245
// Initiate the loading of the userprofileservice plugins
4346
_ "github.com/optimizely/agent/plugins/userprofileservice/all"
4447
"github.com/optimizely/go-sdk/pkg/logging"
48+
"go.opentelemetry.io/otel"
49+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
50+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
51+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
52+
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
53+
"go.opentelemetry.io/otel/sdk/resource"
54+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
55+
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
4556
)
4657

4758
// Version holds the admin version
@@ -112,6 +123,97 @@ func initLogging(conf config.LogConfig) {
112123
}
113124
}
114125

126+
func getStdOutTraceProvider(conf config.OTELTracingConfig) (*sdktrace.TracerProvider, error) {
127+
f, err := os.Create(conf.Services.StdOut.Filename)
128+
if err != nil {
129+
return nil, fmt.Errorf("failed to create the trace file, error: %s", err.Error())
130+
}
131+
132+
exp, err := stdouttrace.New(
133+
stdouttrace.WithPrettyPrint(),
134+
stdouttrace.WithWriter(f),
135+
)
136+
if err != nil {
137+
return nil, fmt.Errorf("failed to create the collector exporter, error: %s", err.Error())
138+
}
139+
140+
res, err := resource.New(
141+
context.Background(),
142+
resource.WithAttributes(
143+
semconv.ServiceNameKey.String(conf.ServiceName),
144+
semconv.DeploymentEnvironmentKey.String(conf.Env),
145+
),
146+
)
147+
if err != nil {
148+
return nil, fmt.Errorf("failed to create the otel resource, error: %s", err.Error())
149+
}
150+
151+
return sdktrace.NewTracerProvider(
152+
sdktrace.WithBatcher(exp),
153+
sdktrace.WithResource(res),
154+
sdktrace.WithIDGenerator(middleware.NewTraceIDGenerator(conf.TraceIDHeaderKey)),
155+
), nil
156+
}
157+
158+
func getOTELTraceClient(conf config.OTELTracingConfig) (otlptrace.Client, error) {
159+
switch conf.Services.Remote.Protocol {
160+
case config.TracingRemoteProtocolHTTP:
161+
return otlptracehttp.NewClient(
162+
otlptracehttp.WithInsecure(),
163+
otlptracehttp.WithEndpoint(conf.Services.Remote.Endpoint),
164+
), nil
165+
case config.TracingRemoteProtocolGRPC:
166+
return otlptracegrpc.NewClient(
167+
otlptracegrpc.WithInsecure(),
168+
otlptracegrpc.WithEndpoint(conf.Services.Remote.Endpoint),
169+
), nil
170+
default:
171+
return nil, errors.New("unknown remote tracing protocal")
172+
}
173+
}
174+
175+
func getRemoteTraceProvider(conf config.OTELTracingConfig) (*sdktrace.TracerProvider, error) {
176+
res, err := resource.New(
177+
context.Background(),
178+
resource.WithAttributes(
179+
semconv.ServiceNameKey.String(conf.ServiceName),
180+
semconv.DeploymentEnvironmentKey.String(conf.Env),
181+
),
182+
)
183+
if err != nil {
184+
return nil, fmt.Errorf("failed to create the otel resource, error: %s", err.Error())
185+
}
186+
187+
traceClient, err := getOTELTraceClient(conf)
188+
if err != nil {
189+
return nil, fmt.Errorf("failed to create the remote trace client, error: %s", err.Error())
190+
}
191+
192+
traceExporter, err := otlptrace.New(context.Background(), traceClient)
193+
if err != nil {
194+
return nil, fmt.Errorf("failed to create the remote trace exporter, error: %s", err.Error())
195+
}
196+
197+
bsp := sdktrace.NewBatchSpanProcessor(traceExporter)
198+
return sdktrace.NewTracerProvider(
199+
sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(conf.Services.Remote.SampleRate))),
200+
sdktrace.WithResource(res),
201+
sdktrace.WithSpanProcessor(bsp),
202+
sdktrace.WithIDGenerator(middleware.NewTraceIDGenerator(conf.TraceIDHeaderKey)),
203+
), nil
204+
}
205+
206+
func initTracing(conf config.OTELTracingConfig) (*sdktrace.TracerProvider, error) {
207+
switch conf.Default {
208+
case config.TracingServiceTypeRemote:
209+
return getRemoteTraceProvider(conf)
210+
case config.TracingServiceTypeStdOut:
211+
return getStdOutTraceProvider(conf)
212+
default:
213+
return nil, errors.New("unknown tracing service type")
214+
}
215+
}
216+
115217
func setRuntimeEnvironment(conf config.RuntimeConfig) {
116218
if conf.BlockProfileRate != 0 {
117219
log.Warn().Msgf("Setting non-zero blockProfileRate is NOT recommended for production")
@@ -133,6 +235,22 @@ func main() {
133235
conf := loadConfig(v)
134236
initLogging(conf.Log)
135237

238+
if conf.Tracing.Enabled {
239+
tp, err := initTracing(conf.Tracing.OpenTelemetry)
240+
if err != nil {
241+
log.Panic().Err(err).Msg("Unable to initialize tracing")
242+
}
243+
defer func() {
244+
if err := tp.Shutdown(context.Background()); err != nil {
245+
log.Error().Err(err).Msg("Failed to shutdown tracing")
246+
}
247+
}()
248+
otel.SetTracerProvider(tp)
249+
log.Info().Msg(fmt.Sprintf("Tracing enabled with service %q", conf.Tracing.OpenTelemetry.Default))
250+
} else {
251+
log.Info().Msg("Tracing disabled")
252+
}
253+
136254
conf.LogConfigWarnings()
137255

138256
setRuntimeEnvironment(conf.Runtime)
@@ -157,7 +275,7 @@ func main() {
157275
cancel()
158276
}()
159277

160-
apiRouter := routers.NewDefaultAPIRouter(optlyCache, conf.API, agentMetricsRegistry)
278+
apiRouter := routers.NewDefaultAPIRouter(optlyCache, *conf, agentMetricsRegistry)
161279
adminRouter := routers.NewAdminRouter(*conf)
162280

163281
log.Info().Str("version", conf.Version).Msg("Starting services.")

cmd/optimizely/main_test.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,92 @@ func TestLoggingWithIncludeSdkKey(t *testing.T) {
362362
})
363363
assert.False(t, optimizely.ShouldIncludeSDKKey)
364364
}
365+
366+
func Test_initTracing(t *testing.T) {
367+
type args struct {
368+
conf config.OTELTracingConfig
369+
}
370+
tests := []struct {
371+
name string
372+
args args
373+
wantErr bool
374+
}{
375+
{
376+
name: "should return error when exporter type is not supported",
377+
args: args{
378+
conf: config.OTELTracingConfig{
379+
Default: "unsupported",
380+
},
381+
},
382+
wantErr: true,
383+
},
384+
{
385+
name: "should return no error stdout tracing exporter",
386+
args: args{
387+
conf: config.OTELTracingConfig{
388+
Default: "stdout",
389+
Services: config.TracingServiceConfig{
390+
StdOut: config.TracingStdOutConfig{
391+
Filename: "trace.out",
392+
},
393+
},
394+
},
395+
},
396+
wantErr: false,
397+
},
398+
{
399+
name: "should return no error for remote tracing exporter with http protocal",
400+
args: args{
401+
conf: config.OTELTracingConfig{
402+
Default: "remote",
403+
Services: config.TracingServiceConfig{
404+
Remote: config.TracingRemoteConfig{
405+
Endpoint: "localhost:1234",
406+
Protocol: "http",
407+
},
408+
},
409+
},
410+
},
411+
wantErr: false,
412+
},
413+
{
414+
name: "should return no error for remote tracing exporter with grpc protocal",
415+
args: args{
416+
conf: config.OTELTracingConfig{
417+
Default: "remote",
418+
Services: config.TracingServiceConfig{
419+
Remote: config.TracingRemoteConfig{
420+
Endpoint: "localhost:1234",
421+
Protocol: "grpc",
422+
},
423+
},
424+
},
425+
},
426+
wantErr: false,
427+
},
428+
{
429+
name: "should return no error for remote tracing exporter with invalid protocal",
430+
args: args{
431+
conf: config.OTELTracingConfig{
432+
Default: "remote",
433+
Services: config.TracingServiceConfig{
434+
Remote: config.TracingRemoteConfig{
435+
Endpoint: "localhost:1234",
436+
Protocol: "udp/invalid",
437+
},
438+
},
439+
},
440+
},
441+
wantErr: true,
442+
},
443+
}
444+
for _, tt := range tests {
445+
t.Run(tt.name, func(t *testing.T) {
446+
_, err := initTracing(tt.args.conf)
447+
if (err != nil) != tt.wantErr {
448+
t.Errorf("initTracing() error = %v, wantErr %v", err, tt.wantErr)
449+
return
450+
}
451+
})
452+
}
453+
}

config.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,45 @@ log:
2626
## to set whether or not the SDK key is included in the logging output.
2727
includeSdkKey: true
2828

29+
##
30+
## tracing: tracing configuration
31+
##
32+
tracing:
33+
## by default tracing is disabled
34+
## to enable tracing set enabled to true
35+
enabled: false
36+
# opentelemetry tracing configuration
37+
opentelemetry:
38+
## by default the stdout exporter is enabled
39+
## to enable remote exporter set default as "remote"
40+
default: "stdout"
41+
## tracing service name
42+
serviceName: "optimizely-agent"
43+
## tracing environment name
44+
## example: for production environment env can be set as "prod"
45+
env: "dev"
46+
## HTTP Header Key for TraceID in Distributed Tracing
47+
## The value set in HTTP Header must be a hex compliant with the W3C trace-context specification.
48+
## See more at https://www.w3.org/TR/trace-context/#trace-id
49+
traceIDHeaderKey: "X-Optimizely-Trace-ID"
50+
## tracing service configuration
51+
services:
52+
## stdout exporter configuration
53+
stdout:
54+
## for stdout, tracing data is saved in the specified file
55+
filename: "trace.out"
56+
## remote exporter configuration
57+
remote:
58+
## remote collector endpoint
59+
endpoint: "localhost:4317"
60+
## supported protocols are "http" and "grpc"
61+
protocol: "grpc"
62+
## "sampleRate" refers to the rate at which traces are collected and recorded.
63+
## sampleRate >= 1 will always sample.
64+
## a sampleRate < 0 is treated as zero, i.e. never sample.
65+
sampleRate: 1.0
66+
67+
2968
##
3069
## http server configuration
3170
##

config/config.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ func NewDefaultConfig() *AgentConfig {
7171
IncludeSDKKey: true,
7272
Level: "info",
7373
},
74+
Tracing: TracingConfig{
75+
Enabled: false,
76+
},
7477
Client: ClientConfig{
7578
PollingInterval: 1 * time.Minute,
7679
BatchSize: 10,
@@ -124,6 +127,7 @@ type AgentConfig struct {
124127
Admin AdminConfig `json:"admin"`
125128
API APIConfig `json:"api"`
126129
Log LogConfig `json:"log"`
130+
Tracing TracingConfig `json:"tracing"`
127131
Client ClientConfig `json:"client"`
128132
Runtime RuntimeConfig `json:"runtime"`
129133
Server ServerConfig `json:"server"`
@@ -173,6 +177,48 @@ type LogConfig struct {
173177
Level string `json:"level"`
174178
}
175179

180+
type TracingConfig struct {
181+
Enabled bool `json:"enabled"`
182+
OpenTelemetry OTELTracingConfig `json:"opentelemetry"`
183+
}
184+
185+
// TracingServiceType names the tracing exporter to use.
type TracingServiceType string

// Supported tracing service types.
const (
	// TracingServiceTypeStdOut is the "stdout" service type.
	TracingServiceTypeStdOut TracingServiceType = "stdout"
	// TracingServiceTypeRemote is the "remote" service type.
	TracingServiceTypeRemote TracingServiceType = "remote"
)
191+
192+
// TracingRemoteProtocol names the transport used to reach the remote collector.
type TracingRemoteProtocol string

// Supported remote tracing protocols.
const (
	// TracingRemoteProtocolGRPC is the "grpc" protocol.
	TracingRemoteProtocolGRPC TracingRemoteProtocol = "grpc"
	// TracingRemoteProtocolHTTP is the "http" protocol.
	TracingRemoteProtocolHTTP TracingRemoteProtocol = "http"
)
198+
199+
type OTELTracingConfig struct {
200+
Default TracingServiceType `json:"default"`
201+
ServiceName string `json:"serviceName"`
202+
Env string `json:"env"`
203+
TraceIDHeaderKey string `json:"traceIDHeaderKey"`
204+
Services TracingServiceConfig `json:"services"`
205+
}
206+
207+
type TracingServiceConfig struct {
208+
StdOut TracingStdOutConfig `json:"stdout"`
209+
Remote TracingRemoteConfig `json:"remote"`
210+
}
211+
212+
// TracingStdOutConfig holds the settings for the stdout tracing service.
type TracingStdOutConfig struct {
	// Filename is the configured file name.
	Filename string `json:"filename"`
}
215+
216+
type TracingRemoteConfig struct {
217+
Endpoint string `json:"endpoint"`
218+
Protocol TracingRemoteProtocol `json:"protocol"`
219+
SampleRate float64 `json:"sampleRate"`
220+
}
221+
176222
// PluginConfigs defines the generic mapping of middleware plugins
177223
type PluginConfigs map[string]interface{}
178224

0 commit comments

Comments
 (0)
0