chore: add additional network telemetry stats & events by ethanndickson · Pull Request #13800 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

chore: add additional network telemetry stats & events #13800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
watch for conn changes
  • Loading branch information
ethanndickson committed Jul 9, 2024
commit 0bebad47aec962cb62856dea8fd0c36c1c765116
51 changes: 43 additions & 8 deletions tailnet/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ func NewConn(options *Options) (conn *Conn, err error) {
nodeUp.setAddresses(options.Addresses)
nodeUp.setBlockEndpoints(options.BlockEndpoints)

ctx, ctxCancel := context.WithCancel(context.Background())
server := &Conn{
id: uuid.New(),
closed: make(chan struct{}),
Expand All @@ -283,6 +284,8 @@ func NewConn(options *Options) (conn *Conn, err error) {
telemetrySink: options.TelemetrySink,
telemetryStore: telemetryStore,
createdAt: time.Now(),
watchCtx: ctx,
watchCancel: ctxCancel,
}
defer func() {
if err != nil {
Expand All @@ -293,8 +296,17 @@ func NewConn(options *Options) (conn *Conn, err error) {
server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
server.telemetryStore.setNetInfo(ni)
nodeUp.setNetInfo(ni)
if server.telemetryStore.connectedIP != nil {
_, _, _, _ = server.Ping(ctx, *server.telemetryStore.connectedIP)
}
})
server.wireguardEngine.AddNetworkMapCallback(server.networkMapCallback)
server.wireguardEngine.AddNetworkMapCallback(func(nm *netmap.NetworkMap) {
Copy link
Member Author
@ethanndickson ethanndickson Jul 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use this callback to ensure we always have the latest Tailscale node data for a given peer.

server.telemetryStore.updateNetworkMap(nm)
if server.telemetryStore.connectedIP != nil {
_, _, _, _ = server.Ping(ctx, *server.telemetryStore.connectedIP)
}
})
go server.watchConnChange()
} else {
server.wireguardEngine.SetNetInfoCallback(nodeUp.setNetInfo)
}
Expand Down Expand Up @@ -361,6 +373,9 @@ type Conn struct {
telemetryStore *TelemetryStore
telemetryWg sync.WaitGroup

watchCtx context.Context
watchCancel func()

trafficStats *connstats.Statistics
}

Expand Down Expand Up @@ -542,6 +557,7 @@ func (c *Conn) Closed() <-chan struct{} {
// Close shuts down the Wireguard connection.
func (c *Conn) Close() error {
c.logger.Info(context.Background(), "closing tailnet Conn")
c.watchCancel()
c.telemetryWg.Wait()
c.configMaps.close()
c.nodeUpdater.close()
Expand Down Expand Up @@ -771,13 +787,6 @@ func (c *Conn) newTelemetryEvent() *proto.TelemetryEvent {
return event
}

// networkMapCallback hands the new network map to the telemetry store and,
// if the store has a connected peer IP recorded, pings that peer.
func (c *Conn) networkMapCallback(nm *netmap.NetworkMap) {
	store := c.telemetryStore
	store.updateNetworkMap(nm)
	if store.connectedIP == nil {
		// No peer recorded yet, so there is nothing to ping.
		return
	}
	// All four Ping results are explicitly discarded.
	_, _, _, _ = c.Ping(context.Background(), *store.connectedIP)
}

func (c *Conn) sendTelemetryBackground(e *proto.TelemetryEvent) {
c.telemetryWg.Add(1)
go func() {
Expand All @@ -786,6 +795,32 @@ func (c *Conn) sendTelemetryBackground(e *proto.TelemetryEvent) {
}()
}

// Watch for changes in the connection type (P2P<->DERP) and send telemetry events.
func (c *Conn) watchConnChange() {
ticker := time.NewTicker(time.Millisecond * 50)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we have to manually poll to check whether the connection has changed like this, P2PSetup can be ~50ms off its true value.

defer ticker.Stop()
for {
select {
case <-c.watchCtx.Done():
return
case <-ticker.C:
}
status := c.Status()
peers := status.Peers()
if len(peers) > 1 {
// Not a CLI<->agent connection, stop watching
return
} else if len(peers) == 0 {
continue
}
peer := status.Peer[peers[0]]
// If the connection type has changed, send a telemetry event with the latest ping stats
if c.telemetryStore.checkConnType(peer.Relay) && c.telemetryStore.connectedIP != nil {
_, _, _, _ = c.Ping(c.watchCtx, *c.telemetryStore.connectedIP)
}
}
}

// PeerDiagnostics is a checklist of human-readable conditions necessary to establish an encrypted
// tunnel to a peer via a Conn
type PeerDiagnostics struct {
Expand Down
18 changes: 18 additions & 0 deletions tailnet/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type TelemetryStore struct {
connectedIP *netip.Addr
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To support coderd and agent telemetry in the future, we'll need to change this to a map of peers to nodes/IPs.

// 0 if not connected
nodeIDRemote uint64
p2p bool
}

func newTelemetryStore() (*TelemetryStore, error) {
Expand Down Expand Up @@ -84,6 +85,23 @@ func (b *TelemetryStore) markConnected(ip *netip.Addr, connCreatedAt time.Time,
b.application = application
}

// checkConnType updates the stored P2P flag from relay and reports whether
// the flag changed. An empty relay string is treated as a P2P connection and
// a non-empty one as not P2P. It returns true only when the connection
// switched to or from P2P since the last call.
func (b *TelemetryStore) checkConnType(relay string) bool {
	b.mu.Lock()
	defer b.mu.Unlock()

	nowP2P := relay == ""
	if nowP2P == b.p2p {
		// Same connection type as previously recorded; nothing changed.
		return false
	}
	b.p2p = nowP2P
	return true
}

func (b *TelemetryStore) updateRemoteNodeIDLocked(nm *netmap.NetworkMap) {
if b.connectedIP == nil {
return
Expand Down
Loading
0