diff --git a/agent/api.go b/agent/api.go index e7ed1ca6a13f7..2df791d6fbb68 100644 --- a/agent/api.go +++ b/agent/api.go @@ -37,6 +37,7 @@ func (a *agent) apiHandler() http.Handler { } promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger) r.Get("/api/v0/listening-ports", lp.handler) + r.Get("/api/v0/netcheck", a.HandleNetcheck) r.Get("/debug/logs", a.HandleHTTPDebugLogs) r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock) r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState) diff --git a/agent/health.go b/agent/health.go new file mode 100644 index 0000000000000..10a2054280abd --- /dev/null +++ b/agent/health.go @@ -0,0 +1,31 @@ +package agent + +import ( + "net/http" + + "github.com/coder/coder/v2/coderd/healthcheck/health" + "github.com/coder/coder/v2/coderd/httpapi" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/healthsdk" +) + +func (a *agent) HandleNetcheck(rw http.ResponseWriter, r *http.Request) { + ni := a.TailnetConn().GetNetInfo() + + ifReport, err := healthsdk.RunInterfacesReport() + if err != nil { + httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Failed to run interfaces report", + Detail: err.Error(), + }) + return + } + + httpapi.Write(r.Context(), rw, http.StatusOK, healthsdk.AgentNetcheckReport{ + BaseReport: healthsdk.BaseReport{ + Severity: health.SeverityOK, + }, + NetInfo: ni, + Interfaces: ifReport, + }) +} diff --git a/cli/cliui/agent.go b/cli/cliui/agent.go index 95606543da5f4..a0079d3b3c81e 100644 --- a/cli/cliui/agent.go +++ b/cli/cliui/agent.go @@ -10,8 +10,11 @@ import ( "github.com/google/uuid" "golang.org/x/xerrors" + "tailscale.com/tailcfg" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/healthsdk" + "github.com/coder/coder/v2/codersdk/workspacesdk" "github.com/coder/coder/v2/tailnet" ) @@ -346,3 +349,55 @@ func PeerDiagnostics(w io.Writer, d tailnet.PeerDiagnostics) { _, _ = fmt.Fprint(w, "✘ Wireguard is not connected\n") } } + +type ConnDiags struct { + ConnInfo *workspacesdk.AgentConnectionInfo + PingP2P bool + DisableDirect bool + LocalNetInfo *tailcfg.NetInfo + LocalInterfaces *healthsdk.InterfacesReport + AgentNetcheck *healthsdk.AgentNetcheckReport + // TODO: More diagnostics +} + +func ConnDiagnostics(w io.Writer, d ConnDiags) { + if d.AgentNetcheck != nil { + for _, msg := range d.AgentNetcheck.Interfaces.Warnings { + _, _ = fmt.Fprintf(w, "❗ Agent: %s\n", msg.Message) + } + } + + if d.LocalInterfaces != nil { + for _, msg := range d.LocalInterfaces.Warnings { + _, _ = fmt.Fprintf(w, "❗ Client: %s\n", msg.Message) + } + } + + if d.PingP2P { + _, _ = fmt.Fprint(w, "✔ You are connected directly (p2p)\n") + return + } + _, _ = fmt.Fprint(w, "❗ You are connected via a DERP relay, not directly (p2p)\n") + + if d.DisableDirect { + _, _ = fmt.Fprint(w, "❗ Direct connections are disabled locally, by `--disable-direct` or `CODER_DISABLE_DIRECT`\n") + return + } + + if d.ConnInfo != nil && d.ConnInfo.DisableDirectConnections { + _, _ = fmt.Fprint(w, "❗ Your Coder administrator has blocked direct connections\n") + return + } + + if d.ConnInfo != nil && d.ConnInfo.DERPMap != nil && !d.ConnInfo.DERPMap.HasSTUN() { + _, _ = fmt.Fprint(w, "✘ The DERP map is not configured to use STUN, which will prevent direct connections from starting outside of local networks\n") + } + + if d.LocalNetInfo != nil && d.LocalNetInfo.MappingVariesByDestIP.EqualBool(true) { + _, _ = fmt.Fprint(w, "❗ Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n") + } + + if d.AgentNetcheck != nil && d.AgentNetcheck.NetInfo != nil && d.AgentNetcheck.NetInfo.MappingVariesByDestIP.EqualBool(true) { + _, _ = fmt.Fprint(w, "❗ Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n") + } +} diff --git a/cli/cliui/agent_test.go b/cli/cliui/agent_test.go index 47c9d21900751..fced0c67ea1b9 100644 --- a/cli/cliui/agent_test.go +++ b/cli/cliui/agent_test.go @@ -20,8 +20,11 @@ import ( "github.com/coder/coder/v2/cli/clitest" "github.com/coder/coder/v2/cli/cliui" + "github.com/coder/coder/v2/coderd/healthcheck/health" "github.com/coder/coder/v2/coderd/util/ptr" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/healthsdk" + "github.com/coder/coder/v2/codersdk/workspacesdk" "github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/testutil" "github.com/coder/serpent" @@ -672,3 +675,129 @@ func TestPeerDiagnostics(t *testing.T) { }) } } + +func TestConnDiagnostics(t *testing.T) { + t.Parallel() + testCases := []struct { + name string + diags cliui.ConnDiags + want []string + }{ + { + name: "Direct", + diags: cliui.ConnDiags{ + ConnInfo: &workspacesdk.AgentConnectionInfo{}, + PingP2P: true, + LocalNetInfo: &tailcfg.NetInfo{}, + }, + want: []string{ + `✔ You are connected directly (p2p)`, + }, + }, + { + name: "DirectBlocked", + diags: cliui.ConnDiags{ + ConnInfo: &workspacesdk.AgentConnectionInfo{ + DisableDirectConnections: true, + }, + }, + want: []string{ + `❗ You are connected via a DERP relay, not directly (p2p)`, + `❗ Your Coder administrator has blocked direct connections`, + }, + }, + { + name: "NoStun", + diags: cliui.ConnDiags{ + ConnInfo: &workspacesdk.AgentConnectionInfo{ + DERPMap: &tailcfg.DERPMap{}, + }, + LocalNetInfo: &tailcfg.NetInfo{}, + }, + want: []string{ + `❗ You are connected via a DERP relay, not directly (p2p)`, + `✘ The DERP map is not configured to use STUN, which will prevent direct connections from starting outside of local networks`, + }, + }, + { + name: "ClientHardNat", + diags: cliui.ConnDiags{ + LocalNetInfo: &tailcfg.NetInfo{ + MappingVariesByDestIP: "true", + }, + }, + want: []string{ + `❗ You are connected via a DERP relay, not directly (p2p)`, + `❗ Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`, + }, + }, + { + name: "AgentHardNat", + diags: cliui.ConnDiags{ + ConnInfo: &workspacesdk.AgentConnectionInfo{}, + PingP2P: false, + LocalNetInfo: &tailcfg.NetInfo{}, + AgentNetcheck: &healthsdk.AgentNetcheckReport{ + NetInfo: &tailcfg.NetInfo{MappingVariesByDestIP: "true"}, + }, + }, + want: []string{ + `❗ You are connected via a DERP relay, not directly (p2p)`, + `❗ Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`, + }, + }, + { + name: "AgentInterfaceWarnings", + diags: cliui.ConnDiags{ + PingP2P: true, + AgentNetcheck: &healthsdk.AgentNetcheckReport{ + Interfaces: healthsdk.InterfacesReport{ + BaseReport: healthsdk.BaseReport{ + Warnings: []health.Message{ + health.Messagef(health.CodeInterfaceSmallMTU, "network interface eth0 has MTU 1280, (less than 1378), which may cause problems with direct connections"), + }, + }, + }, + }, + }, + want: []string{ + `❗ Agent: network interface eth0 has MTU 1280, (less than 1378), which may cause problems with direct connections`, + `✔ You are connected directly (p2p)`, + }, + }, + { + name: "LocalInterfaceWarnings", + diags: cliui.ConnDiags{ + PingP2P: true, + LocalInterfaces: &healthsdk.InterfacesReport{ + BaseReport: healthsdk.BaseReport{ + Warnings: []health.Message{ + health.Messagef(health.CodeInterfaceSmallMTU, "network interface eth1 has MTU 1310, (less than 1378), which may cause problems with direct connections"), + }, + }, + }, + }, + want: []string{ + `❗ Client: network interface eth1 has MTU 1310, (less than 1378), which may cause problems with direct connections`, + `✔ You are connected directly (p2p)`, + }, + }, + } + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + r, w := io.Pipe() + go func() { + defer w.Close() + cliui.ConnDiagnostics(w, tc.diags) + }() + bytes, err := io.ReadAll(r) + require.NoError(t, err) + output := string(bytes) + for _, want := range tc.want { + require.Contains(t, output, want) + } + }) + } +} diff --git a/cli/ping.go b/cli/ping.go index 644754283ee58..e36a518b41c7a 100644 --- a/cli/ping.go +++ b/cli/ping.go @@ -2,7 +2,9 @@ package cli import ( "context" + "errors" "fmt" + "net/http" "time" "golang.org/x/xerrors" @@ -14,6 +16,7 @@ import ( "github.com/coder/coder/v2/cli/cliui" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/healthsdk" "github.com/coder/coder/v2/codersdk/workspacesdk" "github.com/coder/serpent" ) @@ -61,7 +64,8 @@ func (r *RootCmd) ping() *serpent.Command { if !r.disableNetworkTelemetry { opts.EnableTelemetry = true } - conn, err := workspacesdk.New(client).DialAgent(ctx, workspaceAgent.ID, opts) + client := workspacesdk.New(client) + conn, err := client.DialAgent(ctx, workspaceAgent.ID, opts) if err != nil { return err } @@ -138,11 +142,44 @@ func (r *RootCmd) ping() *serpent.Command { ) if n == int(pingNum) { - diags := conn.GetPeerDiagnostics() - cliui.PeerDiagnostics(inv.Stdout, diags) - return nil + break } } + ctx, cancel = context.WithTimeout(inv.Context(), 30*time.Second) + defer cancel() + diags := conn.GetPeerDiagnostics() + cliui.PeerDiagnostics(inv.Stdout, diags) + + connDiags := cliui.ConnDiags{ + PingP2P: didP2p, + DisableDirect: r.disableDirect, + LocalNetInfo: conn.GetNetInfo(), + } + connInfo, err := client.AgentConnectionInfoGeneric(ctx) + if err == nil { + connDiags.ConnInfo = &connInfo + } else { + _, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection info from server: %v\n", err) + } + ifReport, err := healthsdk.RunInterfacesReport() + if err == nil { + connDiags.LocalInterfaces = &ifReport + } else { + _, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve local interfaces report: %v\n", err) + } + agentNetcheck, err := conn.Netcheck(ctx) + if err == nil { + connDiags.AgentNetcheck = &agentNetcheck + } else { + var sdkErr *codersdk.Error + if errors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusNotFound { + _, _ = fmt.Fprint(inv.Stdout, "Could not generate full connection report as the workspace agent is outdated\n") + } else { + _, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection report from agent: %v\n", err) + } + } + cliui.ConnDiagnostics(inv.Stdout, connDiags) + return nil }, } diff --git a/codersdk/healthsdk/healthsdk.go b/codersdk/healthsdk/healthsdk.go index 007abff5e3277..158f630f1b4dc 100644 --- a/codersdk/healthsdk/healthsdk.go +++ b/codersdk/healthsdk/healthsdk.go @@ -273,3 +273,10 @@ type ClientNetcheckReport struct { DERP DERPHealthReport `json:"derp"` Interfaces InterfacesReport `json:"interfaces"` } + +// @typescript-ignore AgentNetcheckReport +type AgentNetcheckReport struct { + BaseReport + NetInfo *tailcfg.NetInfo `json:"net_info"` + Interfaces InterfacesReport `json:"interfaces"` +} diff --git a/codersdk/workspacesdk/agentconn.go b/codersdk/workspacesdk/agentconn.go index ed9da4c2a04bf..c7cbf31f8cbab 100644 --- a/codersdk/workspacesdk/agentconn.go +++ b/codersdk/workspacesdk/agentconn.go @@ -22,6 +22,7 @@ import ( "github.com/coder/coder/v2/coderd/tracing" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/healthsdk" "github.com/coder/coder/v2/tailnet" ) @@ -241,6 +242,23 @@ func (c *AgentConn) ListeningPorts(ctx context.Context) (codersdk.WorkspaceAgent return resp, json.NewDecoder(res.Body).Decode(&resp) } +// Netcheck returns a network check report from the workspace agent. +func (c *AgentConn) Netcheck(ctx context.Context) (healthsdk.AgentNetcheckReport, error) { + ctx, span := tracing.StartSpan(ctx) + defer span.End() + res, err := c.apiRequest(ctx, http.MethodGet, "/api/v0/netcheck", nil) + if err != nil { + return healthsdk.AgentNetcheckReport{}, xerrors.Errorf("do request: %w", err) + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return healthsdk.AgentNetcheckReport{}, codersdk.ReadBodyAsError(res) + } + + var resp healthsdk.AgentNetcheckReport + return resp, json.NewDecoder(res.Body).Decode(&resp) +} + // DebugMagicsock makes a request to the workspace agent's magicsock debug endpoint. func (c *AgentConn) DebugMagicsock(ctx context.Context) ([]byte, error) { ctx, span := tracing.StartSpan(ctx) diff --git a/tailnet/conn.go b/tailnet/conn.go index 4fd4e4facbb0d..7726525e7be70 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -294,6 +294,9 @@ func NewConn(options *Options) (conn *Conn, err error) { }() if server.telemetryStore != nil { server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) { + server.mutex.Lock() + server.lastNetInfo = ni.Clone() + server.mutex.Unlock() server.telemetryStore.setNetInfo(ni) nodeUp.setNetInfo(ni) server.telemetryStore.pingPeer(server) @@ -304,7 +307,12 @@ func NewConn(options *Options) (conn *Conn, err error) { }) go server.watchConnChange() } else { - server.wireguardEngine.SetNetInfoCallback(nodeUp.setNetInfo) + server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) { + server.mutex.Lock() + server.lastNetInfo = ni.Clone() + server.mutex.Unlock() + nodeUp.setNetInfo(ni) + }) } server.wireguardEngine.SetStatusCallback(nodeUp.setStatus) server.magicConn.SetDERPForcedWebsocketCallback(nodeUp.setDERPForcedWebsocket) @@ -373,6 +381,13 @@ type Conn struct { watchCancel func() trafficStats *connstats.Statistics + lastNetInfo *tailcfg.NetInfo +} + +func (c *Conn) GetNetInfo() *tailcfg.NetInfo { + c.mutex.Lock() + defer c.mutex.Unlock() + return c.lastNetInfo.Clone() } func (c *Conn) SetTunnelDestination(id uuid.UUID) {