10000 fix: stop selecting direct connections with too-small MTU (#85) · coder/tailscale@83bb998 · GitHub
[go: up one dir, main page]

Skip to content

Commit 83bb998

Browse files
authored
fix: stop selecting direct connections with too-small MTU (#85)
related to: coder/coder#15523 This PR: 1. pads disco Ping and Pong packets so they are the largest size we expect to get from the inner tunnel IP stack, which has a hardcoded MTU of 1280 (minimum to support IPv6). 2. configures Windows, macOS, and Linux _not_ to fragment UDP packets sent out over the magicsock. The end result is that Disco Ping and Pong packets are not directly exchanged over paths with too-small MTU, and thus, those endpoints are not chosen for direct connections. (Alternate direct paths with bigger MTU may be chosen, or we may fall back to DERP.)
2 parents 84e6aa3 + 4ec7ef5 commit 83bb998

File tree

6 files changed

+208
-12
lines changed

6 files changed

+208
-12
lines changed

disco/disco.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"net/netip"
2828

2929
"go4.org/mem"
30+
"golang.org/x/crypto/nacl/box"
3031
"tailscale.com/types/key"
3132
)
3233

@@ -48,6 +49,19 @@ const (
4849

4950
const v0 = byte(0)
5051

52+
// v1 Ping and Pong are padded as follows. CallMeMaybe is still on v0 and unpadded.
53+
const v1 = byte(1)
54+
55+
// paddedPayloadLen is the desired length we want to pad Ping and Pong payloads
56+
// to so that they are the maximum size of a Wireguard packet we would
57+
// subsequently send. This ensures that any UDP paths we discover will actually
58+
// support the packet sizes the net stack will send over those paths. Any peers
59+
// behind a small-MTU link will have to depend on DERP.
60+
// c.f. https://github.com/coder/coder/issues/15523
61+
// Our inner IP packets can be up to 1280 bytes, with the Wireguard header of
62+
// 30 bytes, that is 1310. The final 2 is the inner payload header's type and version.
63+
const paddedPayloadLen = 1310 - len(Magic) - keyLen - NonceLen - box.Overhead - 2
64+
5165
var errShort = errors.New("short message")
5266

5367
// LooksLikeDiscoWrapper reports whether p looks like it's a packet
@@ -120,12 +134,8 @@ type Ping struct {
120134
}
121135

122136
func (m *Ping) AppendMarshal(b []byte) []byte {
123-
dataLen := 12
124137
hasKey := !m.NodeKey.IsZero()
125-
if hasKey {
126-
dataLen += key.NodePublicRawLen
127-
}
128-
ret, d := appendMsgHeader(b, TypePing, v0, dataLen)
138+
ret, d := appendMsgHeader(b, TypePing, v1, paddedPayloadLen)
129139
n := copy(d, m.TxID[:])
130140
if hasKey {
131141
m.NodeKey.AppendTo(d[:n])
@@ -217,7 +227,7 @@ type Pong struct {
217227
const pongLen = 12 + 16 + 2
218228

219229
func (m *Pong) AppendMarshal(b []byte) []byte {
220-
ret, d := appendMsgHeader(b, TypePong, v0, pongLen)
230+
ret, d := appendMsgHeader(b, TypePong, v1, paddedPayloadLen)
221231
d = d[copy(d, m.TxID[:]):]
222232
ip16 := m.Src.Addr().As16()
223233
d = d[copy(d, ip16[:]):]

disco/disco_test.go

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"testing"
1212

1313
"go4.org/mem"
14+
"golang.org/x/crypto/nacl/box"
1415
"tailscale.com/types/key"
1516
)
1617

@@ -25,31 +26,31 @@ func TestMarshalAndParse(t *testing.T) {
2526
m: &Ping{
2627
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
2728
},
28-
want: "01 00 01 02 03 04 05 06 07 08 09 0a 0b 0c",
29+
want: "01 01 01 02 03 04 05 06 07 08 09 0a 0b 0c",
2930
},
3031
{
3132
name: "ping_with_nodekey_src",
3233
m: &Ping{
3334
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3435
NodeKey: key.NodePublicFromRaw32(mem.B([]byte{1: 1, 2: 2, 30: 30, 31: 31})),
3536
},
36-
want: "01 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 01 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 1e 1f",
37+
want: "01 01 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 01 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 1e 1f",
3738
},
3839
{
3940
name: "pong",
4041
m: &Pong{
4142
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
4243
Src: mustIPPort("2.3.4.5:1234"),
4344
},
44-
want: "02 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 00 00 00 00 00 00 00 00 00 ff ff 02 03 04 05 04 d2",
45+
want: "02 01 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 00 00 00 00 00 00 00 00 00 ff ff 02 03 04 05 04 d2",
4546
},
4647
{
4748
name: "pongv6",
4849
m: &Pong{
4950
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
5051
Src: mustIPPort("[fed0::12]:6666"),
5152
},
52-
want: "02 00 01 02 03 04 05 06 07 08 09 0a 0b 0c fe d0 00 00 00 00 00 00 00 00 00 00 00 00 00 12 1a 0a",
53+
want: "02 01 01 02 03 04 05 06 07 08 09 0a 0b 0c fe d0 00 00 00 00 00 00 00 00 00 00 00 00 00 12 1a 0a",
5354
},
5455
{
5556
name: "call_me_maybe",
@@ -75,10 +76,23 @@ func TestMarshalAndParse(t *testing.T) {
7576
if !ok {
7677
t.Fatalf("didn't start with foo: got %q", got)
7778
}
79+
// CODER: 1310 is max size of a Wireguard packet we will send.
80+
expectedLen := 1310 - len(Magic) - keyLen - NonceLen - box.Overhead
81+
switch tt.m.(type) {
82+
case *Ping:
83+
if len(got) != expectedLen {
84+
t.Fatalf("Ping not padded: got len %d, want len %d", len(got), expectedLen)
85+
}
86+
case *Pong:
87+
if len(got) != expectedLen {
88+
t.Fatalf("Pong not padded: got len %d, want len %d", len(got), expectedLen)
89+
}
90+
// CallMeMaybe is unpadded
91+
}
7892

7993
gotHex := fmt.Sprintf("% x", got)
80-
if gotHex != tt.want {
81-
t.Fatalf("wrong marshal\n got: %s\nwant: %s\n", gotHex, tt.want)
94+
if !strings.HasPrefix(gotHex, tt.want) {
95+
t.Fatalf("wrong marshal\n got: %s\nwant prefix: %s\n", gotHex, tt.want)
8296
}
8397

8498
back, err := Parse([]byte(got))
@@ -92,6 +106,69 @@ func TestMarshalAndParse(t *testing.T) {
92106
}
93107
}
94108

109+
func TestParsePingPongV0(t *testing.T) {
110+
tests := []struct {
111+
name string
112+
payload []byte
113+
m Message
114+
}{
115+
{
116+
name: "ping",
117+
m: &Ping{
118+
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
119+
},
120+
payload: []byte{0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c},
121+
},
122+
{
123+
name: "ping_with_nodekey_src",
124+
m: &Ping{
125+
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
126+
NodeKey: key.NodePublicFromRaw32(mem.B([]byte{1: 1, 2: 2, 30: 30, 31: 31})),
127+
},
128+
payload: []byte{
129+
0x01, 0x00,
130+
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
131+
0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
132+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x1f},
133+
},
134+
{
135+
name: "pong",
136+
m: &Pong{
137+
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
138+
Src: mustIPPort("2.3.4.5:1234"),
139+
},
140+
payload: []byte{
141+
0x02, 0x00,
142+
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
143+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x02, 0x03, 0x04, 0x05,
144+
0x04, 0xd2},
145+
},
146+
{
147+
name: "pongv6",
148+
m: &Pong{
149+
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
150+
Src: mustIPPort("[fed0::12]:6666"),
151+
},
152+
payload: []byte{
153+
0x02, 0x00,
154+
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
155+
0xfe, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12,
156+
0x1a, 0x0a},
157+
},
158+
}
159+
for _, tt := range tests {
160+
t.Run(tt.name, func(t *testing.T) {
161+
back, err := Parse(tt.payload)
162+
if err != nil {
163+
t.Fatalf("parse back: %v", err)
164+
}
165+
if !reflect.DeepEqual(back, tt.m) {
166+
t.Errorf("message in %+v doesn't match Parse result %+v", tt.m, back)
167+
}
168+
})
169+
}
170+
}
171+
95172
func mustIPPort(s string) netip.AddrPort {
96173
ipp, err := netip.ParseAddrPort(s)
97174
if err != nil {

wgengine/magicsock/magicsock.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2377,6 +2377,12 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur
23772377
continue
23782378
}
23792379
trySetSocketBuffer(pconn, c.logf)
2380+
// CODER: https://github.com/coder/coder/issues/15523
2381+
// Attempt to tell the OS not to fragment packets over this interface. We pad disco Ping and Pong packets to the
2382+
// size of the direct UDP packets that get sent for direct connections. Thus, any interfaces or paths that
2383+
// cannot fully support direct connections due to MTU limitations will not be selected. If no direct paths meet
2384+
// the MTU requirements for a peer, we will fall back to DERP for that peer.
2385+
tryPreventFragmentation(pconn, c.logf, network)
23802386
// Success.
23812387
if debugBindSocket() {
23822388
c.logf("magicsock: bindSocket: successfully listened %v port %d", network, port)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package magicsock
2+
3+
import (
4+
"net"
5+
6+
"golang.org/x/sys/unix"
7+
"tailscale.com/types/logger"
8+
"tailscale.com/types/nettype"
9+
)
10+
11+
func tryPreventFragmentation(pconn nettype.PacketConn, logf logger.Logf, network string) {
12+
if c, ok := pconn.(*net.UDPConn); ok {
13+
s, err := c.SyscallConn()
14+
if err != nil {
15+
logf("magicsock: dontfrag: failed to get syscall conn: %v", err)
16+
}
17+
level := unix.IPPROTO_IP
18+
option := unix.IP_DONTFRAG
19+
if network == "udp6" {
20+
level = unix.IPPROTO_IPV6
21+
option = unix.IPV6_DONTFRAG
22+
}
23+
err = s.Control(func(fd uintptr) {
24+
err := unix.SetsockoptInt(int(fd), level, option, 1)
25+
if err != nil {
26+
logf("magicsock: dontfrag: SetsockoptInt failed: %v", err)
27+
}
28+
})
29+
if err != nil {
30+
logf("magicsock: dontfrag: control connection failed: %v", err)
31+
}
32+
logf("magicsock: dontfrag: success on %s", pconn.LocalAddr().String())
33+
return
34+
}
35+
logf("magicsock: dontfrag: failed because it was not a UDPConn")
36+
}

wgengine/magicsock/magicsock_linux.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,3 +404,30 @@ func init() {
404404
// message. These contain a single uint16 of data.
405405
controlMessageSize = unix.CmsgSpace(2)
406406
}
407+
408+
func tryPreventFragmentation(pconn nettype.PacketConn, logf logger.Logf, network string) {
409+
if c, ok := pconn.(*net.UDPConn); ok {
410+
s, err := c.SyscallConn()
411+
if err != nil {
412+
logf("magicsock: dontfrag: failed to get syscall conn: %v", err)
413+
}
414+
level := unix.IPPROTO_IP
415+
option := unix.IP_MTU_DISCOVER
416+
if network == "udp6" {
417+
level = unix.IPPROTO_IPV6
418+
option = unix.IPV6_MTU_DISCOVER
419+
}
420+
err = s.Control(func(fd uintptr) {
421+
err := unix.SetsockoptInt(int(fd), level, option, unix.IP_PMTUDISC_DO)
422+
if err != nil {
423+
logf("magicsock: dontfrag: SetsockoptInt failed: %v", err)
424+
}
425+
})
426+
if err != nil {
427+
logf("magicsock: dontfrag: control connection failed: %v", err)
428+
}
429+
logf("magicsock: dontfrag: success on %s", pconn.LocalAddr().String())
430+
return
431+
}
432+
logf("magicsock: dontfrag: failed because it was not a UDPConn")
433+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package magicsock
2+
3+
import (
4+
"net"
5+
6+
"golang.org/x/sys/windows"
7+
"tailscale.com/types/logger"
8+
"tailscale.com/types/nettype"
9+
)
10+
11+
// https://github.com/tpn/winsdk-10/blob/9b69fd26ac0c7d0b83d378dba01080e93349c2ed/Include/10.0.16299.0/shared/ws2ipdef.h
12+
const (
13+
IP_MTU_DISCOVER = 71 // IPV6_MTU_DISCOVER has the same value, which is nice.
14+
IP_PMTUDISC_DO = 1
15+
)
16+
17+
func tryPreventFragmentation(pconn nettype.PacketConn, logf logger.Logf, network string) {
18+
if c, ok := pconn.(*net.UDPConn); ok {
19+
s, err := c.SyscallConn()
20+
if err != nil {
21+
logf("magicsock: dontfrag: failed to get syscall conn: %v", err)
22+
}
23+
level := windows.IPPROTO_IP
24+
if network == "udp6" {
25+
level = windows.IPPROTO_IPV6
26+
}
27+
err = s.Control(func(fd uintptr) {
28+
err := windows.SetsockoptInt(windows.Handle(fd), level, IP_MTU_DISCOVER, IP_PMTUDISC_DO)
29+
if err != nil {
30+
logf("magicsock: dontfrag: SetsockoptInt failed: %v", err)
31+
}
32+
})
33+
if err != nil {
34+
logf("magicsock: dontfrag: control connection failed: %v", err)
35+
}
36+
logf("magicsock: dontfrag: success on %s", pconn.LocalAddr().String())
37+
return
38+
}
39+
logf("magicsock: dontfrag: failed because it was not a UDPConn")
40+
}

0 commit comments

Comments
 (0)
0