8000 fix socket-bypass-tcpip bpf demo · lx1036/code@42f6fff · GitHub
[go: up one dir, main page]

Skip to content

Commit 42f6fff

Browse files
author
shenming
committed
fix socket-bypass-tcpip bpf demo
1 parent 4d0c5de commit 42f6fff

File tree

2 files changed

+117
-29
lines changed

2 files changed

+117
-29
lines changed

go/k8s/bpf/xdp-l4lb/xdp-cilium-l4lb/cilium/test/tproxy/socket-bypass-tcpip/socket_bypass_tcpip_test.go

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ import (
77
"github.com/sirupsen/logrus"
88
"github.com/stretchr/testify/suite"
99
"golang.org/x/sys/unix"
10+
"net"
1011
"os"
1112
"os/signal"
1213
"syscall"
1314
"testing"
14-
"net"
1515
)
1616

1717
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go bpf test_socket_bypass_tcpip.c -- -I.
@@ -28,6 +28,10 @@ const (
2828
EXT_PORT = 7007
2929
)
3030

31+
/**
32+
NOTE: this demo has not been successfully verified yet!!!
33+
*/
34+
3135
// init enables caller reporting on the package-level logrus logger, so log
// entries emitted by this test include the function that produced them.
func init() {
3236
logrus.SetReportCaller(true)
3337
}
@@ -37,7 +41,7 @@ type SocketBypassTCPIPSuite struct {
3741

3842
objs *bpfObjects
3943
cgroupLink link.Link
40-
skMsgLink link.Link
44+
skMsgLink *ProgAttachSkMsg
4145
}
4246

4347
func TestSocketBypassTCPIPSuite(t *testing.T) {
@@ -75,7 +79,8 @@ func (s *SocketBypassTCPIPSuite) SetupSuite() {
7579
}
7680
s.cgroupLink = l1
7781

78-
// bpftool prog attach id progID msg_verdict id mapID
82+
// Equivalent to `bpftool prog attach id progID msg_verdict id mapID`: attaches the sk_msg verdict program to the sockops map.
83+
// https://github.com/cilium/cilium/blob/1c466d26ff0edfb5021d024f755d4d00bc744792/pkg/sockops/sockops.go#L47-L60
7984
l2, err := AttachSkMsg(objs.bpfPrograms.BpfTcpipBypass, objs.bpfMaps.SockOpsMap)
8085
if err != nil {
8186
logrus.Errorf("AttachSkMsg err: %v", err)
@@ -101,8 +106,10 @@ func (s *SocketBypassTCPIPSuite) TearDownSuite() {
101106
}
102107
}
103108

109+
// CGO_ENABLED=0 go test -v -testify.m ^TestSocketBypass$ .
110+
// `netstat -tulpn`
104111
func (s *SocketBypassTCPIPSuite) TestSocketBypass() {
105-
// only TCP
112+
// only TCP, listen at 127.0.0.1:7007
106113
serverFd, err := makeServer(unix.SOCK_STREAM, nil, EXT_IP4, EXT_PORT)
107114
if err != nil {
108115
return
@@ -115,7 +122,7 @@ func (s *SocketBypassTCPIPSuite) TestSocketBypass() {
115122
tcpEcho(clientFd, serverFd, "testing")
116123
}
117124

118-
// 127.0.0.1:5432 connect 127.0.0.1:7007
125+
// 127.0.0.1:5432 > 127.0.0.1:7007
119126
func makeClient(socketType int, ip string, port int) int {
120127
var err error
121128
var sockfd int
@@ -201,7 +208,7 @@ func makeServer(socketType int, reuseportProg *ebpf.Program, ip string, port int
201208
}
202209
}
203210

204-
// bind 127.0.0.1:8008
211+
// bind 127.0.0.1:7007
205212
ipAddr := net.ParseIP(ip)
206213
sa := &unix.SockaddrInet4{
207214
Port: port,
@@ -304,18 +311,52 @@ func tcpEcho(clientFd, serverFd int, echoData string) {
304311
}
305312
}
306313

307-
func AttachSkMsg(prog *ebpf.Program, bpfMap *ebpf.Map) (link.Link, error) {
314+
// ProgAttachSkMsg records what AttachSkMsg attached so that Close can issue
// the matching raw detach with the same map, program and attach type.
type ProgAttachSkMsg struct {
315+
// mapId is the kernel ID of the map the program is attached to.
mapId ebpf.MapID
316+
// program is the program that was attached.
program *ebpf.Program
317+
// attachType is the attach type that was used when attaching.
attachType ebpf.AttachType
318+
}
319+
320+
func (skMsg *ProgAttachSkMsg) Close() error {
321+
err := link.RawDetachProgram(link.RawDetachProgramOptions{
322+
Target: int(skMsg.mapId),
323+
Program: skMsg.program,
324+
Attach: skMsg.attachType,
325+
})
326+
if err != nil {
327+
return fmt.Errorf("close cgroup: %s", err)
328+
}
329+
return nil
330+
}
331+
332+
func AttachSkMsg(prog *ebpf.Program, bpfMap *ebpf.Map) (*ProgAttachSkMsg, error) {
308333
if t := prog.Type(); t != ebpf.SkMsg {
309-
return nil, fmt.Errorf("invalid program type %s, expected XDP", t)
334+
return nil, fmt.Errorf("invalid program type %s, expected SkMsg", t)
335+
}
336+
337+
info, err := bpfMap.Info()
338+
if err != nil {
339+
return nil, err
340+
}
341+
mapId, ok := info.ID()
342+
if !ok {
343+
return nil, fmt.Errorf("invalid map id: %d", mapId)
310344
}
311345

312-
rawLink, err := link.AttachRawLink(link.RawLinkOptions{
313-
Target: bpfMap.FD(),
346+
err = link.RawAttachProgram(link.RawAttachProgramOptions{
347+
Target: int(mapId),
314348
Program: prog,
315349
Attach: ebpf.AttachSkMsgVerdict,
350+
Flags: 0,
316351
})
317352

318-
return rawLink, err
353+
skMsg := &ProgAttachSkMsg{
354+
mapId: mapId,
355+
program: prog,
356+
attachType: ebpf.AttachSkMsgVerdict,
357+
}
358+
359+
return skMsg, nil
319360
}
320361

321362
func joinCgroup(path string) string {

go/k8s/bpf/xdp-l4lb/xdp-cilium-l4lb/cilium/test/tproxy/socket-bypass-tcpip/test_socket_bypass_tcpip.c

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,46 @@
55
#include <bpf/bpf_helpers.h>
66
#include <bpf/bpf_endian.h>
77

8+
#ifndef barrier
9+
# define barrier() asm volatile("": : :"memory")
10+
#endif
11+
12+
/* bpf_barrier issues the compiler memory barrier provided by barrier().
 * It generates no call of its own (always inlined) and is invoked from the
 * READ_ONCE() macro in this file.
 */
static __always_inline void bpf_barrier(void)
13+
{
14+
/* Workaround to avoid verifier complaint:
15+
* "dereference of modified ctx ptr R5 off=48+0, ctx+const is allowed,
16+
* ctx+const+const is not"
17+
*/
18+
barrier();
19+
}
20+
21+
#ifndef __READ_ONCE
22+
# define __READ_ONCE(X) (*(volatile typeof(X) *)&X)
23+
#endif
24+
25+
#ifndef READ_ONCE
26+
# define READ_ONCE(X) \
27+
({ typeof(X) __val = __READ_ONCE(X); \
28+
bpf_barrier(); \
29+
__val; })
30+
#endif
31+
832
/* sock_key is the key type used with sock_ops_map in this file: an IPv4
 * address pair, an address-family tag and a port pair. The explicit pad
 * fields and the packed attribute fix the layout so the compiler inserts no
 * implicit padding of its own.
 */
struct sock_key {
933
__u32 sip4;
1034
__u32 dip4;
1135
__u8 family;
12-
// __u8 pad1;
13-
// __u16 pad2;
36+
__u8 pad1;
37+
__u16 pad2;
1438
// // this padding required for 64bit alignment
1539
// // else ebpf kernel verifier rejects loading
1640
// // of the program
17-
// __u32 pad3;
41+
__u32 pad3;
1842
__u32 sport;
1943
__u32 dport;
20-
};
21-
//} __attribute__((packed));
44+
//};
45+
} __attribute__((packed));
2246

47+
// `bpftool map dump name sock_ops_map -j | jq`
2348
struct {
2449
__uint(type, BPF_MAP_TYPE_SOCKHASH);
2550
__uint(max_entries, 65535);
@@ -30,10 +55,15 @@ struct {
3055

3156
/* sk_msg_extract4_key fills *key from the sk_msg context of a message being
 * sent. As of this change the sender's local address/port become sip4/sport
 * and the remote address/port become dip4/dport. local_port goes through
 * bpf_htonl() before the 16-bit shift while remote_port is only shifted
 * (presumably because local_port is host order and remote_port is network
 * order, as the comments for bpf_sock_ops in this file state; confirm for
 * sk_msg_md). Only the fields assigned here are written; the caller is
 * expected to have zeroed the rest of *key.
 */
static __always_inline void sk_msg_extract4_key(struct sk_msg_md *msg, struct sock_key *key) {
3257
key->family = 1;
33-
key->sip4 = msg->remote_ip4;
34-
key->dip4 = msg->local_ip4;
35-
key->sport = msg->remote_port >> 16;
36-
key->dport = bpf_htonl(msg->local_port) >> 16;
58+
// key->sip4 = msg->remote_ip4;
59+
// key->dip4 = msg->local_ip4;
60+
// key->sport = msg->remote_port >> 16;
61+
// key->dport = bpf_htonl(msg->local_port) >> 16;
62+
63+
key->sip4 = msg->local_ip4;
64+
key->dip4 = msg->remote_ip4;
65+
key->sport = bpf_htonl(msg->local_port) >> 16;
66+
key->dport = (msg->remote_port) >> 16;
3767
}
3868

3969
// hook sendmsg call on a socket, @see SEC("cgroup/sendmsg4")
@@ -45,8 +75,9 @@ int bpf_tcpip_bypass(struct sk_msg_md *msg)
4575
struct sock_key key = {};
4676
sk_msg_extract4_key(msg, &key);
4777
// bpf_msg_redirect_map()
48-
bpf_msg_redirect_hash(msg, &sock_ops_map, &key, BPF_F_INGRESS);
49-
return SK_PASS;
78+
bpf_printk("total size of sk_msg is %d, port %d --> %d", msg->size, bpf_ntohl(msg->remote_port), msg->local_port);
79+
return (int)bpf_msg_redirect_hash(msg, &sock_ops_map, &key, BPF_F_INGRESS);
80+
// return SK_PASS;
5081
}
5182

5283
static __always_inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops) {
@@ -55,19 +86,35 @@ static __always_inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops) {
5586

5687
// keep ip and port in network byte order
5788
key.family = 1; // 只有指针才是 key->family, @see sk_msg_extract4_key(), 这里为何是 1???
58-
key.sip4 = skops->local_ip4; // 为何这里互换???
59-
key.dip4 = skops->remote_ip4;
60-
// local_port is in host byte order, and remote_port is in network byte order
61-
key.sport = bpf_htonl(skops->local_port) >> 16; // ???
62-
key.dport = skops->remote_port >> 16;
89+
// key.sip4 = skops->local_ip4; // why are these swapped here???
90+
// key.dip4 = skops->remote_ip4;
91+
// // local_port is in host byte order, and remote_port is in network byte order
92+
// key.sport = (bpf_htonl(skops->local_port) >> 16); // ???
93+
// /* clang-7.1 or higher seems to think it can do a 16-bit read here
94+
// * which unfortunately most kernels (as of October 2019) do not
95+
// * support, which leads to verifier failures. Insert a READ_ONCE
96+
// * to make sure that a 32-bit read followed by shift is generated.
97+
// */
98+
// key.dport = (skops->remote_port) >> 16;
99+
63100

101+
key.dip4 = skops->local_ip4;
102+
key.dport = (bpf_htonl(skops->local_port) >> 16);
103+
key.sip4 = skops->remote_ip4;
104+
key.sport = (skops->remote_port) >> 16;
105+
106+
/**
107+
* No error is reported here, but the map is empty: `bpftool map dump name sock_ops_map -j | jq`
108+
*/
64109
ret = (int)bpf_sock_hash_update(skops, &sock_ops_map, &key, BPF_NOEXIST);
65110
if (ret != 0) {
66111
bpf_printk("sock_hash_update() failed, ret: %d", ret);
67112
}
68113

69114
// __u32 remote_port; /* Stored in network byte order */ it is a u32 in network byte order, so it must be converted with bpf_ntohl()
70115
// __u32 local_port; /* stored in host byte order */
116+
// sockmap: op 4, port 5432 --> 7007 (the client side is 5432, the server side is 7007)
117+
// sockmap: op 5, port 7007 --> 5432
71118
bpf_printk("sockmap: op %d, port %d --> %d", skops->op, skops->local_port, bpf_ntohl(skops->remote_port));
72119
}
73120

@@ -82,8 +129,8 @@ int bpf_sockops_v4(struct bpf_sock_ops *skops)
82129
* active: source socket sending SYN
83130
* passive: destination socket responding with ACK for the SYN
84131
*/
85-
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
86-
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
132+
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: // 4
133+
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: // 5
87134
if (family == AF_INET) { // only ipv4
88135
bpf_sock_ops_ipv4(skops);
89136
}

0 commit comments

Comments
 (0)
0