5
5
#include <bpf/bpf_helpers.h>
6
6
#include <bpf/bpf_endian.h>
7
7
8
/*
 * barrier() - compiler-only memory barrier.
 *
 * An empty asm statement with a "memory" clobber: it emits no instructions
 * but tells the compiler that memory may have changed, so loads and stores
 * are not cached in registers or reordered across it.
 *
 * There must be no whitespace between the macro name and "(" in the
 * definition, otherwise it becomes an object-like macro. __asm__ and
 * __volatile__ are used because the bare `asm` keyword is not available in
 * strict ISO C modes.
 */
#ifndef barrier
# define barrier() __asm__ __volatile__("" : : : "memory")
#endif
11
+
12
+ static __always_inline void bpf_barrier (void )
13
+ {
14
+ /* Workaround to avoid verifier complaint:
15
+ * "dereference of modified ctx ptr R5 off=48+0, ctx+const is allowed,
16
+ * ctx+const+const is not"
17
+ */
18
+ barrier ();
19
+ }
20
+
21
/*
 * __READ_ONCE(X) - read the lvalue X through a volatile-qualified access.
 *
 * X must be an lvalue (its address is taken). The argument is parenthesized
 * so that expressions such as `*(p + 1)` or `s->field` expand safely, and
 * __typeof__ is used so the macro also works where the plain `typeof`
 * keyword is not enabled.
 */
#ifndef __READ_ONCE
# define __READ_ONCE(X) (*(volatile __typeof__(X) *)&(X))
#endif
24
+
25
/*
 * READ_ONCE(X) - load the lvalue X once and yield the loaded value.
 *
 * Performs a volatile read of X via __READ_ONCE(), then calls
 * bpf_barrier() before the value is used. Implemented as a GNU statement
 * expression, so the macro evaluates to the value that was read.
 */
#ifndef READ_ONCE
# define READ_ONCE(X)						\
	({ __typeof__(X) __val = __READ_ONCE(X);		\
	   bpf_barrier();					\
	   __val; })
#endif
31
+
8
32
struct sock_key {
9
33
__u32 sip4 ;
10
34
__u32 dip4 ;
11
35
__u8 family ;
12
- // __u8 pad1;
13
- // __u16 pad2;
36
+ __u8 pad1 ;
37
+ __u16 pad2 ;
14
38
// // this padding required for 64bit alignment
15
39
// // else ebpf kernel verifier rejects loading
16
40
// // of the program
17
- // __u32 pad3;
41
+ __u32 pad3 ;
18
42
__u32 sport ;
19
43
__u32 dport ;
20
- };
21
- // } __attribute__((packed));
44
+ // };
45
+ } __attribute__((packed ));
22
46
47
// Inspect the contents of this map from userspace with:
//   `bpftool map dump name sock_ops_map -j | jq`
23
48
struct {
24
49
__uint (type , BPF_MAP_TYPE_SOCKHASH );
25
50
__uint (max_entries , 65535 );
@@ -30,10 +55,15 @@ struct {
30
55
31
56
/*
 * sk_msg_extract4_key() - build the sock_ops_map lookup key for a message.
 *
 * @msg: sk_msg context of the socket that is sending.
 * @key: key to fill; only family, sip4, dip4, sport and dport are written,
 *       so the caller must pass a zero-initialized key.
 *
 * The sending socket's local address/port become the key's source and its
 * remote address/port become the destination. This mirrors the key that
 * bpf_sock_ops_ipv4() builds when inserting a socket (there, source is the
 * remote side and destination is the local side).
 */
static __always_inline void sk_msg_extract4_key(struct sk_msg_md *msg, struct sock_key *key)
{
	/* NOTE(review): 1 is a tag that only has to match the value used on
	 * the insert side; it is not AF_INET. */
	key->family = 1;

	key->sip4 = msg->local_ip4;
	key->dip4 = msg->remote_ip4;

	/* local_port is in host byte order: convert it, then keep the upper
	 * 16 bits of the 32-bit result. */
	key->sport = bpf_htonl(msg->local_port) >> 16;
	/* remote_port is already in network byte order: only the shift is
	 * needed. */
	key->dport = msg->remote_port >> 16;
}
38
68
39
69
// hook sendmsg call on a socket, @see SEC("cgroup/sendmsg4")
@@ -45,8 +75,9 @@ int bpf_tcpip_bypass(struct sk_msg_md *msg)
45
75
struct sock_key key = {};
46
76
sk_msg_extract4_key (msg , & key );
47
77
// bpf_msg_redirect_map()
48
- bpf_msg_redirect_hash (msg , & sock_ops_map , & key , BPF_F_INGRESS );
49
- return SK_PASS ;
78
+ bpf_printk ("total size of sk_msg is %d, port %d --> %d" , msg -> size , bpf_ntohl (msg -> remote_port ), msg -> local_port );
79
+ return (int )bpf_msg_redirect_hash (msg , & sock_ops_map , & key , BPF_F_INGRESS );
80
+ // return SK_PASS;
50
81
}
51
82
52
83
static __always_inline void bpf_sock_ops_ipv4 (struct bpf_sock_ops * skops ) {
@@ -55,19 +86,35 @@ static __always_inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops) {
55
86
56
87
// keep ip and port in network byte order
57
88
key .family = 1 ; // 只有指针才是 key->family, @see sk_msg_extract4_key(), 这里为何是 1???
58
- key .sip4 = skops -> local_ip4 ; // 为何这里互换???
59
- key .dip4 = skops -> remote_ip4 ;
60
- // local_port is in host byte order, and remote_port is in network byte order
61
- key .sport = bpf_htonl (skops -> local_port ) >> 16 ; // ???
62
- key .dport = skops -> remote_port >> 16 ;
89
+ // key.sip4 = skops->local_ip4; // 为何这里互换???
90
+ // key.dip4 = skops->remote_ip4;
91
+ // // local_port is in host byte order, and remote_port is in network byte order
92
+ // key.sport = (bpf_htonl(skops->local_port) >> 16); // ???
93
+ // /* clang-7.1 or higher seems to think it can do a 16-bit read here
94
+ // * which unfortunately most kernels (as of October 2019) do not
95
+ // * support, which leads to verifier failures. Insert a READ_ONCE
96
+ // * to make sure that a 32-bit read followed by shift is generated.
97
+ // */
98
+ // key.dport = (skops->remote_port) >> 16;
99
+
63
100
101
+ key .dip4 = skops -> local_ip4 ;
102
+ key .dport = (bpf_htonl (skops -> local_port ) >> 16 );
103
+ key .sip4 = skops -> remote_ip4 ;
104
+ key .sport = (skops -> remote_port ) >> 16 ;
105
+
106
+ /**
107
+ * 这里没有报错,但是 map 里为空: `bpftool map dump name sock_ops_map -j | jq`
108
+ */
64
109
ret = (int )bpf_sock_hash_update (skops , & sock_ops_map , & key , BPF_NOEXIST );
65
110
if (ret != 0 ) {
66
111
bpf_printk ("sock_hash_update() failed, ret: %d" , ret );
67
112
}
68
113
69
114
// __u32 remote_port; /* Stored in network byte order */ 因为是 network byte order,且是 u32,所以必须 bpf_ntohl()
70
115
// __u32 local_port; /* stored in host byte order */
116
+ // sockmap: op 4, port 5432 --> 7007, client 端是 5432, server 端是 7007
117
+ // sockmap: op 5, port 7007 --> 5432
71
118
bpf_printk ("sockmap: op %d, port %d --> %d" , skops -> op , skops -> local_port , bpf_ntohl (skops -> remote_port ));
72
119
}
73
120
@@ -82,8 +129,8 @@ int bpf_sockops_v4(struct bpf_sock_ops *skops)
82
129
* active: source socket sending SYN
83
130
* passive: destination socket responding with ACK for the SYN
84
131
*/
85
- case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB :
86
- case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB :
132
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB : // 4
133
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB : // 5
87
134
if (family == AF_INET ) { // only ipv4
88
135
bpf_sock_ops_ipv4 (skops );
89
136
}
0 commit comments