Merge pull request #3076 from tomastigera/tomas-bpf-fix-ext-local · projectcalico/felix@e34477f · GitHub
[go: up one dir, main page]

Skip to content

Commit e34477f

Browse files
authored
Merge pull request #3076 from tomastigera/tomas-bpf-fix-ext-local
bpf: fix ExternalTrafficPolicy=Local
2 parents ce618bd + dc645b7 commit e34477f

File tree

10 files changed

+137
-55
lines changed

10 files changed

+137
-55
lines changed

bpf-gpl/connect_balancer.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ static CALI_BPF_INLINE int do_nat_common(struct bpf_sock_addr *ctx, __u8 proto,
4040
nat_lookup_result res = NAT_LOOKUP_ALLOW;
4141
__u16 dport_he = (__u16)(bpf_ntohl(ctx->user_port)>>16);
4242
struct calico_nat_dest *nat_dest;
43-
nat_dest = calico_v4_nat_lookup3(0, ctx->user_ip4, proto, dport_he, false, &res,
43+
nat_dest = calico_v4_nat_lookup(0, ctx->user_ip4, proto, dport_he, false, &res,
4444
proto == IPPROTO_UDP && !connect ? UDP_NOT_SEEN_TIMEO : 0, /* enforce affinity UDP */
45-
proto == IPPROTO_UDP && !connect /* update affinity timer */);
45+
proto == IPPROTO_UDP && !connect /* update affinity timer */);
4646
if (!nat_dest) {
4747
CALI_INFO("NAT miss.\n");
4848
if (res == NAT_NO_BACKEND) {

bpf-gpl/nat_lookup.h

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
#include "routes.h"
1616
#include "nat_types.h"
1717

18-
static CALI_BPF_INLINE struct calico_nat_dest* calico_v4_nat_lookup3(__be32 ip_src,
19-
__be32 ip_dst,
20-
__u8 ip_proto,
21-
__u16 dport,
22-
bool from_tun,
23-
nat_lookup_result *res,
24-
int affinity_always_timeo,
25-
bool affinity_tmr_update)
18+
static CALI_BPF_INLINE struct calico_nat_dest* calico_v4_nat_lookup(__be32 ip_src,
19+
__be32 ip_dst,
20+
__u8 ip_proto,
21+
__u16 dport,
22+
bool from_tun,
23+
nat_lookup_result *res,
24+
int affinity_always_timeo,
25+
bool affinity_tmr_update)
2626
{
2727
struct calico_nat_v4_key nat_key = {
2828
.prefixlen = NAT_PREFIX_LEN_WITH_SRC_MATCH_IN_BITS,
@@ -102,7 +102,27 @@ static CALI_BPF_INLINE struct calico_nat_dest* calico_v4_nat_lookup3(__be32 ip_s
102102
*res = NAT_FE_LOOKUP_DROP;
103103
return NULL;
104104
}
105-
__u32 count = from_tun ? nat_lv1_val->local : nat_lv1_val->count;
105+
__u32 count = nat_lv1_val->count;
106+
107+
if (from_tun) {
108+
count = nat_lv1_val->local;
109+
} else if (nat_lv1_val->flags & (NAT_FLG_INTERNAL_LOCAL | NAT_FLG_EXTERNAL_LOCAL)) {
110+
bool local_traffic = true;
111+
112+
if (CALI_F_FROM_HEP) {
113+
struct cali_rt *rt = cali_rt_lookup(ip_src);
114+
115+
if (!rt || (!cali_rt_is_host(rt) && !cali_rt_is_workload(rt))) {
116+
local_traffic = false;
117+
}
118+
}
119+
120+
if ((local_traffic && (nat_lv1_val->flags & NAT_FLG_INTERNAL_LOCAL)) ||
121+
(!local_traffic && (nat_lv1_val->flags & NAT_FLG_EXTERNAL_LOCAL))) {
122+
count = nat_lv1_val->local;
123+
CALI_DEBUG("local_traffic %d count %d flags 0x%x\n", local_traffic, count, nat_lv1_val->flags);
124+
}
125+
}
106126

107127
CALI_DEBUG("NAT: 1st level hit; id=%d\n", nat_lv1_val->id);
108128

@@ -179,18 +199,12 @@ static CALI_BPF_INLINE struct calico_nat_dest* calico_v4_nat_lookup3(__be32 ip_s
179199
return nat_lv2_val;
180200
}
181201

182-
static CALI_BPF_INLINE struct calico_nat_dest* calico_v4_nat_lookup(__be32 ip_src, __be32 ip_dst,
183-
__u8 ip_proto, __u16 dport, nat_lookup_result *res)
184-
{
185-
return calico_v4_nat_lookup3(ip_src, ip_dst, ip_proto, dport, false, res, 0, false);
186-
}
187-
188202
/* calico_v4_nat_lookup2 is a convenience wrapper around the full NAT lookup.
 * It forwards ip_src, ip_dst, ip_proto, dport, from_tun and res unchanged and
 * passes 0 and false for the two trailing affinity arguments
 * (affinity_always_timeo and affinity_tmr_update in the full lookup's
 * signature). The removed line targets the old name calico_v4_nat_lookup3;
 * the added line targets the renamed calico_v4_nat_lookup.
 */
static CALI_BPF_INLINE struct calico_nat_dest* calico_v4_nat_lookup2(__be32 ip_src, __be32 ip_dst,
189203
__u8 ip_proto, __u16 dport,
190204
bool from_tun,
191205
nat_lookup_result *res)
192206
{
193-
return calico_v4_nat_lookup3(ip_src, ip_dst, ip_proto, dport, from_tun, res, 0, false);
207+
return calico_v4_nat_lookup(ip_src, ip_dst, ip_proto, dport, from_tun, res, 0, false);
194208
}
195209

196210
#endif /* __CALI_NAT_LOOKUP_H__ */

bpf-gpl/nat_types.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,13 @@ struct calico_nat_v4_value {
5353
__u32 count;
5454
__u32 local;
5555
__u32 affinity_timeo;
56+
__u32 flags;
5657
};
5758

58-
CALI_MAP(cali_v4_nat_fe, 2,
59+
#define NAT_FLG_EXTERNAL_LOCAL 0x1
60+
#define NAT_FLG_INTERNAL_LOCAL 0x2
61+
62+
CALI_MAP(cali_v4_nat_fe, 3,
5963
BPF_MAP_TYPE_LPM_TRIE,
6064
union calico_nat_v4_lpm_key, struct calico_nat_v4_value,
6165
511000, BPF_F_NO_PREALLOC, MAP_PIN_GLOBAL)

bpf/nat/maps.go

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ const frontendAffKeySize = 8
5151
// uint32_t count;
5252
// uint32_t local;
5353
// uint32_t affinity_timeo;
54+
// uint32_t flags;
5455
// };
55-
const frontendValueSize = 16
56+
const frontendValueSize = 20
5657

5758
// struct calico_nat_secondary_v4_key {
5859
// uint32_t id;
@@ -138,6 +139,16 @@ func (k FrontendKey) String() string {
138139
return fmt.Sprintf("NATKey{Proto:%v Addr:%v Port:%v SrcAddr:%v}", k.Proto(), k.Addr(), k.Port(), k.SrcCIDR())
139140
}
140141

142+
// Flag bits carried in the flags field of a frontend NAT value. The numeric
// values match NAT_FLG_EXTERNAL_LOCAL and NAT_FLG_INTERNAL_LOCAL defined in
// bpf-gpl/nat_types.h. The syncer sets them when the service reports
// NodeLocalExternal() and NodeLocalInternal() respectively.
const (
143+
NATFlgExternalLocal = 0x1
144+
NATFlgInternalLocal = 0x2
145+
)
146+
147+
// flgTostr maps each known NAT flag bit to the human-readable name that
// FrontendValue.FlagsAsString prints for it.
var flgTostr = map[int]string{
148+
NATFlgExternalLocal: "external-local",
149+
NATFlgInternalLocal: "internal-local",
150+
}
151+
141152
type FrontendValue [frontendValueSize]byte
142153

143154
func NewNATValue(id uint32, count, local, affinityTimeo uint32) FrontendValue {
@@ -149,6 +160,12 @@ func NewNATValue(id uint32, count, local, affinityTimeo uint32) FrontendValue {
149160
return v
150161
}
151162

163+
// NewNATValueWithFlags builds a FrontendValue exactly as NewNATValue does and
// then stores flags as a little-endian uint32 in bytes 16..19, i.e. the flags
// field that follows affinity_timeo in the value layout.
func NewNATValueWithFlags(id uint32, count, local, affinityTimeo, flags uint32) FrontendValue {
164+
v := NewNATValue(id, count, local, affinityTimeo)
165+
binary.LittleEndian.PutUint32(v[16:20], flags)
166+
return v
167+
}
168+
152169
// ID returns the little-endian uint32 held in the first four bytes of the
// value.
func (v FrontendValue) ID() uint32 {
153170
return binary.LittleEndian.Uint32(v[:4])
154171
}
@@ -166,9 +183,32 @@ func (v FrontendValue) AffinityTimeout() time.Duration {
166183
return time.Duration(secs) * time.Second
167184
}
168185

186+
// Flags returns the little-endian uint32 stored in bytes 16..19 of the value,
// which is where NewNATValueWithFlags writes the flags bitmask.
func (v FrontendValue) Flags() uint32 {
187+
return binary.LittleEndian.Uint32(v[16:20])
188+
}
189+
190+
func (v FrontendValue) FlagsAsString() string {
191+
flgs := v.Flags()
192+
fstr := ""
193+
194+
for i := 0; i < 32; i++ {
195+
flg := uint32(1 << i)
196+
if flgs&flg != 0 {
197+
fstr += flgTostr[int(flg)]
198+
}
199+
flgs &= ^flg
200+
if flgs == 0 {
201+
break
202+
}
203+
fstr += ", "
204+
}
205+
206+
return fstr
207+
}
208+
169209
// String formats the value for logs and dumps. The removed lines print ID,
// Count, LocalCount and AffinityTimeout; the added lines append the flag
// names produced by FlagsAsString inside "Flags:{...}".
//
// NOTE(review): AffinityTimeout() returns a time.Duration and is formatted
// with %d, so it prints as an integer nanosecond count rather than seconds -
// confirm that this is intended.
func (v FrontendValue) String() string {
170-
return fmt.Sprintf("NATValue{ID:%d,Count:%d,LocalCount:%d,AffinityTimeout:%d}",
171-
v.ID(), v.Count(), v.LocalCount(), v.AffinityTimeout())
210+
return fmt.Sprintf("NATValue{ID:%d,Count:%d,LocalCount:%d,AffinityTimeout:%d,Flags:{%s}}",
211+
v.ID(), v.Count(), v.LocalCount(), v.AffinityTimeout(), v.FlagsAsString())
172212
}
173213

174214
func (v FrontendValue) AsBytes() []byte {
@@ -237,7 +277,7 @@ var FrontendMapParameters = bpf.MapParameters{
237277
MaxEntries: 511000,
238278
Name: "cali_v4_nat_fe",
239279
Flags: unix.BPF_F_NO_PREALLOC,
240-
Version: 2,
280+
Version: 3,
241281
}
242282

243283
func FrontendMap(mc *bpf.MapContext) bpf.Map {

bpf/proxy/kube-proxy.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,20 @@ import (
2020

2121
"github.com/pkg/errors"
2222
log "github.com/sirupsen/logrus"
23+
utilfeature "k8s.io/apiserver/pkg/util/feature"
2324
"k8s.io/client-go/kubernetes"
2425

26+
"github.com/projectcalico/felix/bpf"
2527
"github.com/projectcalico/felix/bpf/cachingmap"
26-
2728
"github.com/projectcalico/felix/bpf/nat"
28-
29-
"github.com/projectcalico/felix/bpf"
3029
"github.com/projectcalico/felix/bpf/routes"
3130
)
3231

32+
// init turns on the ServiceInternalTrafficPolicy feature gate in the default
// mutable feature gate when this package is loaded.
func init() {
33+
// The gate is alpha since 1.21 and beta (on by default) since 1.22, so there
// is no harm in supporting it by default. The error returned by Set is
// deliberately discarded.
34+
_ = utilfeature.DefaultMutableFeatureGate.Set("ServiceInternalTrafficPolicy=true")
35+
}
36+
3337
// KubeProxy is a wrapper of Proxy that deals with higher level issue like
3438
// configuration, restarting etc.
3539
type KubeProxy struct {

bpf/proxy/syncer.go

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -489,13 +489,15 @@ func (s *Syncer) applyDerived(
489489
local := svc.localCount
490490

491491
skey = getSvcKey(sname, getSvcKeyExtra(t, sinfo.ClusterIP().String()))
492+
flags := uint32(0)
493+
492494
switch t {
493-
case svcTypeLoadBalancer:
494-
// Handle LB services the same as NodePort type.
495-
fallthrough
496-
case svcTypeNodePort:
495+
case svcTypeNodePort, svcTypeLoadBalancer, svcTypeNodePortRemote:
497496
if sinfo.NodeLocalExternal() {
498-
count = local // use only local eps
497+
flags |= nat.NATFlgExternalLocal
498+
}
499+
if sinfo.NodeLocalInternal() {
500+
flags |= nat.NATFlgInternalLocal
499501
}
500502
}
501503

@@ -506,11 +508,11 @@ func (s *Syncer) applyDerived(
506508
svc: sinfo,
507509
}
508510

509-
if err := s.writeSvc(sinfo, svc.id, count, local); err != nil {
511+
if err := s.writeSvc(sinfo, svc.id, count, local, flags); err != nil {
510512
return err
511513
}
512514
if svcTypeLoadBalancer == t || svcTypeExternalIP == t {
513-
err := s.writeLBSrcRangeSvcNATKeys(sinfo, svc.id, count, local)
515+
err := s.writeLBSrcRangeSvcNATKeys(sinfo, svc.id, count, local, flags)
514516
if err != nil {
515517
log.Debug("Failed to write LB source range NAT keys")
516518
}
@@ -587,7 +589,7 @@ func (s *Syncer) apply(state DPSyncerState) error {
587589
npInfo := serviceInfoFromK8sServicePort(sinfo)
588590
npInfo.clusterIP = npip
589591
npInfo.port = nport
590-
if npip.Equal(podNPIP) && sinfo.NodeLocalExternal() {
592+
if npip.Equal(podNPIP) && sinfo.NodeLocalInternal() {
591593
// do not program the meta entry, program each node
592594
// separately
593595
continue
@@ -598,7 +600,7 @@ func (s *Syncer) apply(state DPSyncerState) error {
598600
continue
599601
}
600602
}
601-
if sinfo.NodeLocalExternal() {
603+
if sinfo.NodeLocalInternal() {
602604
if miss := s.expandAndApplyNodePorts(sname, sinfo, eps, nport, s.rt.Lookup); miss != nil {
603605
expNPMisses = append(expNPMisses, miss)
604606
}
@@ -718,7 +720,7 @@ func (s *Syncer) updateService(skey svcKey, sinfo k8sp.ServicePort, id uint32, e
718720
cnt++
719721
}
720722

721-
if err := s.writeSvc(sinfo, id, cnt, local); err != nil {
723+
if err := s.writeSvc(sinfo, id, cnt, local, 0); err != nil {
722724
return 0, 0, err
723725
}
724726

@@ -800,7 +802,7 @@ func getSvcNATKeyLBSrcRange(svc k8sp.ServicePort) ([]nat.FrontendKey, error) {
800802
return keys, nil
801803
}
802804

803-
func (s *Syncer) writeLBSrcRangeSvcNATKeys(svc k8sp.ServicePort, svcID uint32, count, local int) error {
805+
func (s *Syncer) writeLBSrcRangeSvcNATKeys(svc k8sp.ServicePort, svcID uint32, count, local int, flags uint32) error {
804806
var key nat.FrontendKey
805807
affinityTimeo := uint32(0)
806808
if svc.SessionAffinityType() == v1.ServiceAffinityClientIP {
@@ -814,7 +816,7 @@ func (s *Syncer) writeLBSrcRangeSvcNATKeys(svc k8sp.ServicePort, svcID uint32, c
814816
if err != nil {
815817
return err
816818
}
817-
val := nat.NewNATValue(svcID, uint32(count), uint32(local), affinityTimeo)
819+
val := nat.NewNATValueWithFlags(svcID, uint32(count), uint32(local), affinityTimeo, flags)
818820
for _, key := range keys {
819821
if log.GetLevel() >= log.DebugLevel {
820822
log.Debugf("bpf map writing %s:%s", key, val)
@@ -830,7 +832,7 @@ func (s *Syncer) writeLBSrcRangeSvcNATKeys(svc k8sp.ServicePort, svcID uint32, c
830832
return nil
831833
}
832834

833-
func (s *Syncer) writeSvc(svc k8sp.ServicePort, svcID uint32, count, local int) error {
835+
func (s *Syncer) writeSvc(svc k8sp.ServicePort, svcID uint32, count, local int, flags uint32) error {
834836
key, err := getSvcNATKey(svc)
835837
if err != nil {
836838
return err
@@ -841,7 +843,7 @@ func (s *Syncer) writeSvc(svc k8sp.ServicePort, svcID uint32, count, local int)
841843
affinityTimeo = uint32(svc.StickyMaxAgeSeconds())
842844
}
843845

844-
val := nat.NewNATValue(svcID, uint32(count), uint32(local), affinityTimeo)
846+
val := nat.NewNATValueWithFlags(svcID, uint32(count), uint32(local), affinityTimeo, flags)
845847

846848
if log.GetLevel() >= log.DebugLevel {
847849
log.Debugf("bpf map writing %s:%s", key, val)
@@ -1416,6 +1418,7 @@ func K8sSvcWithNodePort(np int) K8sServicePortOption {
14161418
// K8sSvcWithLocalOnly returns an option that marks a service port as
// node-local for both external and internal traffic by setting
// nodeLocalExternal and nodeLocalInternal on the serviceInfo.
//
// The option uses an unchecked type assertion, so it panics if it is applied
// to anything other than a *serviceInfo.
func K8sSvcWithLocalOnly() K8sServicePortOption {
14171419
return func(s interface{}) {
14181420
s.(*serviceInfo).nodeLocalExternal = true
1421+
s.(*serviceInfo).nodeLocalInternal = true
14191422
}
14201423
}
14211424

bpf/proxy/syncer_test.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -607,8 +607,9 @@ var _ = Describe("BPF Syncer", func() {
607607
k = nat.NewNATKey(net.IPv4(10, 123, 0, 1), 4444, proxy.ProtoV1ToIntPanic(v1.ProtocolTCP))
608608
Expect(svcs.m).To(HaveKey(k))
609609
local := svcs.m[k]
610-
Expect(local.Count()).To(Equal(uint32(1)))
610+
Expect(local.Count()).To(Equal(uint32(3)))
611611
Expect(local.LocalCount()).To(Equal(uint32(1)))
612+
Expect(local.Flags()).To(Equal(uint32(nat.NATFlgInternalLocal | nat.NATFlgExternalLocal)))
612613

613614
k = nat.NewNATKey(net.IPv4(10, 0, 0, 2), 2222, proxy.ProtoV1ToIntPanic(v1.ProtocolTCP))
614615
Expect(svcs.m).To(HaveKey(k))
@@ -738,7 +739,8 @@ var _ = Describe("BPF Syncer", func() {
738739
val2, ok := svcs.m[nat.NewNATKey(net.IPv4(192, 168, 0, 1), 4444, proxy.ProtoV1ToIntPanic(v1.ProtocolTCP))]
739740
Expect(ok).To(BeTrue())
740741
Expect(val2.ID()).To(Equal(val1.ID()))
741-
Expect(val2.Count()).To(Equal(uint32(0)))
742+
Expect(val2.Count()).To(Equal(uint32(4)))
743+
Expect(val2.LocalCount()).To(Equal(uint32(0)))
742744

743745
val3, ok := svcs.m[nat.NewNATKey(net.IPv4(10, 123, 0, 1), 4444, proxy.ProtoV1ToIntPanic(v1.ProtocolTCP))]
744746
Expect(ok).To(BeTrue())
@@ -811,7 +813,9 @@ var _ = Describe("BPF Syncer", func() {
811813
val2, ok := svcs.m[nat.NewNATKey(net.IPv4(192, 168, 0, 1), 4444, proxy.ProtoV1ToIntPanic(v1.ProtocolTCP))]
812814
Expect(ok).To(BeTrue())
813815
Expect(val2.ID()).To(Equal(val1.ID()))
814-
Expect(val2.Count()).To(Equal(uint32(2)))
816+
Expect(val2.Count()).To(Equal(uint32(4)))
817+
Expect(val2.LocalCount()).To(Equal(uint32(2)))
818+
Expect(val2.Flags()).To(Equal(uint32(nat.NATFlgInternalLocal | nat.NATFlgExternalLocal)))
815819

816820
val3, ok := svcs.m[nat.NewNATKey(net.IPv4(10, 123, 0, 1), 4444, proxy.ProtoV1ToIntPanic(v1.ProtocolTCP))]
817821
Expect(ok).To(BeTrue())

cmd/calico-bpf/commands/nat.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,12 @@ func dumpNice(printf printfFn, natMap nat.MapMem, back nat.BackendMapMem) {
9494
count := nv.Count()
9595
local := nv.LocalCount()
9696
id := nv.ID()
97-
printf("%s port %d proto %d id %d count %d local %d\n",
98-
nk.Addr(), nk.Port(), nk.Proto(), id, count, local)
97+
flags := nv.FlagsAsString()
98+
if flags != "" {
99+
flags = " flags " + flags
100+
}
101+
printf("%s port %d proto %d id %d count %d local %d%s\n",
102+
nk.Addr(), nk.Port(), nk.Proto(), id, count, local, flags)
99103
for i := uint32(0); i < count; i++ {
100104
bk := nat.NewNATBackendKey(id, uint32(i))
101105
bv, ok := back[bk]

0 commit comments

Comments
 (0)
0