ipv6: Stop rt6_info from using inet_peer's metrics · bsd-unix/linux@4b32b5a · GitHub
[go: up one dir, main page]

Skip to content

Commit 4b32b5a

Browse files
iamkafai authored and davem330 committed
ipv6: Stop rt6_info from using inet_peer's metrics
inet_peer is indexed by the dst address alone. However, the fib6 tree
could have multiple routing entries (rt6_info) for the same dst. For
example,
1. A /128 dst via multiple gateways.
2. A RTF_CACHE route cloned from a /128 route.

In the above cases, all of them will share the same metrics and
step on each other.

This patch will steer away from inet_peer's metrics and use
dst_cow_metrics_generic() for everything.

Change Highlights:
1. Remove rt6_cow_metrics() which currently acquires metrics from
   inet_peer for DST_HOST route (i.e. /128 route).
2. Add rt6i_pmtu to take care of the pmtu update to avoid creating a
   full size metrics just to override the RTAX_MTU.
3. After (2), the RTF_CACHE route can also share the metrics with its
   dst.from route, by:
   dst_init_metrics(&cache_rt->dst, dst_metrics_ptr(cache_rt->dst.from), true);
4. Stop creating RTF_CACHE route by cloning another RTF_CACHE route. Instead,
   directly clone from rt->dst.

   [ Currently, cloning from another RTF_CACHE is only possible during
     rt6_do_redirect(). Also, the old clone is removed from the tree
     immediately after the new clone is added. ]

   In case of cloning from an older redirect RTF_CACHE, it should work as
   before.

   In case of cloning from an older pmtu RTF_CACHE, this patch will forget
   the pmtu and re-learn it (if there is any) from the redirected route.

The _rt6i_peer and DST_METRICS_FORCE_OVERWRITE will be removed
in the next cleanup patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 653437d commit 4b32b5a

File tree

2 files changed

+60
-52
lines changed

2 files changed

+60
-52
lines changed

include/net/ip6_fib.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ struct rt6_info {
124124
unsigned long _rt6i_peer;
125125

126126
u32 rt6i_metric;
127+
u32 rt6i_pmtu;
127128
/* more non-fragment space at head required */
128129
unsigned short rt6i_nfheader_len;
129130
u8 rt6i_protocol;
@@ -189,15 +190,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
189190
rt0->rt6i_flags |= RTF_EXPIRES;
190191
}
191192

192-
static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
193-
{
194-
struct dst_entry *new = (struct dst_entry *) from;
195-
196-
rt->rt6i_flags &= ~RTF_EXPIRES;
197-
dst_hold(new);
198-
rt->dst.from = new;
199-
}
200-
201193
static inline void ip6_rt_put(struct rt6_info *rt)
202194
{
203195
/* dst_release() accepts a NULL parameter.

net/ipv6/route.c

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
9292
struct sk_buff *skb, u32 mtu);
9393
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
9494
struct sk_buff *skb);
95+
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
9596
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
9697

9798
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -136,33 +137,12 @@ static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
136137

137138
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138139
{
139-
struct rt6_info *rt = (struct rt6_info *) dst;
140-
struct inet_peer *peer;
141-
u32 *p = NULL;
140+
struct rt6_info *rt = (struct rt6_info *)dst;
142141

143-
if (!(rt->dst.flags & DST_HOST))
142+
if (rt->rt6i_flags & RTF_CACHE)
143+
return NULL;
144+
else
144145
return dst_cow_metrics_generic(dst, old);
145-
146-
peer = rt6_get_peer_create(rt);
147-
if (peer) {
148-
u32 *old_p = __DST_METRICS_PTR(old);
149-
unsigned long prev, new;
150-
151-
p = peer->metrics;
152-
if (inet_metrics_new(peer) ||
153-
(old & DST_METRICS_FORCE_OVERWRITE))
154-
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155-
156-
new = (unsigned long) p;
157-
prev = cmpxchg(&dst->_metrics, old, new);
158-
159-
if (prev != old) {
160-
p = __DST_METRICS_PTR(prev);
161-
if (prev & DST_METRICS_READ_ONLY)
162-
p = NULL;
163-
}
164-
}
165-
return p;
166146
}
167147

168148
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@@ -323,8 +303,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
323303
struct inet6_dev *idev = rt->rt6i_idev;
324304
struct dst_entry *from = dst->from;
325305

326-
if (!(rt->dst.flags & DST_HOST))
327-
dst_destroy_metrics_generic(dst);
306+
dst_destroy_metrics_generic(dst);
328307

329308
if (idev) {
330309
rt->rt6i_idev = NULL;
@@ -333,11 +312,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
333312

334313
dst->from = NULL;
335314
dst_release(from);
336-
337-
if (rt6_has_peer(rt)) {
338-
struct inet_peer *peer = rt6_peer_ptr(rt);
339-
inet_putpeer(peer);
340-
}
341315
}
342316

343317
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1003,6 +977,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1003977
goto redo_fib6_lookup_lock;
1004978

1005979
out2:
980+
rt6_dst_from_metrics_check(rt);
1006981
rt->dst.lastuse = jiffies;
1007982
rt->dst.__use++;
1008983

@@ -1111,6 +1086,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
11111086
* Destination cache support functions
11121087
*/
11131088

1089+
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1090+
{
1091+
if (rt->dst.from &&
1092+
dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1093+
dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1094+
}
1095+
11141096
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
11151097
{
11161098
struct rt6_info *rt;
@@ -1127,6 +1109,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
11271109
if (rt6_check_expired(rt))
11281110
return NULL;
11291111

1112+
rt6_dst_from_metrics_check(rt);
1113+
11301114
return dst;
11311115
}
11321116

@@ -1179,7 +1163,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
11791163
if (mtu < IPV6_MIN_MTU)
11801164
mtu = IPV6_MIN_MTU;
11811165

1182-
dst_metric_set(dst, RTAX_MTU, mtu);
1166+
rt6->rt6i_pmtu = mtu;
11831167
rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
11841168
}
11851169
}
@@ -1359,9 +1343,14 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
13591343

13601344
static unsigned int ip6_mtu(const struct dst_entry *dst)
13611345
{
1346+
const struct rt6_info *rt = (const struct rt6_info *)dst;
1347+
unsigned int mtu = rt->rt6i_pmtu;
13621348
struct inet6_dev *idev;
1363-
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
13641349

1350+
if (mtu)
1351+
goto out;
1352+
1353+
mtu = dst_metric_raw(dst, RTAX_MTU);
13651354
if (mtu)
13661355
goto out;
13671356

@@ -1947,12 +1936,27 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
19471936
* Misc support functions
19481937
*/
19491938

1939+
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1940+
{
1941+
BUG_ON(from->dst.from);
1942+
1943+
rt->rt6i_flags &= ~RTF_EXPIRES;
1944+
dst_hold(&from->dst);
1945+
rt->dst.from = &from->dst;
1946+
dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1947+
}
1948+
19501949
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
19511950
const struct in6_addr *dest)
19521951
{
19531952
struct net *net = dev_net(ort->dst.dev);
1954-
struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1955-
ort->rt6i_table);
1953+
struct rt6_info *rt;
1954+
1955+
if (ort->rt6i_flags & RTF_CACHE)
1956+
ort = (struct rt6_info *)ort->dst.from;
1957+
1958+
rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1959+
ort->rt6i_table);
19561960

19571961
if (rt) {
19581962
rt->dst.input = ort->dst.input;
@@ -1961,7 +1965,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
19611965

19621966
rt->rt6i_dst.addr = *dest;
19631967
rt->rt6i_dst.plen = 128;
1964-
dst_copy_metrics(&rt->dst, &ort->dst);
19651968
rt->dst.error = ort->dst.error;
19661969
rt->rt6i_idev = ort->rt6i_idev;
19671970
if (rt->rt6i_idev)
@@ -2393,11 +2396,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
23932396
PMTU discouvery.
23942397
*/
23952398
if (rt->dst.dev == arg->dev &&
2396-
!dst_metric_locked(&rt->dst, RTAX_MTU) &&
2397-
(dst_mtu(&rt->dst) >= arg->mtu ||
2398-
(dst_mtu(&rt->dst) < arg->mtu &&
2399-
dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2400-
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2399+
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
2400+
if (rt->rt6i_flags & RTF_CACHE) {
2401+
/* For RTF_CACHE with rt6i_pmtu == 0
2402+
* (i.e. a redirected route),
2403+
* the metrics of its rt->dst.from has already
2404+
* been updated.
2405+
*/
2406+
if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2407+
rt->rt6i_pmtu = arg->mtu;
2408+
} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2409+
(dst_mtu(&rt->dst) < arg->mtu &&
2410+
dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2411+
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2412+
}
24012413
}
24022414
return 0;
24032415
}
@@ -2627,6 +2639,7 @@ static int rt6_fill_node(struct net *net,
26272639
int iif, int type, u32 portid, u32 seq,
26282640
int prefix, int nowait, unsigned int flags)
26292641
{
2642+
u32 metrics[RTAX_MAX];
26302643
struct rtmsg *rtm;
26312644
struct nlmsghdr *nlh;
26322645
long expires;
@@ -2740,7 +2753,10 @@ static int rt6_fill_node(struct net *net,
27402753
goto nla_put_failure;
27412754
}
27422755

2743-
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2756+
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2757+
if (rt->rt6i_pmtu)
2758+
metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2759+
if (rtnetlink_put_metrics(skb, metrics) < 0)
27442760
goto nla_put_failure;
27452761

27462762
if (rt->rt6i_flags & RTF_GATEWAY) {

0 commit comments

Comments
 (0)
0