ipv6: fix ECMP route replacement

When replacing an IPv6 multipath route with "ip route replace", i.e.
NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only the first
matching route without fixing its siblings, resulting in a corrupted
siblings linked list; removing one of the siblings can then end in an
infinite loop.

The IPv6 ECMP implementation is a bit different from the IPv4 one, so route
replacement cannot work in exactly the same way. This should be a
reasonable approximation:

1. If the new route is ECMP-able and there is a matching ECMP-able one
already, replace it and all its siblings (if any).

2. If the new route is ECMP-able and no matching ECMP-able route exists,
replace the first matching non-ECMP-able one (if any) or just add the new one.

3. If the new route is not ECMP-able, replace the first matching
non-ECMP-able route (if any) or add the new route.

We also need to remove the NLM_F_REPLACE flag after replacing the old
route(s) with the first nexthop of an ECMP route so that each subsequent
nexthop does not replace the previous one.

Fixes: 51ebd31815 ("ipv6: add support of equal cost multipath (ECMP)")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Michal Kubeček 2015-05-18 20:54:00 +02:00 committed by David S. Miller
parent 35f1b4e96b
commit 2759647247
2 changed files with 44 additions and 6 deletions

View file

@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
{ {
struct rt6_info *iter = NULL; struct rt6_info *iter = NULL;
struct rt6_info **ins; struct rt6_info **ins;
struct rt6_info **fallback_ins = NULL;
int replace = (info->nlh && int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE)); (info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh || int add = (!info->nlh ||
@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_EXCL)) (info->nlh->nlmsg_flags & NLM_F_EXCL))
return -EEXIST; return -EEXIST;
if (replace) { if (replace) {
found++; if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
break; found++;
break;
}
if (rt_can_ecmp)
fallback_ins = fallback_ins ?: ins;
goto next_iter;
} }
if (iter->dst.dev == rt->dst.dev && if (iter->dst.dev == rt->dst.dev &&
@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (iter->rt6i_metric > rt->rt6i_metric) if (iter->rt6i_metric > rt->rt6i_metric)
break; break;
next_iter:
ins = &iter->dst.rt6_next; ins = &iter->dst.rt6_next;
} }
if (fallback_ins && !found) {
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
iter = *ins;
found++;
}
/* Reset round-robin state, if necessary */ /* Reset round-robin state, if necessary */
if (ins == &fn->leaf) if (ins == &fn->leaf)
fn->rr_ptr = NULL; fn->rr_ptr = NULL;
@ -815,6 +829,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
} }
} else { } else {
int nsiblings;
if (!found) { if (!found) {
if (add) if (add)
goto add; goto add;
@ -835,8 +851,27 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
info->nl_net->ipv6.rt6_stats->fib_route_nodes++; info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO; fn->fn_flags |= RTN_RTINFO;
} }
nsiblings = iter->rt6i_nsiblings;
fib6_purge_rt(iter, fn, info->nl_net); fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter); rt6_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
ins = &rt->dst.rt6_next;
iter = *ins;
while (iter) {
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
nsiblings--;
} else {
ins = &iter->dst.rt6_next;
}
iter = *ins;
}
WARN_ON(nsiblings != 0);
}
} }
return 0; return 0;

View file

@ -2541,11 +2541,14 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add)
} }
} }
/* Because each route is added like a single route we remove /* Because each route is added like a single route we remove
* this flag after the first nexthop (if there is a collision, * these flags after the first nexthop: if there is a collision,
* we have already fail to add the first nexthop: * we have already failed to add the first nexthop:
* fib6_add_rt2node() has reject it). * fib6_add_rt2node() has rejected it; when replacing, old
* nexthops have been replaced by first new, the rest should
* be added to it.
*/ */
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
rtnh = rtnh_next(rtnh, &remaining); rtnh = rtnh_next(rtnh, &remaining);
} }