summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/ip6_fib.h1
-rw-r--r--include/net/ipv6_stubs.h1
-rw-r--r--net/ipv4/nexthop.c25
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/route.c19
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh63
6 files changed, 108 insertions, 2 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c412dde4d67d..83b8070d1cc9 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -485,6 +485,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void fib6_nh_release(struct fib6_nh *fib6_nh);
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index afbce90c4480..45e0339be6fa 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -47,6 +47,7 @@ struct ipv6_stub {
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+ void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 9e8100728d46..a69a9e76f99f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1899,15 +1899,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
/* if any FIB entries reference this nexthop, any dst entries
* need to be regenerated
*/
-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
+ struct nexthop *replaced_nh)
{
struct fib6_info *f6i;
+ struct nh_group *nhg;
+ int i;
if (!list_empty(&nh->fi_list))
rt_cache_flush(net);
list_for_each_entry(f6i, &nh->f6i_list, nh_list)
ipv6_stub->fib6_update_sernum(net, f6i);
+
+ /* if an IPv6 group was replaced, we have to release all old
+ * dsts to make sure all refcounts are released
+ */
+ if (!replaced_nh->is_group)
+ return;
+
+ /* new dsts must use only the new nexthop group */
+ synchronize_net();
+
+ nhg = rtnl_dereference(replaced_nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+ struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
+
+ if (nhi->family == AF_INET6)
+ ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
+ }
}
static int replace_nexthop_grp(struct net *net, struct nexthop *old,
@@ -2247,7 +2268,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
err = replace_nexthop_single(net, old, new, extack);
if (!err) {
- nh_rt_cache_flush(net, old);
+ nh_rt_cache_flush(net, old, new);
__remove_nexthop(net, new, NULL);
nexthop_put(new);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0c4da163535a..dab4a047590b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1026,6 +1026,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.fib6_nh_init = fib6_nh_init,
.fib6_nh_release = fib6_nh_release,
+ .fib6_nh_release_dsts = fib6_nh_release_dsts,
.fib6_update_sernum = fib6_update_sernum_stub,
.fib6_rt_update = fib6_rt_update,
.ip6_del_rt = ip6_del_rt,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3ae25b8ffbd6..42d60c76d30a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3680,6 +3680,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh)
fib_nh_common_release(&fib6_nh->nh_common);
}
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
+{
+ int cpu;
+
+ if (!fib6_nh->rt6i_pcpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info *pcpu_rt, **ppcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+ pcpu_rt = xchg(ppcpu_rt, NULL);
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ }
+ }
+}
+
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
gfp_t gfp_flags,
struct netlink_ext_ack *extack)
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b5a69ad191b0..d444ee6aa3cb 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -629,6 +629,66 @@ ipv6_fcnal()
log_test $? 0 "Nexthops removed on admin down"
}
+ipv6_grp_refs()
+{
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP link set dev veth1 up"
+ run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+ run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+ run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+ run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+ run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+ run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+ run_cmd "$IP nexthop add id 102 group 100"
+ run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+ # create per-cpu dsts through nh 100
+ run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+ # remove nh 100 from the group to delete the route potentially leaving
+ # a stale per-cpu dst which holds a reference to the nexthop's net
+ # device and to the IPv6 route
+ run_cmd "$IP nexthop replace id 102 group 101"
+ run_cmd "$IP route del 2001:db8:101::1/128"
+
+ # add both nexthops to the group so a reference is taken on them
+ run_cmd "$IP nexthop replace id 102 group 100/101"
+
+ # if the bug described in commit "net: nexthop: release IPv6 per-cpu
+ # dsts when replacing a nexthop group" exists at this point we have
+ # an unlinked IPv6 route (but not freed due to stale dst) with a
+ # reference over the group so we delete the group which will again
+ # only unlink it due to the route reference
+ run_cmd "$IP nexthop del id 102"
+
+ # delete the nexthop with stale dst, since we have an unlinked
+ # group with a ref to it and an unlinked IPv6 route with ref to the
+ # group, the nh will only be unlinked and not freed so the stale dst
+ # remains forever and we get a net device refcount imbalance
+ run_cmd "$IP nexthop del id 100"
+
+ # if a reference was lost this command will hang because the net device
+ # cannot be removed
+ timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
+
+ # we can't cleanup if the command is hung trying to delete the netdev
+ if [ $? -eq 137 ]; then
+ return 1
+ fi
+
+ # cleanup
+ run_cmd "$IP link del veth1.20"
+ run_cmd "$IP nexthop flush"
+
+ return 0
+}
+
ipv6_grp_fcnal()
{
local rc
@@ -734,6 +794,9 @@ ipv6_grp_fcnal()
run_cmd "$IP nexthop add id 108 group 31/24"
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+ ipv6_grp_refs
+ log_test $? 0 "Nexthop group replace refcounts"
}
ipv6_res_grp_fcnal()