diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-02-08 05:55:15 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-02-08 05:55:16 +0300 |
commit | b6b614558ed5b2ca50edacc0f2fbf5f52158c86c (patch) | |
tree | f7dcd82263393d8c8dee032a7f6926cd43f9a045 /net | |
parent | a1e55f51035e6aa65cf2d11d2147f2bf9edf81f9 (diff) | |
parent | 8962daccc2d32812fe24bd21496c036eb4f454b0 (diff) | |
download | linux-b6b614558ed5b2ca50edacc0f2fbf5f52158c86c.tar.xz |
Merge branch 'net-more-factorization-in-cleanup_net-paths'
Eric Dumazet says:
====================
net: more factorization in cleanup_net() paths
This series is inspired by recent syzbot reports hinting to RTNL and
workqueue abuses.
rtnl_lock() is unfair to (single threaded) cleanup_net(), because
many threads can cause contention on it.
This series adds a new (struct pernet_operations) method,
so that cleanup_net() can hold RTNL longer once it finally
acquires it.
It also factorizes unregister_netdevice_many(), to further
reduce stalls in cleanup_net().
Link: https://lore.kernel.org/netdev/CANn89iLJrrJs+6Vc==Un4rVKcpV0Eof4F_4w1_wQGxUCE2FWAg@mail.gmail.com/T/#u
https://lore.kernel.org/netdev/170688415193.5216.10499830272732622816@kwain/
====================
Link: https://lore.kernel.org/r/20240206144313.2050392-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/br.c | 15 | ||||
-rw-r--r-- | net/core/net_namespace.c | 31 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 24 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 10 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 8 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 8 | ||||
-rw-r--r-- | net/ipv4/nexthop.c | 38 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 12 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 12 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 12 | ||||
-rw-r--r-- | net/ipv6/sit.c | 13 | ||||
-rw-r--r-- | net/xfrm/xfrm_interface_core.c | 14 |
12 files changed, 115 insertions, 82 deletions
diff --git a/net/bridge/br.c b/net/bridge/br.c index ac19b797dbec..2cab878e0a39 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) clear_bit(opt, &br->options); } -static void __net_exit br_net_exit_batch(struct list_head *net_list) +static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net_device *dev; struct net *net; - LIST_HEAD(list); - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) for_each_netdev(net, dev) if (netif_is_bridge_master(dev)) - br_dev_delete(dev, &list); - - unregister_netdevice_many(&list); - - rtnl_unlock(); + br_dev_delete(dev, dev_to_kill); } static struct pernet_operations br_net_ops = { - .exit_batch = br_net_exit_batch, + .exit_batch_rtnl = br_net_exit_batch_rtnl, }; static const struct stp_proto br_stp_proto = { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72799533426b..233ec0cdd011 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; - int error = 0; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); + int error = 0; refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); @@ -358,6 +359,15 @@ out_undo: synchronize_rcu(); ops = saved_ops; + rtnl_lock(); + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + + ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work) struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); @@ -613,6 +624,14 @@ static void cleanup_net(struct work_struct *work) */ synchronize_rcu(); + rtnl_lock(); + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_ { ops_pre_exit_list(ops, net_exit_list); synchronize_rcu(); + + if (ops->exit_batch_rtnl) { + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5169c3c72cff..aad5125b7a65 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1025,14 +1025,16 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) +static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); + ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops, + dev_to_kill); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit_batch = ipgre_exit_batch_net, + .exit_batch_rtnl = ipgre_exit_batch_rtnl, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1697,14 +1699,16 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) +static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); + ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops, + dev_to_kill); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit_batch = ipgre_tap_exit_batch_net, + .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1715,14 +1719,16 @@ static int __net_init erspan_init_net(struct net *net) &erspan_link_ops, "erspan0"); } -static void __net_exit erspan_exit_batch_net(struct list_head *net_list) +static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); + ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops, + dev_to_kill); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, - .exit_batch = erspan_exit_batch_net, + .exit_batch_rtnl = erspan_exit_batch_rtnl, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index beeae624c412..00da0b80320f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1130,19 +1130,17 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, } void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, - struct rtnl_link_ops *ops) + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill) { struct ip_tunnel_net *itn; struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { itn = net_generic(net, id); - ip_tunnel_destroy(net, itn, &list, ops); + ip_tunnel_destroy(net, itn, dev_to_kill, ops); } - unregister_netdevice_many(&list); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9ab9b3ebe0cd..fb1f52d21311 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -510,14 +510,16 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_batch_net(struct list_head *list_net) +static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); + ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops, + dev_to_kill); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit_batch = vti_exit_batch_net, + .exit_batch_rtnl = vti_exit_batch_rtnl, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 27b8f83c6ea2..0151eea06cc5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -592,14 +592,16 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_batch_net(struct list_head *list_net) +static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops); + ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops, + dev_to_kill); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit_batch = ipip_exit_batch_net, + .exit_batch_rtnl = ipip_exit_batch_rtnl, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index bbff68b5b5d4..70509da4f080 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3631,17 +3631,24 @@ unlock: } EXPORT_SYMBOL(register_nexthop_notifier); -int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { int err; - rtnl_lock(); err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, nb); - if (err) - goto unlock; - nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); -unlock: + if (!err) + nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); + return err; +} +EXPORT_SYMBOL(__unregister_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = __unregister_nexthop_notifier(net, nb); rtnl_unlock(); return err; } @@ -3737,16 +3744,20 @@ out: } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) +static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { + ASSERT_RTNL(); + list_for_each_entry(net, net_list, exit_list) flush_all_nexthops(net); - kfree(net->nexthop.devhash); - } - rtnl_unlock(); +} + +static void __net_exit nexthop_net_exit(struct net *net) +{ + kfree(net->nexthop.devhash); + net->nexthop.devhash = NULL; } static int __net_init nexthop_net_init(struct net *net) @@ -3764,7 +3775,8 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = nexthop_net_init, - .exit_batch = nexthop_net_exit_batch, + .exit = nexthop_net_exit, + .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, }; static int __init nexthop_init(void) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 070d87abf7c0..428f03e9da45 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1632,21 +1632,19 @@ err_alloc_dev: return err; } -static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) +static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - ip6gre_destroy_tunnels(net, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + ip6gre_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit_batch = ip6gre_exit_batch_net, + .exit_batch_rtnl = ip6gre_exit_batch_rtnl, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9bbabf750a21..bfb0a6c601c1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2282,21 +2282,19 @@ err_alloc_dev: return err; } -static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) +static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - ip6_tnl_destroy_tunnels(net, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + ip6_tnl_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit_batch = ip6_tnl_exit_batch_net, + .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index e550240c85e1..cfe1b1ad4d85 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1174,24 +1174,22 @@ err_alloc_dev: return err; } -static void __net_exit vti6_exit_batch_net(struct list_head *net_list) +static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct vti6_net *ip6n; struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { ip6n = net_generic(net, vti6_net_id); - vti6_destroy_tunnels(ip6n, &list); + vti6_destroy_tunnels(ip6n, dev_to_kill); } - unregister_netdevice_many(&list); - rtnl_unlock(); } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit_batch = vti6_exit_batch_net, + .exit_batch_rtnl = vti6_exit_batch_rtnl, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cc24cefdb85c..61b2b71fa8be 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1875,22 +1875,19 @@ err_alloc_dev: return err; } -static void __net_exit sit_exit_batch_net(struct list_head *net_list) +static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - LIST_HEAD(list); struct net *net; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - sit_destroy_tunnels(net, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); + sit_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit_batch = sit_exit_batch_net, + .exit_batch_rtnl = sit_exit_batch_rtnl, .id = &sit_net_id, .size = sizeof(struct sit_net), }; diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 21d50d75c260..dafefef3cf51 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -957,12 +957,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .get_link_net = xfrmi_get_link_net, }; -static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) +static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_exit_list, exit_list) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if __rcu **xip; @@ -973,18 +973,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) for (xip = &xfrmn->xfrmi[i]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) - unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_queue(xi->dev, dev_to_kill); } xi = rtnl_dereference(xfrmn->collect_md_xfrmi); if (xi) - unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_queue(xi->dev, dev_to_kill); } - unregister_netdevice_many(&list); - rtnl_unlock(); } static struct pernet_operations xfrmi_net_ops = { - .exit_batch = xfrmi_exit_batch_net, + .exit_batch_rtnl = xfrmi_exit_batch_rtnl, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), }; |