summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-07-28 21:36:00 +0300
committerJakub Kicinski <kuba@kernel.org>2023-07-28 21:36:00 +0300
commit5bdc312c1d06fb4ef02bd62fd077e4fa722012e6 (patch)
treeb977b7a1bda1dfea681cdcc02acdfd6f51ec7d41 /net
parent083476a2023ce64991e17565707e205a1bf78d63 (diff)
parent84e00d9bd4e472bd9b145ed40dbd132dd7a15462 (diff)
downloadlinux-5bdc312c1d06fb4ef02bd62fd077e4fa722012e6.tar.xz
Merge branch 'net-store-netdevs-in-an-xarray'
Jakub Kicinski says: ==================== net: store netdevs in an xarray One of more annoying developer experience gaps we have in netlink is iterating over netdevs. It's painful. Add an xarray to make it trivial. v1: https://lore.kernel.org/all/20230722014237.4078962-1-kuba@kernel.org/ ==================== Link: https://lore.kernel.org/r/20230726185530.2247698-1-kuba@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c82
-rw-r--r--net/core/netdev-genl.c37
-rw-r--r--net/ethtool/netlink.c65
-rw-r--r--net/ethtool/tunnels.c71
4 files changed, 105 insertions, 150 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index e7ffcfa037f7..b58674774a57 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -388,6 +388,8 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+ /* We reserved the ifindex, this can't fail */
+ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
dev_base_seq_inc(net);
}
@@ -397,8 +399,12 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct net *net = dev_net(dev);
+
ASSERT_RTNL();
+ xa_erase(&net->dev_by_index, dev->ifindex);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -9565,23 +9571,35 @@ err_out:
}
/**
- * dev_new_index - allocate an ifindex
- * @net: the applicable net namespace
+ * dev_index_reserve() - allocate an ifindex in a namespace
+ * @net: the applicable net namespace
+ * @ifindex: requested ifindex, pass %0 to get one allocated
+ *
+ * Allocate a ifindex for a new device. Caller must either use the ifindex
+ * to store the device (via list_netdevice()) or call dev_index_release()
+ * to give the index up.
*
- * Returns a suitable unique value for a new device interface
- * number. The caller must hold the rtnl semaphore or the
- * dev_base_lock to be sure it remains unique.
+ * Return: a suitable unique value for a new device interface number or -errno.
*/
-static int dev_new_index(struct net *net)
+static int dev_index_reserve(struct net *net, u32 ifindex)
{
- int ifindex = net->ifindex;
+ int err;
- for (;;) {
- if (++ifindex <= 0)
- ifindex = 1;
- if (!__dev_get_by_index(net, ifindex))
- return net->ifindex = ifindex;
- }
+ if (!ifindex)
+ err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
+ xa_limit_31b, &net->ifindex, GFP_KERNEL);
+ else
+ err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ return ifindex;
+}
+
+static void dev_index_release(struct net *net, int ifindex)
+{
+ /* Expect only unused indexes, unlist_netdevice() removes the used */
+ WARN_ON(xa_erase(&net->dev_by_index, ifindex));
}
/* Delayed registration/unregisteration */
@@ -10051,11 +10069,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
}
- ret = -EBUSY;
- if (!dev->ifindex)
- dev->ifindex = dev_new_index(net);
- else if (__dev_get_by_index(net, dev->ifindex))
+ ret = dev_index_reserve(net, dev->ifindex);
+ if (ret < 0)
goto err_uninit;
+ dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
@@ -10102,7 +10119,7 @@ int register_netdevice(struct net_device *dev)
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
- goto err_uninit;
+ goto err_ifindex_release;
ret = netdev_register_kobject(dev);
write_lock(&dev_base_lock);
@@ -10158,6 +10175,8 @@ out:
err_uninit_notify:
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
+err_ifindex_release:
+ dev_index_release(net, dev->ifindex);
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -11035,9 +11054,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
}
/* Check that new_ifindex isn't used yet. */
- err = -EBUSY;
- if (new_ifindex && __dev_get_by_index(net, new_ifindex))
- goto out;
+ if (new_ifindex) {
+ err = dev_index_reserve(net, new_ifindex);
+ if (err < 0)
+ goto out;
+ } else {
+ /* If there is an ifindex conflict assign a new one */
+ err = dev_index_reserve(net, dev->ifindex);
+ if (err == -EBUSY)
+ err = dev_index_reserve(net, 0);
+ if (err < 0)
+ goto out;
+ new_ifindex = err;
+ }
/*
* And now a mini version of register_netdevice unregister_netdevice.
@@ -11065,13 +11094,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
rcu_barrier();
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
- /* If there is an ifindex conflict assign a new one */
- if (!new_ifindex) {
- if (__dev_get_by_index(net, dev->ifindex))
- new_ifindex = dev_new_index(net);
- else
- new_ifindex = dev->ifindex;
- }
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
new_ifindex);
@@ -11249,6 +11271,9 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ net->ifindex = 1;
+ xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC);
+
RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
return 0;
@@ -11346,6 +11371,7 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ xa_destroy(&net->dev_by_index);
if (net != &init_net)
WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 65ef4867fc49..797c813c7c77 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -101,43 +101,22 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
- int idx = 0, s_idx;
- int h, s_h;
- int err;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
+ int err = 0;
rtnl_lock();
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
-
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(netdev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = netdev_nl_dev_fill(netdev, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NETDEV_CMD_DEV_GET);
- if (err < 0)
- break;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, netdev, cb->args[0]) {
+ err = netdev_nl_dev_fill(netdev, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NETDEV_CMD_DEV_GET);
+ if (err < 0)
+ break;
}
-
rtnl_unlock();
if (err != -EMSGSIZE)
return err;
- cb->args[1] = idx;
- cb->args[0] = h;
- cb->seq = net->dev_base_seq;
-
return skb->len;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 39a459b0111b..ae344f1b0bbd 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
* @ops: request ops of currently processed message type
* @req_info: parsed request header of processed request
* @reply_data: data needed to compose the reply
- * @pos_hash: saved iteration position - hashbucket
- * @pos_idx: saved iteration position - index
+ * @pos_ifindex: saved iteration position - ifindex
*
* These parameters are kept in struct netlink_callback as context preserved
* between iterations. They are initialized by ethnl_default_start() and used
@@ -263,8 +262,7 @@ struct ethnl_dump_ctx {
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
struct ethnl_reply_data *reply_data;
- int pos_hash;
- int pos_idx;
+ unsigned long pos_ifindex;
};
static const struct ethnl_request_ops *
@@ -490,55 +488,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
{
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
rtnl_lock();
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
- unsigned int seq;
-
- head = &net->dev_index_head[h];
-
-restart_chain:
- seq = net->dev_base_seq;
- cb->seq = seq;
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- dev_hold(dev);
- rtnl_unlock();
-
- ret = ethnl_default_dump_one(skb, dev, ctx, cb);
- dev_put(dev);
- if (ret < 0) {
- if (ret == -EOPNOTSUPP)
- goto lock_and_cont;
- if (likely(skb->len))
- ret = skb->len;
- goto out;
- }
-lock_and_cont:
- rtnl_lock();
- if (net->dev_base_seq != seq) {
- s_idx = idx + 1;
- goto restart_chain;
- }
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
+ dev_hold(dev);
+ rtnl_unlock();
+
+ ret = ethnl_default_dump_one(skb, dev, ctx, cb);
+
+ rtnl_lock();
+ dev_put(dev);
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ ret = skb->len;
+ break;
+ }
}
rtnl_unlock();
-out:
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
return ret;
}
@@ -584,8 +554,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
ctx->ops = ops;
ctx->req_info = req_info;
ctx->reply_data = reply_data;
- ctx->pos_hash = 0;
- ctx->pos_idx = 0;
+ ctx->pos_ifindex = 0;
return 0;
diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
index 67fb414ca859..05f752557b5e 100644
--- a/net/ethtool/tunnels.c
+++ b/net/ethtool/tunnels.c
@@ -212,8 +212,7 @@ err_unlock_rtnl:
struct ethnl_tunnel_info_dump_ctx {
struct ethnl_req_info req_info;
- int pos_hash;
- int pos_idx;
+ unsigned long ifindex;
};
int ethnl_tunnel_info_start(struct netlink_callback *cb)
@@ -243,57 +242,39 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
void *ehdr;
rtnl_lock();
- cb->seq = net->dev_base_seq;
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
-
- head = &net->dev_index_head[h];
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
-
- ehdr = ethnl_dump_put(skb, cb,
- ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
- if (!ehdr) {
- ret = -EMSGSIZE;
- goto out;
- }
-
- ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER);
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- goto out;
- }
-
- ctx->req_info.dev = dev;
- ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
- ctx->req_info.dev = NULL;
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- if (ret == -EOPNOTSUPP)
- goto cont;
- goto out;
- }
- genlmsg_end(skb, ehdr);
-cont:
- idx++;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ehdr = ethnl_dump_put(skb, cb,
+ ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
}
+
+ ret = ethnl_fill_reply_header(skb, dev,
+ ETHTOOL_A_TUNNEL_INFO_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ ctx->req_info.dev = dev;
+ ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
+ ctx->req_info.dev = NULL;
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ if (ret == -EOPNOTSUPP)
+ continue;
+ break;
+ }
+ genlmsg_end(skb, ehdr);
}
-out:
rtnl_unlock();
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
if (ret == -EMSGSIZE && skb->len)
return skb->len;
return ret;