summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2024-03-01 13:43:11 +0300
committerDavid S. Miller <davem@davemloft.net>2024-03-01 13:43:11 +0300
commit76f06cbd7b310e705465e11ca93be8db247aec50 (patch)
tree61788d5f714fe9d344d820b9e2bdd4bb8c100bb0
parente2d890afba8e4b48b0377efb9821f50f6b93fe5c (diff)
parent8ee60f9c41fb01440e8a8f97127869c9b1978362 (diff)
downloadlinux-76f06cbd7b310e705465e11ca93be8db247aec50.tar.xz
Merge branch 'netdevsim-link'
David Wei says: ==================== netdevsim: link and forward skbs between ports This patchset adds the ability to link two netdevsim ports together and forward skbs between them, similar to veth. The goal is to use netdevsim for testing features e.g. zero copy Rx using io_uring. This feature was tested locally on QEMU, and a selftest is included. I ran netdev selftests CI style and all tests but the following passed: - gro.sh - l2tp.sh - ip_local_port_range.sh gro.sh fails because virtme-ng mounts as read-only and it tries to write to log.txt. This issue was reported to virtme-ng upstream. l2tp.sh and ip_local_port_range.sh both fail for me on net-next/main as well. --- v13->v14: - implement ndo_get_iflink() - fix returning 0 if peer is already linked during linking or not linked during unlinking - bump dropped counter if nsim_ipsec_tx() fails and generally reorder nsim_start_xmit() - fix overflowing lines and indentations v12->v13: - wait for socat listening port to be ready before sending data in selftest v11->v12: - fix leaked netns refs - fix rtnetlink.sh kci_test_ipsec_offload() selftest v10->v11: - add udevadm settle after creating netdevsims in selftest v9->v10: - fix not freeing skb when not there is no peer - prevent possible id clashes in selftest - cleanup selftest on error paths v8->v9: - switch to getting netns using fd rather than id - prevent linking a netdevsim to itself - update tests v7->v8: - fix not dereferencing RCU ptr using rcu_dereference() - remove unused variables in selftest v6->v7: - change link syntax to netnsid:ifidx - replace dev_get_by_index() with __dev_get_by_index() - check for NULL peer when linking - add a sysfs attribute for unlinking - only update Tx stats if not dropped - update selftest v5->v6: - reworked to link two netdevsims using sysfs attribute on the bus device instead of debugfs due to deadlock possibility if a netdevsim is removed during linking - removed unnecessary patch maintaining a list of probed nsim_devs - updated selftest v4->v5: - reduce nsim_dev_list_lock critical section - fixed missing mutex unlock during unwind ladder - rework nsim_dev_peer_write synchronization to take devlink lock as well as rtnl_lock - return err msgs to user during linking if port doesn't exist or linking to self - update tx stats outside of RCU lock v3->v4: - maintain a mutex protected list of probed nsim_devs instead of using nsim_bus_dev - fixed synchronization issues by taking rtnl_lock - track tx_dropped skbs v2->v3: - take lock when traversing nsim_bus_dev_list - take device ref when getting a nsim_bus_dev - return 0 if nsim_dev_peer_read cannot find the port - address code formatting - do not hard code values in selftests - add Makefile for selftests v1->v2: - renamed debugfs file from "link" to "peer" - replaced strstep() with sscanf() for consistency - increased char[] buf sz to 22 for copying id + port from user - added err msg w/ expected fmt when linking as a hint to user - prevent linking port to itself - protect peer ptr using RCU ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/netdevsim/bus.c145
-rw-r--r--drivers/net/netdevsim/netdev.c53
-rw-r--r--drivers/net/netdevsim/netdevsim.h3
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh143
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh2
6 files changed, 342 insertions, 5 deletions
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 0c5aff63d242..64c0cdd31bf8 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
}
static BUS_ATTR_WO(del_device);
+static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+ struct netdevsim *nsim_a, *nsim_b, *peer;
+ struct net_device *dev_a, *dev_b;
+ unsigned int ifidx_a, ifidx_b;
+ int netnsfd_a, netnsfd_b, err;
+ struct net *ns_a, *ns_b;
+
+ err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b,
+ &ifidx_b);
+ if (err != 4) {
+ pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n");
+ return -EINVAL;
+ }
+
+ ns_a = get_net_ns_by_fd(netnsfd_a);
+ if (IS_ERR(ns_a)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd_a);
+ return -EINVAL;
+ }
+
+ ns_b = get_net_ns_by_fd(netnsfd_b);
+ if (IS_ERR(ns_b)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd_b);
+ put_net(ns_a);
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+ rtnl_lock();
+ dev_a = __dev_get_by_index(ns_a, ifidx_a);
+ if (!dev_a) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n",
+ ifidx_a, netnsfd_a);
+ goto out_err;
+ }
+
+ if (!netdev_is_nsim(dev_a)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
+ ifidx_a, netnsfd_a);
+ goto out_err;
+ }
+
+ dev_b = __dev_get_by_index(ns_b, ifidx_b);
+ if (!dev_b) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n",
+ ifidx_b, netnsfd_b);
+ goto out_err;
+ }
+
+ if (!netdev_is_nsim(dev_b)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
+ ifidx_b, netnsfd_b);
+ goto out_err;
+ }
+
+ if (dev_a == dev_b) {
+ pr_err("Cannot link a netdevsim to itself\n");
+ goto out_err;
+ }
+
+ err = -EBUSY;
+ nsim_a = netdev_priv(dev_a);
+ peer = rtnl_dereference(nsim_a->peer);
+ if (peer) {
+ pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a,
+ ifidx_a);
+ goto out_err;
+ }
+
+ nsim_b = netdev_priv(dev_b);
+ peer = rtnl_dereference(nsim_b->peer);
+ if (peer) {
+ pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b,
+ ifidx_b);
+ goto out_err;
+ }
+
+ err = 0;
+ rcu_assign_pointer(nsim_a->peer, nsim_b);
+ rcu_assign_pointer(nsim_b->peer, nsim_a);
+
+out_err:
+ put_net(ns_b);
+ put_net(ns_a);
+ rtnl_unlock();
+
+ return !err ? count : err;
+}
+static BUS_ATTR_WO(link_device);
+
+static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+ struct netdevsim *nsim, *peer;
+ struct net_device *dev;
+ unsigned int ifidx;
+ int netnsfd, err;
+ struct net *ns;
+
+ err = sscanf(buf, "%u:%u", &netnsfd, &ifidx);
+ if (err != 2) {
+ pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n");
+ return -EINVAL;
+ }
+
+ ns = get_net_ns_by_fd(netnsfd);
+ if (IS_ERR(ns)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd);
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+ rtnl_lock();
+ dev = __dev_get_by_index(ns, ifidx);
+ if (!dev) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n",
+ ifidx, netnsfd);
+ goto out_put_netns;
+ }
+
+ if (!netdev_is_nsim(dev)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
+ ifidx, netnsfd);
+ goto out_put_netns;
+ }
+
+ nsim = netdev_priv(dev);
+ peer = rtnl_dereference(nsim->peer);
+ if (!peer)
+ goto out_put_netns;
+
+ err = 0;
+ RCU_INIT_POINTER(nsim->peer, NULL);
+ RCU_INIT_POINTER(peer->peer, NULL);
+
+out_put_netns:
+ put_net(ns);
+ rtnl_unlock();
+
+ return !err ? count : err;
+}
+static BUS_ATTR_WO(unlink_device);
+
static struct attribute *nsim_bus_attrs[] = {
&bus_attr_new_device.attr,
&bus_attr_del_device.attr,
+ &bus_attr_link_device.attr,
+ &bus_attr_unlink_device.attr,
NULL
};
ATTRIBUTE_GROUPS(nsim_bus);
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 77e8250282a5..8330bc0bcb7e 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -29,18 +29,35 @@
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct netdevsim *ns = netdev_priv(dev);
+ unsigned int len = skb->len;
+ struct netdevsim *peer_ns;
+ rcu_read_lock();
if (!nsim_ipsec_tx(ns, skb))
- goto out;
+ goto out_drop_free;
+ peer_ns = rcu_dereference(ns->peer);
+ if (!peer_ns)
+ goto out_drop_free;
+
+ skb_tx_timestamp(skb);
+ if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
+ goto out_drop_cnt;
+
+ rcu_read_unlock();
u64_stats_update_begin(&ns->syncp);
ns->tx_packets++;
- ns->tx_bytes += skb->len;
+ ns->tx_bytes += len;
u64_stats_update_end(&ns->syncp);
+ return NETDEV_TX_OK;
-out:
+out_drop_free:
dev_kfree_skb(skb);
-
+out_drop_cnt:
+ rcu_read_unlock();
+ u64_stats_update_begin(&ns->syncp);
+ ns->tx_dropped++;
+ u64_stats_update_end(&ns->syncp);
return NETDEV_TX_OK;
}
@@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
start = u64_stats_fetch_begin(&ns->syncp);
stats->tx_bytes = ns->tx_bytes;
stats->tx_packets = ns->tx_packets;
+ stats->tx_dropped = ns->tx_dropped;
} while (u64_stats_fetch_retry(&ns->syncp, start));
}
@@ -265,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features)
return 0;
}
+static int nsim_get_iflink(const struct net_device *dev)
+{
+ struct netdevsim *nsim, *peer;
+ int iflink;
+
+ nsim = netdev_priv(dev);
+
+ rcu_read_lock();
+ peer = rcu_dereference(nsim->peer);
+ iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0;
+ rcu_read_unlock();
+
+ return iflink;
+}
+
static const struct net_device_ops nsim_netdev_ops = {
.ndo_start_xmit = nsim_start_xmit,
.ndo_set_rx_mode = nsim_set_rx_mode,
@@ -282,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = {
.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
.ndo_setup_tc = nsim_setup_tc,
.ndo_set_features = nsim_set_features,
+ .ndo_get_iflink = nsim_get_iflink,
.ndo_bpf = nsim_bpf,
};
@@ -302,7 +336,6 @@ static void nsim_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->tx_queue_len = 0;
- dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
IFF_NO_QUEUE;
@@ -413,8 +446,13 @@ err_free_netdev:
void nsim_destroy(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
+ struct netdevsim *peer;
rtnl_lock();
+ peer = rtnl_dereference(ns->peer);
+ if (peer)
+ RCU_INIT_POINTER(peer->peer, NULL);
+ RCU_INIT_POINTER(ns->peer, NULL);
unregister_netdevice(dev);
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
nsim_macsec_teardown(ns);
@@ -427,6 +465,11 @@ void nsim_destroy(struct netdevsim *ns)
free_netdev(dev);
}
+bool netdev_is_nsim(struct net_device *dev)
+{
+ return dev->netdev_ops == &nsim_netdev_ops;
+}
+
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 028c825b86db..553c4b9b4f63 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -98,6 +98,7 @@ struct netdevsim {
u64 tx_packets;
u64 tx_bytes;
+ u64 tx_dropped;
struct u64_stats_sync syncp;
struct nsim_bus_dev *nsim_bus_dev;
@@ -125,11 +126,13 @@ struct netdevsim {
} udp_ports;
struct nsim_ethtool ethtool;
+ struct netdevsim __rcu *peer;
};
struct netdevsim *
nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
void nsim_destroy(struct netdevsim *ns);
+bool netdev_is_nsim(struct net_device *dev);
void nsim_ethtool_init(struct netdevsim *ns);
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
index 7a29a05bea8b..5bace0b7fb57 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/Makefile
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \
fib.sh \
hw_stats_l3.sh \
nexthop.sh \
+ peer.sh \
psample.sh \
tc-mq-visibility.sh \
udp_tunnel_nic.sh \
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..aed62d9e6c0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ../../../net/net_helper.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netdevsim should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netnsid should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with self should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "invalid arg should fail"
+ cleanup_ns
+ exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+ echo "expected 3 bytes, got $count"
+ res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 874a2952aa8e..bdf6f10d0558 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -801,6 +801,8 @@ kci_test_ipsec_offload()
end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
+ # we didn't create a peer, make sure we can Tx
+ ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null