summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/bpf/xsk.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-22 05:24:12 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-22 05:24:12 +0300
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /tools/testing/selftests/bpf/xsk.c
parent36289a03bcd3aabdf66de75cb6d1b4ee15726438 (diff)
parentd1fabc68f8e0541d41657096dc713cb01775652d (diff)
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.xz
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'tools/testing/selftests/bpf/xsk.c')
-rw-r--r--tools/testing/selftests/bpf/xsk.c677
1 files changed, 36 insertions, 641 deletions
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 39d349509ba4..687d83e707f8 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -49,10 +49,7 @@
#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
-enum xsk_prog {
- XSK_PROG_FALLBACK,
- XSK_PROG_REDIRECT_FLAGS,
-};
+#define XSKMAP_SIZE 1
struct xsk_umem {
struct xsk_ring_prod *fill_save;
@@ -74,43 +71,16 @@ struct xsk_ctx {
int refcount;
int ifindex;
struct list_head list;
- int prog_fd;
- int link_fd;
- int xsks_map_fd;
- char ifname[IFNAMSIZ];
- bool has_bpf_link;
};
struct xsk_socket {
struct xsk_ring_cons *rx;
struct xsk_ring_prod *tx;
- __u64 outstanding_tx;
struct xsk_ctx *ctx;
struct xsk_socket_config config;
int fd;
};
-struct xsk_nl_info {
- bool xdp_prog_attached;
- int ifindex;
- int fd;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_ring_offset_v1 {
- __u64 producer;
- __u64 consumer;
- __u64 desc;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_mmap_offsets_v1 {
- struct xdp_ring_offset_v1 rx;
- struct xdp_ring_offset_v1 tx;
- struct xdp_ring_offset_v1 fr;
- struct xdp_ring_offset_v1 cr;
-};
-
int xsk_umem__fd(const struct xsk_umem *umem)
{
return umem ? umem->fd : -EINVAL;
@@ -153,55 +123,17 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
if (!usr_cfg) {
cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg->libbpf_flags = 0;
- cfg->xdp_flags = 0;
cfg->bind_flags = 0;
return 0;
}
- if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
- return -EINVAL;
-
cfg->rx_size = usr_cfg->rx_size;
cfg->tx_size = usr_cfg->tx_size;
- cfg->libbpf_flags = usr_cfg->libbpf_flags;
- cfg->xdp_flags = usr_cfg->xdp_flags;
cfg->bind_flags = usr_cfg->bind_flags;
return 0;
}
-static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
-{
- struct xdp_mmap_offsets_v1 off_v1;
-
- /* getsockopt on a kernel <= 5.3 has no flags fields.
- * Copy over the offsets to the correct places in the >=5.4 format
- * and put the flags where they would have been on that kernel.
- */
- memcpy(&off_v1, off, sizeof(off_v1));
-
- off->rx.producer = off_v1.rx.producer;
- off->rx.consumer = off_v1.rx.consumer;
- off->rx.desc = off_v1.rx.desc;
- off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
-
- off->tx.producer = off_v1.tx.producer;
- off->tx.consumer = off_v1.tx.consumer;
- off->tx.desc = off_v1.tx.desc;
- off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
-
- off->fr.producer = off_v1.fr.producer;
- off->fr.consumer = off_v1.fr.consumer;
- off->fr.desc = off_v1.fr.desc;
- off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
-
- off->cr.producer = off_v1.cr.producer;
- off->cr.consumer = off_v1.cr.consumer;
- off->cr.desc = off_v1.cr.desc;
- off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
-}
-
static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
socklen_t optlen;
@@ -215,11 +147,6 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
if (optlen == sizeof(*off))
return 0;
- if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
- xsk_mmap_offsets_v1(off);
- return 0;
- }
-
return -EINVAL;
}
@@ -340,531 +267,56 @@ out_umem_alloc:
return err;
}
-struct xsk_umem_config_v1 {
- __u32 fill_size;
- __u32 comp_size;
- __u32 frame_size;
- __u32 frame_headroom;
-};
-
-static enum xsk_prog get_xsk_prog(void)
-{
- enum xsk_prog detected = XSK_PROG_FALLBACK;
- char data_in = 0, data_out;
- struct bpf_insn insns[] = {
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
- BPF_EMIT_CALL(BPF_FUNC_redirect_map),
- BPF_EXIT_INSN(),
- };
- LIBBPF_OPTS(bpf_test_run_opts, opts,
- .data_in = &data_in,
- .data_size_in = 1,
- .data_out = &data_out,
- );
-
- int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
-
- map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
- if (map_fd < 0)
- return detected;
-
- insns[0].imm = map_fd;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
- if (prog_fd < 0) {
- close(map_fd);
- return detected;
- }
-
- ret = bpf_prog_test_run_opts(prog_fd, &opts);
- if (!ret && opts.retval == XDP_PASS)
- detected = XSK_PROG_REDIRECT_FLAGS;
- close(prog_fd);
- close(map_fd);
- return detected;
-}
-
-static int xsk_load_xdp_prog(struct xsk_socket *xsk)
-{
- static const int log_buf_size = 16 * 1024;
- struct xsk_ctx *ctx = xsk->ctx;
- char log_buf[log_buf_size];
- int prog_fd;
-
- /* This is the fallback C-program:
- * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
- * {
- * int ret, index = ctx->rx_queue_index;
- *
- * // A set entry here means that the correspnding queue_id
- * // has an active AF_XDP socket bound to it.
- * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
- * if (ret > 0)
- * return ret;
- *
- * // Fallback for pre-5.3 kernels, not supporting default
- * // action in the flags parameter.
- * if (bpf_map_lookup_elem(&xsks_map, &index))
- * return bpf_redirect_map(&xsks_map, index, 0);
- * return XDP_PASS;
- * }
- */
- struct bpf_insn prog[] = {
- /* r2 = *(u32 *)(r1 + 16) */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
- /* *(u32 *)(r10 - 4) = r2 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* r3 = XDP_PASS */
- BPF_MOV64_IMM(BPF_REG_3, 2),
- /* call bpf_redirect_map */
- BPF_EMIT_CALL(BPF_FUNC_redirect_map),
- /* if w0 != 0 goto pc+13 */
- BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
- /* r2 = r10 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- /* r2 += -4 */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* call bpf_map_lookup_elem */
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- /* r1 = r0 */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- /* r0 = XDP_PASS */
- BPF_MOV64_IMM(BPF_REG_0, 2),
- /* if r1 == 0 goto pc+5 */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
- /* r2 = *(u32 *)(r10 - 4) */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* r3 = 0 */
- BPF_MOV64_IMM(BPF_REG_3, 0),
- /* call bpf_redirect_map */
- BPF_EMIT_CALL(BPF_FUNC_redirect_map),
- /* The jumps are to this instruction */
- BPF_EXIT_INSN(),
- };
-
- /* This is the post-5.3 kernel C-program:
- * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
- * {
- * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
- * }
- */
- struct bpf_insn prog_redirect_flags[] = {
- /* r2 = *(u32 *)(r1 + 16) */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
- /* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
- /* r3 = XDP_PASS */
- BPF_MOV64_IMM(BPF_REG_3, 2),
- /* call bpf_redirect_map */
- BPF_EMIT_CALL(BPF_FUNC_redirect_map),
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt[] = {ARRAY_SIZE(prog),
- ARRAY_SIZE(prog_redirect_flags),
- };
- struct bpf_insn *progs[] = {prog, prog_redirect_flags};
- enum xsk_prog option = get_xsk_prog();
- LIBBPF_OPTS(bpf_prog_load_opts, opts,
- .log_buf = log_buf,
- .log_size = log_buf_size,
- );
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
- progs[option], insns_cnt[option], &opts);
- if (prog_fd < 0) {
- pr_warn("BPF log buffer:\n%s", log_buf);
- return prog_fd;
- }
-
- ctx->prog_fd = prog_fd;
- return 0;
-}
-
-static int xsk_create_bpf_link(struct xsk_socket *xsk)
-{
- DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
- struct xsk_ctx *ctx = xsk->ctx;
- __u32 prog_id = 0;
- int link_fd;
- int err;
-
- err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
- if (err) {
- pr_warn("getting XDP prog id failed\n");
- return err;
- }
-
- /* if there's a netlink-based XDP prog loaded on interface, bail out
- * and ask user to do the removal by himself
- */
- if (prog_id) {
- pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
- return -EINVAL;
- }
-
- opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
-
- link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
- if (link_fd < 0) {
- pr_warn("bpf_link_create failed: %s\n", strerror(errno));
- return link_fd;
- }
-
- ctx->link_fd = link_fd;
- return 0;
-}
-
-static int xsk_get_max_queues(struct xsk_socket *xsk)
-{
- struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
- struct xsk_ctx *ctx = xsk->ctx;
- struct ifreq ifr = {};
- int fd, err, ret;
-
- fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
- if (fd < 0)
- return -errno;
-
- ifr.ifr_data = (void *)&channels;
- bpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
- err = ioctl(fd, SIOCETHTOOL, &ifr);
- if (err && errno != EOPNOTSUPP) {
- ret = -errno;
- goto out;
- }
-
- if (err) {
- /* If the device says it has no channels, then all traffic
- * is sent to a single stream, so max queues = 1.
- */
- ret = 1;
- } else {
- /* Take the max of rx, tx, combined. Drivers return
- * the number of channels in different ways.
- */
- ret = max(channels.max_rx, channels.max_tx);
- ret = max(ret, (int)channels.max_combined);
- }
-
-out:
- close(fd);
- return ret;
-}
-
-static int xsk_create_bpf_maps(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
- int max_queues;
- int fd;
-
- max_queues = xsk_get_max_queues(xsk);
- if (max_queues < 0)
- return max_queues;
-
- fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
- sizeof(int), sizeof(int), max_queues, NULL);
- if (fd < 0)
- return fd;
-
- ctx->xsks_map_fd = fd;
-
- return 0;
-}
-
-static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
-
- bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
- close(ctx->xsks_map_fd);
-}
-
-static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
-{
- __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
- __u32 map_len = sizeof(struct bpf_map_info);
- struct bpf_prog_info prog_info = {};
- struct xsk_ctx *ctx = xsk->ctx;
- struct bpf_map_info map_info;
- int fd, err;
-
- err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
- if (err)
- return err;
-
- num_maps = prog_info.nr_map_ids;
-
- map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
- if (!map_ids)
- return -ENOMEM;
-
- memset(&prog_info, 0, prog_len);
- prog_info.nr_map_ids = num_maps;
- prog_info.map_ids = (__u64)(unsigned long)map_ids;
-
- err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
- if (err)
- goto out_map_ids;
-
- ctx->xsks_map_fd = -1;
-
- for (i = 0; i < prog_info.nr_map_ids; i++) {
- fd = bpf_map_get_fd_by_id(map_ids[i]);
- if (fd < 0)
- continue;
-
- memset(&map_info, 0, map_len);
- err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
- if (err) {
- close(fd);
- continue;
- }
-
- if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
- ctx->xsks_map_fd = fd;
- break;
- }
-
- close(fd);
- }
-
- if (ctx->xsks_map_fd == -1)
- err = -ENOENT;
-
-out_map_ids:
- free(map_ids);
- return err;
-}
-
-static int xsk_set_bpf_maps(struct xsk_socket *xsk)
-{
- struct xsk_ctx *ctx = xsk->ctx;
-
- return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
- &xsk->fd, 0);
-}
-
-static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
+bool xsk_is_in_mode(u32 ifindex, int mode)
{
- struct bpf_link_info link_info;
- __u32 link_len;
- __u32 id = 0;
- int err;
- int fd;
-
- while (true) {
- err = bpf_link_get_next_id(id, &id);
- if (err) {
- if (errno == ENOENT) {
- err = 0;
- break;
- }
- pr_warn("can't get next link: %s\n", strerror(errno));
- break;
- }
-
- fd = bpf_link_get_fd_by_id(id);
- if (fd < 0) {
- if (errno == ENOENT)
- continue;
- pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
- err = -errno;
- break;
- }
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int ret;
- link_len = sizeof(struct bpf_link_info);
- memset(&link_info, 0, link_len);
- err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
- if (err) {
- pr_warn("can't get link info: %s\n", strerror(errno));
- close(fd);
- break;
- }
- if (link_info.type == BPF_LINK_TYPE_XDP) {
- if (link_info.xdp.ifindex == ifindex) {
- *link_fd = fd;
- if (prog_id)
- *prog_id = link_info.prog_id;
- break;
- }
- }
- close(fd);
+ ret = bpf_xdp_query(ifindex, mode, &opts);
+ if (ret) {
+ printf("XDP mode query returned error %s\n", strerror(errno));
+ return false;
}
- return err;
-}
+ if (mode == XDP_FLAGS_DRV_MODE)
+ return opts.attach_mode == XDP_ATTACHED_DRV;
+ else if (mode == XDP_FLAGS_SKB_MODE)
+ return opts.attach_mode == XDP_ATTACHED_SKB;
-static bool xsk_probe_bpf_link(void)
-{
- LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
- struct bpf_insn insns[2] = {
- BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
- BPF_EXIT_INSN()
- };
- int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
- int ifindex_lo = 1;
- bool ret = false;
- int err;
-
- err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
- if (err)
- return ret;
-
- if (link_fd >= 0)
- return true;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
- if (prog_fd < 0)
- return ret;
-
- link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
- close(prog_fd);
-
- if (link_fd >= 0) {
- ret = true;
- close(link_fd);
- }
-
- return ret;
+ return false;
}
-static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
{
- char ifname[IFNAMSIZ];
- struct xsk_ctx *ctx;
- char *interface;
-
- ctx = calloc(1, sizeof(*ctx));
- if (!ctx)
- return -ENOMEM;
-
- interface = if_indextoname(ifindex, &ifname[0]);
- if (!interface) {
- free(ctx);
- return -errno;
- }
-
- ctx->ifindex = ifindex;
- bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
-
- xsk->ctx = ctx;
- xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
+ int prog_fd;
- return 0;
+ prog_fd = bpf_program__fd(prog);
+ return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
}
-static int xsk_init_xdp_res(struct xsk_socket *xsk,
- int *xsks_map_fd)
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
{
- struct xsk_ctx *ctx = xsk->ctx;
- int err;
-
- err = xsk_create_bpf_maps(xsk);
- if (err)
- return err;
-
- err = xsk_load_xdp_prog(xsk);
- if (err)
- goto err_load_xdp_prog;
-
- if (ctx->has_bpf_link)
- err = xsk_create_bpf_link(xsk);
- else
- err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
- xsk->config.xdp_flags, NULL);
-
- if (err)
- goto err_attach_xdp_prog;
-
- if (!xsk->rx)
- return err;
-
- err = xsk_set_bpf_maps(xsk);
- if (err)
- goto err_set_bpf_maps;
-
- return err;
-
-err_set_bpf_maps:
- if (ctx->has_bpf_link)
- close(ctx->link_fd);
- else
- bpf_xdp_detach(ctx->ifindex, 0, NULL);
-err_attach_xdp_prog:
- close(ctx->prog_fd);
-err_load_xdp_prog:
- xsk_delete_bpf_maps(xsk);
- return err;
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
}
-static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
+void xsk_clear_xskmap(struct bpf_map *map)
{
- struct xsk_ctx *ctx = xsk->ctx;
- int err;
-
- ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (ctx->prog_fd < 0) {
- err = -errno;
- goto err_prog_fd;
- }
- err = xsk_lookup_bpf_maps(xsk);
- if (err)
- goto err_lookup_maps;
-
- if (!xsk->rx)
- return err;
-
- err = xsk_set_bpf_maps(xsk);
- if (err)
- goto err_set_maps;
+ u32 index = 0;
+ int map_fd;
- return err;
-
-err_set_maps:
- close(ctx->xsks_map_fd);
-err_lookup_maps:
- close(ctx->prog_fd);
-err_prog_fd:
- if (ctx->has_bpf_link)
- close(ctx->link_fd);
- return err;
+ map_fd = bpf_map__fd(map);
+ bpf_map_delete_elem(map_fd, &index);
}
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
{
- struct xsk_socket *xsk = _xdp;
- struct xsk_ctx *ctx = xsk->ctx;
- __u32 prog_id = 0;
- int err;
+ int map_fd, sock_fd;
+ u32 index = 0;
- if (ctx->has_bpf_link)
- err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
- else
- err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
+ map_fd = bpf_map__fd(map);
+ sock_fd = xsk_socket__fd(xsk);
- if (err)
- return err;
-
- err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
- xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
-
- if (!err && xsks_map_fd)
- *xsks_map_fd = ctx->xsks_map_fd;
-
- return err;
-}
-
-int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
-{
- return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+ return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
}
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
@@ -913,7 +365,7 @@ out_free:
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
struct xsk_umem *umem, int ifindex,
- const char *ifname, __u32 queue_id,
+ __u32 queue_id,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp)
{
@@ -940,51 +392,15 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
ctx->refcount = 1;
ctx->umem = umem;
ctx->queue_id = queue_id;
- bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
ctx->fill = fill;
ctx->comp = comp;
list_add(&ctx->list, &umem->ctx_list);
- ctx->has_bpf_link = xsk_probe_bpf_link();
return ctx;
}
-static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
-{
- free(xsk->ctx);
- free(xsk);
-}
-
-int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
-{
- xsk->ctx->xsks_map_fd = fd;
- return xsk_set_bpf_maps(xsk);
-}
-
-int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
-{
- struct xsk_socket *xsk;
- int res;
-
- xsk = calloc(1, sizeof(*xsk));
- if (!xsk)
- return -ENOMEM;
-
- res = xsk_create_xsk_struct(ifindex, xsk);
- if (res) {
- free(xsk);
- return -EINVAL;
- }
-
- res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
-
- xsk_destroy_xsk_struct(xsk);
-
- return res;
-}
-
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
- const char *ifname,
+ int ifindex,
__u32 queue_id, struct xsk_umem *umem,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
@@ -998,7 +414,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
struct xsk_ctx *ctx;
- int err, ifindex;
+ int err;
if (!umem || !xsk_ptr || !(rx || tx))
return -EFAULT;
@@ -1013,13 +429,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
if (err)
goto out_xsk_alloc;
- xsk->outstanding_tx = 0;
- ifindex = if_nametoindex(ifname);
- if (!ifindex) {
- err = -errno;
- goto out_xsk_alloc;
- }
-
if (umem->refcount++ > 0) {
xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
if (xsk->fd < 0) {
@@ -1039,8 +448,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
goto out_socket;
}
- ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
- fill, comp);
+ ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
if (!ctx) {
err = -ENOMEM;
goto out_socket;
@@ -1138,12 +546,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
goto out_mmap_tx;
}
- if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
- err = __xsk_setup_xdp_prog(xsk, NULL);
- if (err)
- goto out_mmap_tx;
- }
-
*xsk_ptr = xsk;
umem->fill_save = NULL;
umem->comp_save = NULL;
@@ -1167,7 +569,7 @@ out_xsk_alloc:
return err;
}
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
__u32 queue_id, struct xsk_umem *umem,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config)
@@ -1175,7 +577,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
if (!umem)
return -EFAULT;
- return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+ return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
rx, tx, umem->fill_save,
umem->comp_save, usr_config);
}
@@ -1219,13 +621,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
ctx = xsk->ctx;
umem = ctx->umem;
- if (ctx->refcount == 1) {
- xsk_delete_bpf_maps(xsk);
- close(ctx->prog_fd);
- if (ctx->has_bpf_link)
- close(ctx->link_fd);
- }
-
xsk_put_ctx(ctx, true);
err = xsk_get_mmap_offsets(xsk->fd, &off);