diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-12-01 03:56:09 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-12-01 03:58:42 +0300 |
commit | 753c8608f3e579307493a63b9242667aee35a751 (patch) | |
tree | 4197358069e8db7bc0d36a474612f7ffefc7ba72 /net | |
parent | 975f2d73a99f35b57ffa2ad7bff8562225cdcfcb (diff) | |
parent | f690ff9122d2ca8e38769f3bcf217bd3df681a36 (diff) | |
download | linux-753c8608f3e579307493a63b9242667aee35a751.tar.xz |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2023-11-30
We've added 30 non-merge commits during the last 7 day(s) which contain
a total of 58 files changed, 1598 insertions(+), 154 deletions(-).
The main changes are:
1) Add initial TX metadata implementation for AF_XDP with support in mlx5
and stmmac drivers. Two types of offloads are supported right now, that
is, TX timestamp and TX checksum offload, from Stanislav Fomichev with
stmmac implementation from Song Yoong Siang.
2) Change BPF verifier logic to validate global subprograms lazily instead
of unconditionally before the main program, so they can be guarded using
BPF CO-RE techniques, from Andrii Nakryiko.
3) Add BPF link_info support for uprobe multi link along with bpftool
integration for the latter, from Jiri Olsa.
4) Use pkg-config in BPF selftests to determine ld flags which is
in particular needed for linking statically, from Akihiko Odaki.
5) Fix a few BPF selftest failures to adapt to the upcoming LLVM18,
from Yonghong Song.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (30 commits)
bpf/tests: Remove duplicate JSGT tests
selftests/bpf: Add TX side to xdp_hw_metadata
selftests/bpf: Convert xdp_hw_metadata to XDP_USE_NEED_WAKEUP
selftests/bpf: Add TX side to xdp_metadata
selftests/bpf: Add csum helpers
selftests/xsk: Support tx_metadata_len
xsk: Add option to calculate TX checksum in SW
xsk: Validate xsk_tx_metadata flags
xsk: Document tx_metadata_len layout
net: stmmac: Add Tx HWTS support to XDP ZC
net/mlx5e: Implement AF_XDP TX timestamp and checksum offload
tools: ynl: Print xsk-features from the sample
xsk: Add TX timestamp and TX checksum offload support
xsk: Support tx_metadata_len
selftests/bpf: Use pkg-config for libelf
selftests/bpf: Override PKG_CONFIG for static builds
selftests/bpf: Choose pkg-config for the target
bpftool: Add support to display uprobe_multi links
selftests/bpf: Add link_info test for uprobe_multi link
selftests/bpf: Use bpf_link__destroy in fill_link_info tests
...
====================
Conflicts:
Documentation/netlink/specs/netdev.yaml:
839ff60df3ab ("net: page_pool: add nlspec for basic access to page pools")
48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support")
https://lore.kernel.org/all/20231201094705.1ee3cab8@canb.auug.org.au/
While at it also regen, tree is dirty after:
48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support")
looks like code wasn't re-rendered after "render-max" was removed.
Link: https://lore.kernel.org/r/20231130145708.32573-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/test_run.c | 2 | ||||
-rw-r--r-- | net/core/netdev-genl.c | 13 | ||||
-rw-r--r-- | net/xdp/xdp_umem.c | 11 | ||||
-rw-r--r-- | net/xdp/xsk.c | 56 | ||||
-rw-r--r-- | net/xdp/xsk_buff_pool.c | 2 | ||||
-rw-r--r-- | net/xdp/xsk_queue.h | 19 |
6 files changed, 91 insertions, 12 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c9fdcc5cdce1..711cf5d59816 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -542,7 +542,7 @@ struct bpf_fentry_test_t { int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) { - asm volatile (""); + asm volatile ("": "+r"(arg)); return (long)arg; } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fe61f85bcf33..10f2124e9e23 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -6,6 +6,7 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <net/xdp.h> +#include <net/xdp_sock.h> #include "netdev-genl-gen.h" @@ -13,6 +14,7 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; @@ -26,11 +28,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC + if (netdev->xsk_tx_metadata_ops) { + if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) + xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; + if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) + xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; + } + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, - xdp_rx_meta, NETDEV_A_DEV_PAD)) { + xdp_rx_meta, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, + xsk_features, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 06cead2b8e34..caa340134b0e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) return 0; } +#define XDP_UMEM_FLAGS_VALID ( \ + XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ + XDP_UMEM_TX_SW_CSUM | \ + 0) + static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) + if (mr->flags & ~XDP_UMEM_FLAGS_VALID) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) @@ -199,6 +204,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; + if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) + return -EINVAL; + umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; @@ -207,6 +215,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; + umem->tx_metadata_len = mr->tx_metadata_len; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ae9f8cb611f6..281d49b4fca4 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb) static void xsk_destruct_skb(struct sk_buff *skb) { + struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; + + if (compl->tx_timestamp) { + /* sw completion timestamp, not a real one */ + *compl->tx_timestamp = ktime_get_tai_fast_ns(); + } + xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); sock_wfree(skb); } @@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xdp_desc *desc) { + struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; + bool first_frag = false; int err; if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) { @@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, kfree_skb(skb); goto free_err; } + + first_frag = true; } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct page *page; @@ -709,12 +720,45 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); } + + if (first_frag && desc->options & XDP_TX_METADATA) { + if (unlikely(xs->pool->tx_metadata_len == 0)) { + err = -EINVAL; + goto free_err; + } + + meta = buffer - xs->pool->tx_metadata_len; + if (unlikely(!xsk_buff_valid_tx_metadata(meta))) { + err = -EINVAL; + goto free_err; + } + + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { + if (unlikely(meta->request.csum_start + + meta->request.csum_offset + + sizeof(__sum16) > len)) { + err = -EINVAL; + goto free_err; + } + + skb->csum_start = hr + meta->request.csum_start; + skb->csum_offset = meta->request.csum_offset; + skb->ip_summed = CHECKSUM_PARTIAL; + + if (unlikely(xs->pool->tx_sw_csum)) { + err = skb_checksum_help(skb); + if (err) + goto free_err; + } + } + } } skb->dev = dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); xsk_set_destructor_arg(skb); return skb; @@ -1283,6 +1327,14 @@ struct xdp_umem_reg_v1 { __u32 headroom; }; +struct xdp_umem_reg_v2 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1326,8 +1378,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(struct xdp_umem_reg_v1)) return -EINVAL; - else if (optlen < sizeof(mr)) + else if (optlen < sizeof(struct xdp_umem_reg_v2)) mr_size = sizeof(struct xdp_umem_reg_v1); + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v2); if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09be..4f6f538a5462 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -85,6 +85,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; + pool->tx_metadata_len = umem->tx_metadata_len; + pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 13354a1e4280..6f2d1621c992 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -137,21 +137,23 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_unused_options_set(u32 options) { - return options & ~XDP_PKT_CONTD; + return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA); } static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 offset = desc->addr & (pool->chunk_size - 1); + u64 addr = desc->addr - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; + u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; - if (offset + desc->len > pool->chunk_size) + if (offset + len > pool->chunk_size) return false; - if (desc->addr >= pool->addrs_cnt) + if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; - if (desc->len > pool->chunk_size) + if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) |