summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-12-01 03:56:09 +0300
committerJakub Kicinski <kuba@kernel.org>2023-12-01 03:58:42 +0300
commit753c8608f3e579307493a63b9242667aee35a751 (patch)
tree4197358069e8db7bc0d36a474612f7ffefc7ba72 /net
parent975f2d73a99f35b57ffa2ad7bff8562225cdcfcb (diff)
parentf690ff9122d2ca8e38769f3bcf217bd3df681a36 (diff)
downloadlinux-753c8608f3e579307493a63b9242667aee35a751.tar.xz
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2023-11-30 We've added 30 non-merge commits during the last 7 day(s) which contain a total of 58 files changed, 1598 insertions(+), 154 deletions(-). The main changes are: 1) Add initial TX metadata implementation for AF_XDP with support in mlx5 and stmmac drivers. Two types of offloads are supported right now, that is, TX timestamp and TX checksum offload, from Stanislav Fomichev with stmmac implementation from Song Yoong Siang. 2) Change BPF verifier logic to validate global subprograms lazily instead of unconditionally before the main program, so they can be guarded using BPF CO-RE techniques, from Andrii Nakryiko. 3) Add BPF link_info support for uprobe multi link along with bpftool integration for the latter, from Jiri Olsa. 4) Use pkg-config in BPF selftests to determine ld flags which is in particular needed for linking statically, from Akihiko Odaki. 5) Fix a few BPF selftest failures to adapt to the upcoming LLVM18, from Yonghong Song. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (30 commits) bpf/tests: Remove duplicate JSGT tests selftests/bpf: Add TX side to xdp_hw_metadata selftests/bpf: Convert xdp_hw_metadata to XDP_USE_NEED_WAKEUP selftests/bpf: Add TX side to xdp_metadata selftests/bpf: Add csum helpers selftests/xsk: Support tx_metadata_len xsk: Add option to calculate TX checksum in SW xsk: Validate xsk_tx_metadata flags xsk: Document tx_metadata_len layout net: stmmac: Add Tx HWTS support to XDP ZC net/mlx5e: Implement AF_XDP TX timestamp and checksum offload tools: ynl: Print xsk-features from the sample xsk: Add TX timestamp and TX checksum offload support xsk: Support tx_metadata_len selftests/bpf: Use pkg-config for libelf selftests/bpf: Override PKG_CONFIG for static builds selftests/bpf: Choose pkg-config for the target bpftool: Add support to display uprobe_multi links selftests/bpf: Add link_info test for uprobe_multi link selftests/bpf: Use bpf_link__destroy in fill_link_info tests ... ==================== Conflicts: Documentation/netlink/specs/netdev.yaml: 839ff60df3ab ("net: page_pool: add nlspec for basic access to page pools") 48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support") https://lore.kernel.org/all/20231201094705.1ee3cab8@canb.auug.org.au/ While at it also regen, tree is dirty after: 48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support") looks like code wasn't re-rendered after "render-max" was removed. Link: https://lore.kernel.org/r/20231130145708.32573-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c2
-rw-r--r--net/core/netdev-genl.c13
-rw-r--r--net/xdp/xdp_umem.c11
-rw-r--r--net/xdp/xsk.c56
-rw-r--r--net/xdp/xsk_buff_pool.c2
-rw-r--r--net/xdp/xsk_queue.h19
6 files changed, 91 insertions, 12 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c9fdcc5cdce1..711cf5d59816 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -542,7 +542,7 @@ struct bpf_fentry_test_t {
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
- asm volatile ("");
+ asm volatile ("": "+r"(arg));
return (long)arg;
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index fe61f85bcf33..10f2124e9e23 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -6,6 +6,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/xdp.h>
+#include <net/xdp_sock.h>
#include "netdev-genl-gen.h"
@@ -13,6 +14,7 @@ static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
+ u64 xsk_features = 0;
u64 xdp_rx_meta = 0;
void *hdr;
@@ -26,11 +28,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
+ if (netdev->xsk_tx_metadata_ops) {
+ if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
+ if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
+ }
+
if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
netdev->xdp_features, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
- xdp_rx_meta, NETDEV_A_DEV_PAD)) {
+ xdp_rx_meta, NETDEV_A_DEV_PAD) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
+ xsk_features, NETDEV_A_DEV_PAD)) {
genlmsg_cancel(rsp, hdr);
return -EINVAL;
}
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..caa340134b0e 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
+#define XDP_UMEM_FLAGS_VALID ( \
+ XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
+ XDP_UMEM_TX_SW_CSUM | \
+ 0)
+
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
@@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+ if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
return -EINVAL;
if (!unaligned_chunks && !is_power_of_2(chunk_size))
@@ -199,6 +204,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +215,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ae9f8cb611f6..281d49b4fca4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb)
static void xsk_destruct_skb(struct sk_buff *skb)
{
+ struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta;
+
+ if (compl->tx_timestamp) {
+ /* sw completion timestamp, not a real one */
+ *compl->tx_timestamp = ktime_get_tai_fast_ns();
+ }
+
xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
+ struct xsk_tx_metadata *meta = NULL;
struct net_device *dev = xs->dev;
struct sk_buff *skb = xs->skb;
+ bool first_frag = false;
int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
@@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
kfree_skb(skb);
goto free_err;
}
+
+ first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -709,12 +720,45 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
+
+ if (first_frag && desc->options & XDP_TX_METADATA) {
+ if (unlikely(xs->pool->tx_metadata_len == 0)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ meta = buffer - xs->pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta))) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
+ if (unlikely(meta->request.csum_start +
+ meta->request.csum_offset +
+ sizeof(__sum16) > len)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ skb->csum_start = hr + meta->request.csum_start;
+ skb->csum_offset = meta->request.csum_offset;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (unlikely(xs->pool->tx_sw_csum)) {
+ err = skb_checksum_help(skb);
+ if (err)
+ goto free_err;
+ }
+ }
+ }
}
skb->dev = dev;
skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
+ xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
xsk_set_destructor_arg(skb);
return skb;
@@ -1283,6 +1327,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1326,8 +1378,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 49cb9f9a09be..4f6f538a5462 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
+ pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..6f2d1621c992 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -137,21 +137,23 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_unused_options_set(u32 options)
{
- return options & ~XDP_PKT_CONTD;
+ return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA);
}
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))