From 391145ba2accc48b596f3d438af1a6255b62a555 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 31 Oct 2023 14:56:24 -0700 Subject: bpf: Add __bpf_kfunc_{start,end}_defs macros BPF kfuncs are meant to be called from BPF programs. Accordingly, most kfuncs are not called from anywhere in the kernel, which the -Wmissing-prototypes warning is unhappy about. We've peppered __diag_ignore_all("-Wmissing-prototypes", ... everywhere kfuncs are defined in the codebase to suppress this warning. This patch adds two macros meant to bound one or many kfunc definitions. All existing kfunc definitions which use these __diag calls to suppress -Wmissing-prototypes are migrated to use the newly-introduced macros. A new __diag_ignore_all - for "-Wmissing-declarations" - is added to the __bpf_kfunc_start_defs macro based on feedback from Andrii on an earlier version of this patch [0] and another recent mailing list thread [1]. In the future we might need to ignore different warnings or do other kfunc-specific things. This change will make it easier to make such modifications for all kfunc defs. [0]: https://lore.kernel.org/bpf/CAEf4BzaE5dRWtK6RPLnjTW-MW9sx9K3Fn6uwqCTChK2Dcb1Xig@mail.gmail.com/ [1]: https://lore.kernel.org/bpf/ZT+2qCc%2FaXep0%2FLf@krava/ Signed-off-by: Dave Marchevsky Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Cc: Jiri Olsa Acked-by: Jiri Olsa Acked-by: David Vernet Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20231031215625.2343848-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index c2231c64d60b..dc5ce962f600 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -84,6 +84,15 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit -- cgit v1.2.3 From 15fb6f2b6c4c3c129adc2412ae12ec15e60a6adb Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 31 Oct 2023 14:56:25 -0700 Subject: bpf: Add __bpf_hook_{start,end} macros Not all uses of __diag_ignore_all(...) in BPF-related code in order to suppress warnings are wrapping kfunc definitions. Some "hook point" definitions - small functions meant to be used as attach points for fentry and similar BPF progs - need to suppress -Wmissing-declarations. We could use __bpf_kfunc_{start,end}_defs added in the previous patch in such cases, but this might be confusing to someone unfamiliar with BPF internals. Instead, this patch adds __bpf_hook_{start,end} macros, currently having the same effect as __bpf_kfunc_{start,end}_defs, then uses them to suppress warnings for two hook points in the kernel itself and some bpf_testmod hook points as well. Signed-off-by: Dave Marchevsky Cc: Yafang Shao Acked-by: Jiri Olsa Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20231031215625.2343848-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 2 ++ kernel/cgroup/rstat.c | 9 +++------ net/socket.c | 8 ++------ tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 ++---- 4 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index dc5ce962f600..59d404e22814 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -92,6 +92,8 @@ "Global kfuncs as their definitions will be in BTF") #define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() /* * Return the name of the passed struct, if exists, or halt the build if for diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index d80d7a608141..c0adb7254b45 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -156,19 +156,16 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); + +__bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { } -__diag_pop(); +__bpf_hook_end(); /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) diff --git a/net/socket.c b/net/socket.c index 0d1c4e78fc7f..3379c64217a4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1685,20 +1685,16 @@ struct file *__sys_socket_file(int family, int type, int protocol) * Therefore, __weak is needed to ensure that the call is still * emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "A fmod_ret entry point for BPF programs"); +__bpf_hook_start(); __weak noinline int update_socket_protocol(int family, int type, int protocol) { return protocol; } -__diag_pop(); +__bpf_hook_end(); int __sys_socket(int family, int type, int protocol) { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a5e246f7b202..91907b321f91 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 { int b; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in bpf_testmod.ko BTF"); +__bpf_hook_start(); noinline int bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { @@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a) } EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); -__diag_pop(); +__bpf_hook_end(); static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .attr = { .name = "bpf_testmod", .mode = 0666, }, -- cgit v1.2.3 From 1726483b79a72e0150734d5367e4a0238bf8fcff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Oct 2023 14:10:37 +0000 Subject: inet: shrink struct flowi_common I am looking at syzbot reports triggering kernel stack overflows involving a cascade of ipvlan devices. We can save 8 bytes in struct flowi_common. This patch alone will not fix the issue, but is a start. Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash") Signed-off-by: Eric Dumazet Cc: wenxu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231025141037.3448203-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/flow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 7f0adda3bf2f..335bbc52171c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -40,8 +40,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 __u32 flowic_secid; kuid_t flowic_uid; - struct flowi_tunnel flowic_tun_key; __u32 flowic_multipath_hash; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { -- cgit v1.2.3 From f55d8e60f10909dbc5524e261041e1d28d7d20d8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 28 Oct 2023 21:25:11 +0200 Subject: net: ethtool: Fix documentation of ethtool_sprintf() This function takes a pointer to a pointer, unlike sprintf() which is passed a plain pointer. Fix up the documentation to make this clear. Fixes: 7888fe53b706 ("ethtool: Add common function for filling out strings") Cc: Alexander Duyck Cc: Justin Stitt Cc: stable@vger.kernel.org Signed-off-by: Andrew Lunn Reviewed-by: Justin Stitt Link: https://lore.kernel.org/r/20231028192511.100001-1-andrew@lunn.ch Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 226a36ed5aa1..689028257fcc 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1045,10 +1045,10 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, /** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); -- cgit v1.2.3 From e8ae8ad479e2d037daa33756e5e72850a7bd37a9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 24 Oct 2023 09:53:33 +1100 Subject: Fix termination state for idr_for_each_entry_ul() The comment for idr_for_each_entry_ul() states after normal termination @entry is left with the value NULL This is not correct in the case where UINT_MAX has an entry in the idr. In that case @entry will be non-NULL after termination. No current code depends on the documentation being correct, but to save future code we should fix it. Also fix idr_for_each_entry_continue_ul(). While this is not documented as leaving @entry as NULL, the mellanox driver appears to depend on it doing so. So make that explicit in the documentation as well as in the code. Fixes: e33d2b74d805 ("idr: fix overflow case for idr_for_each_entry_ul()") Cc: Matthew Wilcox Cc: Chris Mi Cc: Cong Wang Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/idr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* -- cgit v1.2.3 From cdbab6236605dc11780779d9af689aea7d58cab1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Oct 2023 06:19:45 +0000 Subject: tcp: fix fastopen code vs usec TS After blamed commit, TFO client-ack-dropped-then-recovery-ms-timestamps packetdrill test failed. David Morley and Neal Cardwell started investigating and Neal pointed that we had : tcp_conn_request() tcp_try_fastopen() -> tcp_fastopen_create_child -> child = inet_csk(sk)->icsk_af_ops->syn_recv_sock() -> tcp_create_openreq_child() -> copy req_usec_ts from req: newtp->tcp_usec_ts = treq->req_usec_ts; // now the new TFO server socket always does usec TS, no matter // what the route options are... send_synack() -> tcp_make_synack() // disable tcp_rsk(req)->req_usec_ts if route option is not present: if (tcp_rsk(req)->req_usec_ts < 0) tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_conn_request() has the initial dst, we can initialize tcp_rsk(req)->req_usec_ts there instead of later in send_synack(); This means tcp_rsk(req)->req_usec_ts can be a boolean. Many thanks to David an Neal for their help. Fixes: 614e8316aa4c ("tcp: add support for usec resolution in TCP TS values") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202310302216.f79d78bc-oliver.sang@intel.com Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: David Morley Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 7 ++++--- net/ipv4/tcp_output.c | 2 -- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec4e9367f5b0..68f3d315d2e1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,7 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; - s8 req_usec_ts; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 98b25e5d147b..d37282c06e3d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -306,7 +306,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq->af_specific = af_ops; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = -1; + treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 50aaa1527150..bcb55d98004c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7115,7 +7115,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; - tcp_rsk(req)->req_usec_ts = -1; + tcp_rsk(req)->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) tcp_rsk(req)->is_mptcp = 0; #endif @@ -7143,9 +7143,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; - if (tmp_opt.tstamp_ok) + if (tmp_opt.tstamp_ok) { + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); - + } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f558c054cf6e..0d8dd5b7e2e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3693,8 +3693,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); - if (tcp_rsk(req)->req_usec_ts < 0) - tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) -- cgit v1.2.3 From 02f0717e9835dbdeee26084e42086fbd32e64eb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Nov 2023 04:52:33 +0000 Subject: net/tcp: fix possible out-of-bounds reads in tcp_hash_fail() syzbot managed to trigger a fault by sending TCP packets with all flags being set. v2: - While fixing this bug, add PSH flag handling and represent flags the way tcpdump does : [S], [S.], [P.] - Print 4-tuples more consistently between families. BUG: KASAN: stack-out-of-bounds in string_nocheck lib/vsprintf.c:645 [inline] BUG: KASAN: stack-out-of-bounds in string+0x394/0x3d0 lib/vsprintf.c:727 Read of size 1 at addr ffffc9000397f3f5 by task syz-executor299/5039 CPU: 1 PID: 5039 Comm: syz-executor299 Not tainted 6.6.0-rc7-syzkaller-02075-g55c900477f5b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 string_nocheck lib/vsprintf.c:645 [inline] string+0x394/0x3d0 lib/vsprintf.c:727 vsnprintf+0xc5f/0x1870 lib/vsprintf.c:2818 vprintk_store+0x3a0/0xb80 kernel/printk/printk.c:2191 vprintk_emit+0x14c/0x5f0 kernel/printk/printk.c:2288 vprintk+0x7b/0x90 kernel/printk/printk_safe.c:45 _printk+0xc8/0x100 kernel/printk/printk.c:2332 tcp_inbound_hash.constprop.0+0xdb2/0x10d0 include/net/tcp.h:2760 tcp_v6_rcv+0x2b31/0x34d0 net/ipv6/tcp_ipv6.c:1882 ip6_protocol_deliver_rcu+0x33b/0x13d0 net/ipv6/ip6_input.c:438 ip6_input_finish+0x14f/0x2f0 net/ipv6/ip6_input.c:483 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip6_input+0xce/0x440 net/ipv6/ip6_input.c:492 dst_input include/net/dst.h:461 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:79 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ipv6_rcv+0x563/0x720 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core+0x115/0x180 net/core/dev.c:5527 __netif_receive_skb+0x1f/0x1b0 net/core/dev.c:5641 netif_receive_skb_internal net/core/dev.c:5727 [inline] netif_receive_skb+0x133/0x700 net/core/dev.c:5786 tun_rx_batched+0x429/0x780 drivers/net/tun.c:1579 tun_get_user+0x29e7/0x3bc0 drivers/net/tun.c:2002 tun_chr_write_iter+0xe8/0x210 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1956 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x650/0xe40 fs/read_write.c:584 ksys_write+0x12f/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 2717b5adea9e ("net/tcp: Add tcp_hash_fail() ratelimited logs") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Dmitry Safonov Cc: Francesco Ruggeri Cc: David Ahern Reviewed-by: Dmitry Safonov Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a375a171ef3c..b56be10838f0 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -124,7 +124,7 @@ struct tcp_ao_info { #define tcp_hash_fail(msg, family, skb, fmt, ...) \ do { \ const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[5] = {}; \ + char hdr_flags[6]; \ char *f = hdr_flags; \ \ if (th->fin) \ @@ -133,17 +133,18 @@ do { \ *f++ = 'S'; \ if (th->rst) \ *f++ = 'R'; \ + if (th->psh) \ + *f++ = 'P'; \ if (th->ack) \ - *f++ = 'A'; \ - if (f != hdr_flags) \ - *f = ' '; \ + *f++ = '.'; \ + *f = 0; \ if ((family) == AF_INET) { \ - net_info_ratelimited("%s for (%pI4, %d)->(%pI4, %d) %s" fmt "\n", \ + net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ &ip_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ } else { \ - net_info_ratelimited("%s for [%pI6c]:%u->[%pI6c]:%u %s" fmt "\n", \ + net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ -- cgit v1.2.3 From d93f9528573e1d419b69ca5ff4130201d05f6b90 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Nov 2023 11:52:27 -0700 Subject: nfsd: regenerate user space parsers after ynl-gen changes Commit 8cea95b0bd79 ("tools: ynl-gen: handle do ops with no input attrs") added support for some of the previously-skipped ops in nfsd. Regenerate the user space parsers to fill them in. Signed-off-by: Jakub Kicinski Acked-by: Chuck Lever Signed-off-by: David S. Miller --- include/uapi/linux/nfsd_netlink.h | 6 +- tools/net/ynl/generated/nfsd-user.c | 120 ++++++++++++++++++++++++++++++++++-- tools/net/ynl/generated/nfsd-user.h | 44 +++++++++++-- 3 files changed, 156 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index c8ae72466ee6..3cd044edee5d 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -3,8 +3,8 @@ /* Documentation/netlink/specs/nfsd.yaml */ /* YNL-GEN uapi header */ -#ifndef _UAPI_LINUX_NFSD_H -#define _UAPI_LINUX_NFSD_H +#ifndef _UAPI_LINUX_NFSD_NETLINK_H +#define _UAPI_LINUX_NFSD_NETLINK_H #define NFSD_FAMILY_NAME "nfsd" #define NFSD_FAMILY_VERSION 1 @@ -36,4 +36,4 @@ enum { NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) }; -#endif /* _UAPI_LINUX_NFSD_H */ +#endif /* _UAPI_LINUX_NFSD_NETLINK_H */ diff --git a/tools/net/ynl/generated/nfsd-user.c b/tools/net/ynl/generated/nfsd-user.c index fec6828680ce..360b6448c6e9 100644 --- a/tools/net/ynl/generated/nfsd-user.c +++ b/tools/net/ynl/generated/nfsd-user.c @@ -50,9 +50,116 @@ struct ynl_policy_nest nfsd_rpc_status_nest = { /* Common nested types */ /* ============== NFSD_CMD_RPC_STATUS_GET ============== */ /* NFSD_CMD_RPC_STATUS_GET - dump */ -void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp) +int nfsd_rpc_status_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) { - struct nfsd_rpc_status_get_list *next = rsp; + struct nfsd_rpc_status_get_rsp_dump *dst; + struct ynl_parse_arg *yarg = data; + unsigned int n_compound_ops = 0; + const struct nlattr *attr; + int i; + + dst = yarg->data; + + if (dst->compound_ops) + return ynl_error_parse(yarg, "attribute already present (rpc-status.compound-ops)"); + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NFSD_A_RPC_STATUS_XID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.xid = 1; + dst->xid = mnl_attr_get_u32(attr); + } else if (type == NFSD_A_RPC_STATUS_FLAGS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.flags = 1; + dst->flags = mnl_attr_get_u32(attr); + } else if (type == NFSD_A_RPC_STATUS_PROG) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.prog = 1; + dst->prog = mnl_attr_get_u32(attr); + } else if (type == NFSD_A_RPC_STATUS_VERSION) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.version = 1; + dst->version = mnl_attr_get_u8(attr); + } else if (type == NFSD_A_RPC_STATUS_PROC) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.proc = 1; + dst->proc = mnl_attr_get_u32(attr); + } else if (type == NFSD_A_RPC_STATUS_SERVICE_TIME) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.service_time = 1; + dst->service_time = mnl_attr_get_u64(attr); + } else if (type == NFSD_A_RPC_STATUS_SADDR4) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.saddr4 = 1; + dst->saddr4 = mnl_attr_get_u32(attr); + } else if (type == NFSD_A_RPC_STATUS_DADDR4) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.daddr4 = 1; + dst->daddr4 = mnl_attr_get_u32(attr); + } else if (type == NFSD_A_RPC_STATUS_SADDR6) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = mnl_attr_get_payload_len(attr); + dst->_present.saddr6_len = len; + dst->saddr6 = malloc(len); + memcpy(dst->saddr6, mnl_attr_get_payload(attr), len); + } else if (type == NFSD_A_RPC_STATUS_DADDR6) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = mnl_attr_get_payload_len(attr); + dst->_present.daddr6_len = len; + dst->daddr6 = malloc(len); + memcpy(dst->daddr6, mnl_attr_get_payload(attr), len); + } else if (type == NFSD_A_RPC_STATUS_SPORT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sport = 1; + dst->sport = mnl_attr_get_u16(attr); + } else if (type == NFSD_A_RPC_STATUS_DPORT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dport = 1; + dst->dport = mnl_attr_get_u16(attr); + } else if (type == NFSD_A_RPC_STATUS_COMPOUND_OPS) { + n_compound_ops++; + } + } + + if (n_compound_ops) { + dst->compound_ops = calloc(n_compound_ops, sizeof(*dst->compound_ops)); + dst->n_compound_ops = n_compound_ops; + i = 0; + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == NFSD_A_RPC_STATUS_COMPOUND_OPS) { + dst->compound_ops[i] = mnl_attr_get_u32(attr); + i++; + } + } + } + + return MNL_CB_OK; +} + +void +nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp) +{ + struct nfsd_rpc_status_get_rsp_list *next = rsp; while ((void *)next != YNL_LIST_END) { rsp = next; @@ -65,15 +172,16 @@ void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp) } } -struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys) +struct nfsd_rpc_status_get_rsp_list * +nfsd_rpc_status_get_dump(struct ynl_sock *ys) { struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int err; yds.ys = ys; - yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_list); - yds.cb = nfsd_rpc_status_get_rsp_parse; + yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_rsp_list); + yds.cb = nfsd_rpc_status_get_rsp_dump_parse; yds.rsp_cmd = NFSD_CMD_RPC_STATUS_GET; yds.rsp_policy = &nfsd_rpc_status_nest; @@ -86,7 +194,7 @@ struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys) return yds.first; free_list: - nfsd_rpc_status_get_list_free(yds.first); + nfsd_rpc_status_get_rsp_list_free(yds.first); return NULL; } diff --git a/tools/net/ynl/generated/nfsd-user.h b/tools/net/ynl/generated/nfsd-user.h index b6b69501031a..989c6e209ced 100644 --- a/tools/net/ynl/generated/nfsd-user.h +++ b/tools/net/ynl/generated/nfsd-user.h @@ -21,13 +21,47 @@ const char *nfsd_op_str(int op); /* Common nested types */ /* ============== NFSD_CMD_RPC_STATUS_GET ============== */ /* NFSD_CMD_RPC_STATUS_GET - dump */ -struct nfsd_rpc_status_get_list { - struct nfsd_rpc_status_get_list *next; - struct nfsd_rpc_status_get_rsp obj __attribute__ ((aligned (8))); +struct nfsd_rpc_status_get_rsp_dump { + struct { + __u32 xid:1; + __u32 flags:1; + __u32 prog:1; + __u32 version:1; + __u32 proc:1; + __u32 service_time:1; + __u32 saddr4:1; + __u32 daddr4:1; + __u32 saddr6_len; + __u32 daddr6_len; + __u32 sport:1; + __u32 dport:1; + } _present; + + __u32 xid /* big-endian */; + __u32 flags; + __u32 prog; + __u8 version; + __u32 proc; + __s64 service_time; + __u32 saddr4 /* big-endian */; + __u32 daddr4 /* big-endian */; + void *saddr6; + void *daddr6; + __u16 sport /* big-endian */; + __u16 dport /* big-endian */; + unsigned int n_compound_ops; + __u32 *compound_ops; +}; + +struct nfsd_rpc_status_get_rsp_list { + struct nfsd_rpc_status_get_rsp_list *next; + struct nfsd_rpc_status_get_rsp_dump obj __attribute__((aligned(8))); }; -void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp); +void +nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp); -struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys); +struct nfsd_rpc_status_get_rsp_list * +nfsd_rpc_status_get_dump(struct ynl_sock *ys); #endif /* _LINUX_NFSD_GEN_H */ -- cgit v1.2.3 From 9bc64bd0cd765f696fcd40fc98909b1f7c73b2ba Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 3 Nov 2023 16:14:10 +0100 Subject: net/sched: act_ct: Always fill offloading tuple iifidx Referenced commit doesn't always set iifidx when offloading the flow to hardware. Fix the following cases: - nf_conn_act_ct_ext_fill() is called before extension is created with nf_conn_act_ct_ext_add() in tcf_ct_act(). This can cause rule offload with unspecified iifidx when connection is offloaded after only single original-direction packet has been processed by tc data path. Always fill the new nf_conn_act_ct_ext instance after creating it in nf_conn_act_ct_ext_add(). - Offloading of unidirectional UDP NEW connections is now supported, but ct flow iifidx field is not updated when connection is promoted to bidirectional which can result reply-direction iifidx to be zero when refreshing the connection. Fill in the extension and update flow iifidx before calling flow_offload_refresh(). Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx") Reviewed-by: Paul Blakey Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Fixes: 6a9bad0069cf ("net/sched: act_ct: offload UDP NEW connections") Link: https://lore.kernel.org/r/20231103151410.764271-1-vladbu@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_conntrack_act_ct.h | 30 ++++++++++++++++------------- net/openvswitch/conntrack.c | 2 +- net/sched/act_ct.c | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0b9a785dea45..3019a4406ca4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -985,7 +985,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (err) return err; - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9583645e86c2..0db0ecf1d110 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -376,6 +376,17 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; } +static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry) +{ + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(entry->ct); + if (act_ct_ext) { + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); + } +} + static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp, bool bidirectional) @@ -671,6 +682,8 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); + tcf_ct_flow_ct_ext_ifidx_update(flow); flow_offload_refresh(nf_ft, flow, force_refresh); if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { /* Process this flow in SW to allow promoting to ASSURED */ @@ -1034,7 +1047,7 @@ do_nat: tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); if (!nf_ct_is_confirmed(ct)) - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); /* This will take care of sending queued events * even if the connection is already confirmed. -- cgit v1.2.3