From bbc1d24724e110b86a1a7c3c1724ce0d62cc1e2e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:04 -0800 Subject: bpf: Take into account BPF token when fetching helper protos Instead of performing unconditional system-wide bpf_capable() and perfmon_capable() calls inside bpf_base_func_proto() function (and other similar ones) to determine eligibility of a given BPF helper for a given program, use previously recorded BPF token during BPF_PROG_LOAD command handling to inform the decision. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-8-andrii@kernel.org --- net/netfilter/nf_bpf_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 0e4beae421f8..5257d5e7eb09 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, static const struct bpf_func_proto * bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } const struct bpf_verifier_ops netfilter_verifier_ops = { -- cgit v1.2.3 From da5141bbe0c2693d85f14a89ee991921904f4d0c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:58 +0100 Subject: netfilter: nf_tables: Introduce NFT_TABLE_F_PERSIST This companion flag to NFT_TABLE_F_OWNER requests the kernel to keep the table around after the process has exited. It marks such table as orphaned (by dropping OWNER flag but keeping PERSIST flag in place), which opens it for other processes to manipulate. For the sake of simplicity, PERSIST flag may not be altered though. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 5 ++++- net/netfilter/nf_tables_api.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index fbce238abdc1..3fee994721cd 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -180,13 +180,16 @@ enum nft_hook_attributes { * * @NFT_TABLE_F_DORMANT: this table is not active * @NFT_TABLE_F_OWNER: this table is owned by a process + * @NFT_TABLE_F_PERSIST: this table shall outlive its owner */ enum nft_table_flags { NFT_TABLE_F_DORMANT = 0x1, NFT_TABLE_F_OWNER = 0x2, + NFT_TABLE_F_PERSIST = 0x4, }; #define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT | \ - NFT_TABLE_F_OWNER) + NFT_TABLE_F_OWNER | \ + NFT_TABLE_F_PERSIST) /** * enum nft_table_attributes - nf_tables table netlink attributes diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c537104411e7..6a96f0003faa 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1219,6 +1219,9 @@ static int nf_tables_updtable(struct nft_ctx *ctx) flags & NFT_TABLE_F_OWNER)) return -EOPNOTSUPP; + if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST) + return -EOPNOTSUPP; + /* No dormant off/on/off/on games in single transaction */ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) return -EINVAL; @@ -11345,6 +11348,10 @@ again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && n->portid == table->nlpid) { + if (table->flags & NFT_TABLE_F_PERSIST) { + table->flags &= ~NFT_TABLE_F_OWNER; + continue; + } __nft_release_hook(net, table); list_del_rcu(&table->list); to_delete[deleted++] = table; -- cgit v1.2.3 From 31bf508be656a429a17e3adb31e9acae5c1a6299 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:59 +0100 Subject: netfilter: nf_tables: Implement table adoption support Allow a new process to take ownership of a previously owned table, useful mostly for firewall management services restarting or suspending when idle. By extending __NFT_TABLE_F_UPDATE, the on/off/on check in nf_tables_updtable() also covers table adoption, although it is actually not needed: Table adoption is irreversible because nf_tables_updtable() rejects attempts to drop NFT_TABLE_F_OWNER so table->nlpid setting can happen just once within the transaction. If the transaction commences, table's nlpid and flags fields are already set and no further action is required. If it aborts, the table returns to orphaned state. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f0..ac7c94d3648e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1271,6 +1271,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table) return table->flags & NFT_TABLE_F_OWNER; } +static inline bool nft_table_is_orphan(const struct nft_table *table) +{ + return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) == + NFT_TABLE_F_PERSIST; +} + static inline bool nft_base_chain_netdev(int family, u32 hooknum) { return family == NFPROTO_NETDEV || diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6a96f0003faa..b0e0d039897e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1194,8 +1194,10 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) #define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) #define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ - __NFT_TABLE_F_WAS_AWAKEN) + __NFT_TABLE_F_WAS_AWAKEN | \ + __NFT_TABLE_F_WAS_ORPHAN) static int nf_tables_updtable(struct nft_ctx *ctx) { @@ -1215,8 +1217,8 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((nft_table_has_owner(ctx->table) && !(flags & NFT_TABLE_F_OWNER)) || - (!nft_table_has_owner(ctx->table) && - flags & NFT_TABLE_F_OWNER)) + (flags & NFT_TABLE_F_OWNER && + !nft_table_is_orphan(ctx->table))) return -EOPNOTSUPP; if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST) @@ -1248,6 +1250,13 @@ static int nf_tables_updtable(struct nft_ctx *ctx) } } + if ((flags & NFT_TABLE_F_OWNER) && + !nft_table_has_owner(ctx->table)) { + ctx->table->nlpid = ctx->portid; + ctx->table->flags |= NFT_TABLE_F_OWNER | + __NFT_TABLE_F_WAS_ORPHAN; + } + nft_trans_table_update(trans) = true; nft_trans_commit_list_add_tail(ctx->net, trans); @@ -10423,6 +10432,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER; + trans->ctx.table->nlpid = 0; + } trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { -- cgit v1.2.3 From a128885ace60ff3da326dab7208fd7a04ce0b138 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jan 2024 13:26:09 +0100 Subject: netfilter: nf_tables: pass flags to set backend selection routine No need to refetch the flag from the netlink attribute, pass the existing flags variable which already provide validated flags. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b0e0d039897e..7f25a04e4b81 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4247,23 +4247,18 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) * given, in that case the amount of memory per element is used. */ static const struct nft_set_ops * -nft_select_set_ops(const struct nft_ctx *ctx, - const struct nlattr * const nla[], +nft_select_set_ops(const struct nft_ctx *ctx, u32 flags, const struct nft_set_desc *desc) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; - u32 flags = 0; int i; lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); - if (nla[NFTA_SET_FLAGS] != NULL) - flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - bops = NULL; best.size = ~0; best.lookup = ~0; @@ -5149,7 +5144,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(&ctx, nla, &desc); + ops = nft_select_set_ops(&ctx, flags, &desc); if (IS_ERR(ops)) return PTR_ERR(ops); -- cgit v1.2.3 From 2ae6e9a03dadee5b2749d391d2e6df3056c5b6dd Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 23 Jan 2024 10:46:31 +0800 Subject: netfilter: nf_conncount: Use KMEM_CACHE instead of kmem_cache_create() Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Signed-off-by: Florian Westphal --- net/netfilter/nf_conncount.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 5d8ed6c90b7e..8715617b02fe 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -605,15 +605,11 @@ static int __init nf_conncount_modinit(void) for (i = 0; i < CONNCOUNT_SLOTS; ++i) spin_lock_init(&nf_conncount_locks[i]); - conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", - sizeof(struct nf_conncount_tuple), - 0, 0, NULL); + conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0); if (!conncount_conn_cachep) return -ENOMEM; - conncount_rb_cachep = kmem_cache_create("nf_conncount_rb", - sizeof(struct nf_conncount_rb), - 0, 0, NULL); + conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0); if (!conncount_rb_cachep) { kmem_cache_destroy(conncount_conn_cachep); return -ENOMEM; -- cgit v1.2.3 From d5f9142fb96d4386ff4d06745bbd8f8c73b3791b Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 17 Jan 2024 15:20:45 +0800 Subject: ipvs: Simplify the allocation of ip_vs_conn slab caches Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Acked-by: Simon Horman Signed-off-by: Florian Westphal --- net/netfilter/ipvs/ip_vs_conn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a743db073887..98d7dbe3d787 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1511,9 +1511,7 @@ int __init ip_vs_conn_init(void) return -ENOMEM; /* Allocate ip_vs_conn slab cache */ - ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", - sizeof(struct ip_vs_conn), 0, - SLAB_HWCACHE_ALIGN, NULL); + ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN); if (!ip_vs_conn_cachep) { kvfree(ip_vs_conn_tab); return -ENOMEM; -- cgit v1.2.3 From a9525c7f6219cee9284c0031c5930e8d41384677 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Jan 2024 10:21:11 +0100 Subject: netfilter: xtables: allow xtables-nft only builds Add hidden IP(6)_NF_IPTABLES_LEGACY symbol. When any of the "old" builtin tables are enabled the "old" iptables interface will be supported. To disable the old set/getsockopt interface the existing options for the builtin tables need to be turned off: CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER is not set CONFIG_IP_NF_NAT is not set CONFIG_IP_NF_MANGLE is not set CONFIG_IP_NF_RAW is not set CONFIG_IP_NF_SECURITY is not set Same for CONFIG_IP6_NF_ variants. This allows to build a kernel that only supports ip(6)tables-nft (iptables-over-nftables api). In the future the _LEGACY symbol will become visible and the select statements will be turned into 'depends on', but for now be on safe side so "make oldconfig" won't break things. Signed-off-by: Florian Westphal --- net/ipv4/netfilter/Kconfig | 15 ++++++++++++--- net/ipv4/netfilter/Makefile | 2 +- net/ipv6/netfilter/Kconfig | 20 ++++++++++++++------ net/ipv6/netfilter/Makefile | 2 +- net/netfilter/Kconfig | 12 ++++++------ 5 files changed, 34 insertions(+), 17 deletions(-) (limited to 'net/netfilter') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 070475392236..783523087281 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -10,6 +10,10 @@ config NF_DEFRAG_IPV4 tristate default n +# old sockopt interface and eval loop +config IP_NF_IPTABLES_LEGACY + tristate + config NF_SOCKET_IPV4 tristate "IPv4 socket lookup support" help @@ -152,7 +156,7 @@ config IP_NF_MATCH_ECN config IP_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' depends on NETFILTER_ADVANCED - depends on IP_NF_MANGLE || IP_NF_RAW + depends on IP_NF_MANGLE || IP_NF_RAW || NFT_COMPAT help This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -173,6 +177,7 @@ config IP_NF_MATCH_TTL config IP_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n + select IP_NF_IPTABLES_LEGACY help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -182,7 +187,7 @@ config IP_NF_FILTER config IP_NF_TARGET_REJECT tristate "REJECT target support" - depends on IP_NF_FILTER + depends on IP_NF_FILTER || NFT_COMPAT select NF_REJECT_IPV4 default m if NETFILTER_ADVANCED=n help @@ -212,6 +217,7 @@ config IP_NF_NAT default m if NETFILTER_ADVANCED=n select NF_NAT select NETFILTER_XT_NAT + select IP6_NF_IPTABLES_LEGACY help This enables the `nat' table in iptables. This allows masquerading, port forwarding and other forms of full Network Address Port @@ -252,6 +258,7 @@ endif # IP_NF_NAT config IP_NF_MANGLE tristate "Packet mangling" default m if NETFILTER_ADVANCED=n + select IP_NF_IPTABLES_LEGACY help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -261,7 +268,7 @@ config IP_NF_MANGLE config IP_NF_TARGET_ECN tristate "ECN target support" - depends on IP_NF_MANGLE + depends on IP_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a `ECN' target, which can be used in the iptables mangle @@ -286,6 +293,7 @@ config IP_NF_TARGET_TTL # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' + select IP_NF_IPTABLES_LEGACY help This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING @@ -299,6 +307,7 @@ config IP_NF_SECURITY tristate "Security table" depends on SECURITY depends on NETFILTER_ADVANCED + select IP_NF_IPTABLES_LEGACY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5a26f9de1ab9..85502d4dfbb4 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o # generic IP tables -obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o +obj-$(CONFIG_IP_NF_IPTABLES_LEGACY) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 0ba62f4868f9..f3c8e2d918e1 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -6,6 +6,10 @@ menu "IPv6: Netfilter Configuration" depends on INET && IPV6 && NETFILTER +# old sockopt interface and eval loop +config IP6_NF_IPTABLES_LEGACY + tristate + config NF_SOCKET_IPV6 tristate "IPv6 socket lookup support" help @@ -147,7 +151,7 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' depends on NETFILTER_ADVANCED - depends on IP6_NF_MANGLE || IP6_NF_RAW + depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT help This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -186,6 +190,8 @@ config IP6_NF_TARGET_HL config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n + select IP6_NF_IPTABLES_LEGACY + tristate help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -195,7 +201,7 @@ config IP6_NF_FILTER config IP6_NF_TARGET_REJECT tristate "REJECT target support" - depends on IP6_NF_FILTER + depends on IP6_NF_FILTER || NFT_COMPAT select NF_REJECT_IPV6 default m if NETFILTER_ADVANCED=n help @@ -221,6 +227,7 @@ config IP6_NF_TARGET_SYNPROXY config IP6_NF_MANGLE tristate "Packet mangling" default m if NETFILTER_ADVANCED=n + select IP6_NF_IPTABLES_LEGACY help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -230,6 +237,7 @@ config IP6_NF_MANGLE config IP6_NF_RAW tristate 'raw table support (required for TRACE)' + select IP6_NF_IPTABLES_LEGACY help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING @@ -243,6 +251,7 @@ config IP6_NF_SECURITY tristate "Security table" depends on SECURITY depends on NETFILTER_ADVANCED + select IP6_NF_IPTABLES_LEGACY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. @@ -254,6 +263,7 @@ config IP6_NF_NAT depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_NAT + select IP6_NF_IPTABLES_LEGACY select NETFILTER_XT_NAT help This enables the `nat' table in ip6tables. This allows masquerading, @@ -262,25 +272,23 @@ config IP6_NF_NAT To compile it as a module, choose M here. If unsure, say N. -if IP6_NF_NAT - config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" select NETFILTER_XT_TARGET_MASQUERADE + depends on IP6_NF_NAT help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. config IP6_NF_TARGET_NPT tristate "NPT (Network Prefix translation) target support" + depends on IP6_NF_NAT || NFT_COMPAT help This option adds the `SNPT' and `DNPT' target, which perform stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. To compile it as a module, choose M here. If unsure, say N. -endif # IP6_NF_NAT - endif # IP6_NF_IPTABLES endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b8d6dc9aeeb6..66ce6fa5b2f5 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -4,7 +4,7 @@ # # Link order matters here. -obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o +obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 441d1f134110..df2dc21304ef 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -818,7 +818,7 @@ config NETFILTER_XT_TARGET_AUDIT config NETFILTER_XT_TARGET_CHECKSUM tristate "CHECKSUM target support" - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a `CHECKSUM' target, which can be used in the iptables mangle @@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK config NETFILTER_XT_TARGET_CT tristate '"CT" target support' depends on NF_CONNTRACK - depends on IP_NF_RAW || IP6_NF_RAW + depends on IP_NF_RAW || IP6_NF_RAW || NFT_COMPAT depends on NETFILTER_ADVANCED help This options adds a `CT' target, which allows to specify initial @@ -880,7 +880,7 @@ config NETFILTER_XT_TARGET_CT config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a `DSCP' target, which allows you to manipulate @@ -896,7 +896,7 @@ config NETFILTER_XT_TARGET_DSCP config NETFILTER_XT_TARGET_HL tristate '"HL" hoplimit target support' - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds the "HL" (for IPv6) and "TTL" (for IPv4) @@ -1080,7 +1080,7 @@ config NETFILTER_XT_TARGET_TPROXY depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n - depends on IP_NF_MANGLE + depends on IP_NF_MANGLE || NFT_COMPAT select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n select NF_TPROXY_IPV4 @@ -1147,7 +1147,7 @@ config NETFILTER_XT_TARGET_TCPMSS config NETFILTER_XT_TARGET_TCPOPTSTRIP tristate '"TCPOPTSTRIP" target support' - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a "TCPOPTSTRIP" target, which allows you to strip -- cgit v1.2.3 From 6f3189f38a3e995232e028a4c341164c4aca1b20 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Sun, 28 Jan 2024 18:24:08 -0700 Subject: bpf: treewide: Annotate BPF kfuncs in BTF This commit marks kfuncs as such inside the .BTF_ids section. The upshot of these annotations is that we'll be able to automatically generate kfunc prototypes for downstream users. The process is as follows: 1. In source, use BTF_KFUNCS_START/END macro pair to mark kfuncs 2. During build, pahole injects into BTF a "bpf_kfunc" BTF_DECL_TAG for each function inside BTF_KFUNCS sets 3. At runtime, vmlinux or module BTF is made available in sysfs 4. At runtime, bpftool (or similar) can look at provided BTF and generate appropriate prototypes for functions with "bpf_kfunc" tag To ensure future kfunc are similarly tagged, we now also return error inside kfunc registration for untagged kfuncs. For vmlinux kfuncs, we also WARN(), as initcall machinery does not handle errors. Signed-off-by: Daniel Xu Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/e55150ceecbf0a5d961e608941165c0bee7bc943.1706491398.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 8 ++++---- drivers/hid/bpf/hid_bpf_dispatch.c | 8 ++++---- fs/verity/measure.c | 4 ++-- kernel/bpf/btf.c | 8 ++++++++ kernel/bpf/cpumask.c | 4 ++-- kernel/bpf/helpers.c | 8 ++++---- kernel/bpf/map_iter.c | 4 ++-- kernel/cgroup/rstat.c | 4 ++-- kernel/trace/bpf_trace.c | 8 ++++---- net/bpf/test_run.c | 8 ++++---- net/core/filter.c | 20 ++++++++++---------- net/core/xdp.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 4 ++-- net/ipv4/fou_bpf.c | 4 ++-- net/ipv4/tcp_bbr.c | 4 ++-- net/ipv4/tcp_cubic.c | 4 ++-- net/ipv4/tcp_dctcp.c | 4 ++-- net/netfilter/nf_conntrack_bpf.c | 4 ++-- net/netfilter/nf_nat_bpf.c | 4 ++-- net/xfrm/xfrm_interface_bpf.c | 4 ++-- net/xfrm/xfrm_state_bpf.c | 4 ++-- .../testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 8 ++++---- 22 files changed, 70 insertions(+), 62 deletions(-) (limited to 'net/netfilter') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 7985c6615f3c..a8f5782bd833 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -177,10 +177,10 @@ In addition to kfuncs' arguments, verifier may need more information about the type of kfunc(s) being registered with the BPF subsystem. To do so, we define flags on a set of kfuncs as follows:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) This set encodes the BTF ID of each kfunc listed above, and encodes the flags along with it. Ofcourse, it is also allowed to specify no flags. @@ -347,10 +347,10 @@ Once the kfunc is prepared for use, the final step to making it visible is registering it with the BPF subsystem. Registration is done per BPF program type. An example is shown below:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) static const struct btf_kfunc_id_set bpf_task_kfunc_set = { .owner = THIS_MODULE, diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab..02c441aaa217 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -172,9 +172,9 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr * The following set contains all functions we agree BPF programs * can use. */ -BTF_SET8_START(hid_bpf_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_get_data, KF_RET_NULL) -BTF_SET8_END(hid_bpf_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_kfunc_set = { .owner = THIS_MODULE, @@ -440,12 +440,12 @@ static const struct btf_kfunc_id_set hid_bpf_fmodret_set = { }; /* for syscall HID-BPF */ -BTF_SET8_START(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_syscall_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_attach_prog) BTF_ID_FLAGS(func, hid_bpf_allocate_context, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, hid_bpf_release_context, KF_RELEASE) BTF_ID_FLAGS(func, hid_bpf_hw_request) -BTF_SET8_END(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_syscall_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_syscall_kfunc_set = { .owner = THIS_MODULE, diff --git a/fs/verity/measure.c b/fs/verity/measure.c index bf7a5f4cccaf..3969d54158d1 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -159,9 +159,9 @@ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_ker __bpf_kfunc_end_defs(); -BTF_SET8_START(fsverity_set_ids) +BTF_KFUNCS_START(fsverity_set_ids) BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS) -BTF_SET8_END(fsverity_set_ids) +BTF_KFUNCS_END(fsverity_set_ids) static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c8c6e6cf18e7..ef380e546952 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8124,6 +8124,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { enum btf_kfunc_hook hook; + /* All kfuncs need to be tagged as such in BTF. + * WARN() for initcall registrations that do not check errors. + */ + if (!(kset->set->flags & BTF_SET8_KFUNCS)) { + WARN_ON(!kset->owner); + return -EINVAL; + } + hook = bpf_prog_type_to_kfunc_hook(prog_type); return __register_btf_kfunc_id_set(hook, kset); } diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 2e73533a3811..dad0fb1c8e87 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -424,7 +424,7 @@ __bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask) __bpf_kfunc_end_defs(); -BTF_SET8_START(cpumask_kfunc_btf_ids) +BTF_KFUNCS_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) @@ -450,7 +450,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU) -BTF_SET8_END(cpumask_kfunc_btf_ids) +BTF_KFUNCS_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bcb951a2ecf4..4db1c658254c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2544,7 +2544,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); -BTF_SET8_START(generic_btf_ids) +BTF_KFUNCS_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif @@ -2573,7 +2573,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) -BTF_SET8_END(generic_btf_ids) +BTF_KFUNCS_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, @@ -2589,7 +2589,7 @@ BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release_dtor) #endif -BTF_SET8_START(common_btf_ids) +BTF_KFUNCS_START(common_btf_ids) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) @@ -2618,7 +2618,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) -BTF_SET8_END(common_btf_ids) +BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6abd7c5df4b3..9575314f40a6 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -213,9 +213,9 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_map_iter_kfunc_ids) static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index a8350d2d63e6..07e2284bb499 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -562,10 +562,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) } /* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ -BTF_SET8_START(bpf_rstat_kfunc_ids) +BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(func, cgroup_rstat_updated) BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE) -BTF_SET8_END(bpf_rstat_kfunc_ids) +BTF_KFUNCS_END(bpf_rstat_kfunc_ids) static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 64fdaf79d113..241ddf5e3895 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, __bpf_kfunc_end_defs(); -BTF_SET8_START(key_sig_kfunc_set) +BTF_KFUNCS_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) #endif -BTF_SET8_END(key_sig_kfunc_set) +BTF_KFUNCS_END(key_sig_kfunc_set) static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { .owner = THIS_MODULE, @@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, __bpf_kfunc_end_defs(); -BTF_SET8_START(fs_kfunc_set_ids) +BTF_KFUNCS_START(fs_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_SET8_END(fs_kfunc_set_ids) +BTF_KFUNCS_END(fs_kfunc_set_ids) static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfd919374017..5535f9adc658 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -617,21 +617,21 @@ CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_test_modify_return_ids) +BTF_KFUNCS_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) BTF_ID_FLAGS(func, bpf_modify_return_test2) BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) -BTF_SET8_END(bpf_test_modify_return_ids) +BTF_KFUNCS_END(bpf_test_modify_return_ids) static const struct btf_kfunc_id_set bpf_test_modify_return_set = { .owner = THIS_MODULE, .set = &bpf_test_modify_return_ids, }; -BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_KFUNCS_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) -BTF_SET8_END(test_sk_check_kfunc_ids) +BTF_KFUNCS_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51..524adf1fa6d0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11982,21 +11982,21 @@ int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, return 0; } -BTF_SET8_START(bpf_kfunc_check_set_skb) +BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb) -BTF_SET8_END(bpf_kfunc_check_set_skb) +BTF_KFUNCS_END(bpf_kfunc_check_set_skb) -BTF_SET8_START(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) -BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) -BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) -BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr) -BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk) BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk) static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, @@ -12075,9 +12075,9 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids) static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/core/xdp.c b/net/core/xdp.c index 4869c1c2d8f3..034fb80f3fbe 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -771,11 +771,11 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(xdp_metadata_kfunc_ids) +BTF_KFUNCS_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC -BTF_SET8_END(xdp_metadata_kfunc_ids) +BTF_KFUNCS_END(xdp_metadata_kfunc_ids) static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 834edc18463a..7f518ea5f4ac 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -201,13 +201,13 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID_FLAGS(func, tcp_reno_ssthresh) BTF_ID_FLAGS(func, tcp_reno_cong_avoid) BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) BTF_ID_FLAGS(func, tcp_slow_start) BTF_ID_FLAGS(func, tcp_cong_avoid_ai) -BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 4da03bf45c9b..06e5572f296f 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -100,10 +100,10 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(fou_kfunc_set) +BTF_KFUNCS_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) BTF_ID_FLAGS(func, bpf_skb_get_fou_encap) -BTF_SET8_END(fou_kfunc_set) +BTF_KFUNCS_END(fou_kfunc_set) static const struct btf_kfunc_id_set fou_bpf_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 22358032dd48..05dc2d05bc7c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1155,7 +1155,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET8_START(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, bbr_init) @@ -1168,7 +1168,7 @@ BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET8_END(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0fd78ecb67e7..44869ea089e3 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET8_START(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) @@ -496,7 +496,7 @@ BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET8_END(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index bb23bb5b387a..e33fbe4933e4 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -260,7 +260,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET8_START(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, dctcp_init) @@ -271,7 +271,7 @@ BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET8_END(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 475358ec8212..d2492d050fe6 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -467,7 +467,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_ct_kfunc_set) +BTF_KFUNCS_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL) @@ -478,7 +478,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_ct_kfunc_set) +BTF_KFUNCS_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 6e3b2f58855f..481be15609b1 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -54,9 +54,9 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_nat_kfunc_set) +BTF_KFUNCS_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_nat_kfunc_set) +BTF_KFUNCS_END(nf_nat_kfunc_set) static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index 7d5e920141e9..5ea15037ebd1 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_ifc_kfunc_set) +BTF_KFUNCS_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info) -BTF_SET8_END(xfrm_ifc_kfunc_set) +BTF_KFUNCS_END(xfrm_ifc_kfunc_set) static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c index 9e20d4a377f7..2248eda741f8 100644 --- a/net/xfrm/xfrm_state_bpf.c +++ b/net/xfrm/xfrm_state_bpf.c @@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_state_kfunc_set) +BTF_KFUNCS_START(xfrm_state_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) -BTF_SET8_END(xfrm_state_kfunc_set) +BTF_KFUNCS_END(xfrm_state_kfunc_set) static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { .owner = THIS_MODULE, diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 6f163a0f1c94..4754c662b39f 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -343,12 +343,12 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_kfunc_common_test) -BTF_SET8_END(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .owner = THIS_MODULE, @@ -494,7 +494,7 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused return arg; } -BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) BTF_ID_FLAGS(func, bpf_kfunc_call_test2) @@ -520,7 +520,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) -BTF_SET8_END(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) { -- cgit v1.2.3 From 1ebb85f9c03de0b66c334de219f224159e24e549 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:31:01 +0000 Subject: netfilter: conntrack: expedite rcu in nf_conntrack_cleanup_net_list nf_conntrack_cleanup_net_list() is calling synchronize_net() while RTNL is not held. This effectively calls synchronize_rcu(). synchronize_rcu() is much slower than synchronize_rcu_expedited(), and cleanup_net() is currently single threaded. In many workloads we want cleanup_net() to be faster, in order to free memory and various sysfs and procfs entries as fast as possible. Signed-off-by: Eric Dumazet Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e5f3864d353..90e6bd2c3000 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2530,7 +2530,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) * netfilter framework. Roll on, two-stage module * delete... */ - synchronize_net(); + synchronize_rcu_expedited(); i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { -- cgit v1.2.3 From aa23cfe6ab500dbbfa630602b403f433522daf9f Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 24 Jan 2024 16:15:38 +0800 Subject: netfilter: expect: Simplify the allocation of slab caches in nf_conntrack_expect_init Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_expect.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 81ca348915c9..21fa550966f0 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -722,9 +722,7 @@ int nf_conntrack_expect_init(void) nf_ct_expect_hsize = 1; } nf_ct_expect_max = nf_ct_expect_hsize * 4; - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL); + nf_ct_expect_cachep = KMEM_CACHE(nf_conntrack_expect, 0); if (!nf_ct_expect_cachep) return -ENOMEM; -- cgit v1.2.3 From 79578be4d35c842a802487e2f31c2aed80cc005f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 20:24:24 +0100 Subject: netfilter: nf_log: consolidate check for NULL logger in lookup function Consolidate pointer fetch to logger and check for NULL in __find_logger(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index e16f158388bb..e0bfeb75766f 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -31,10 +31,10 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) int i; for (i = 0; i < NF_LOG_TYPE_MAX; i++) { - if (loggers[pf][i] == NULL) + log = nft_log_dereference(loggers[pf][i]); + if (!log) continue; - log = nft_log_dereference(loggers[pf][i]); if (!strncasecmp(str_logger, log->name, strlen(log->name))) return log; } -- cgit v1.2.3 From c47ec2b120b4a9d573e65baa33ff3f542f7ba273 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 20:24:25 +0100 Subject: netfilter: nf_log: validate nf_logger_find_get() Sanitize nf_logger_find_get() input parameters, no caller in the tree passes invalid values. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_log.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index e0bfeb75766f..370f8231385c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -156,6 +156,11 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; + if (pf >= ARRAY_SIZE(loggers)) + return -EINVAL; + if (type >= NF_LOG_TYPE_MAX) + return -EINVAL; + if (pf == NFPROTO_INET) { ret = nf_logger_find_get(NFPROTO_IPV4, type); if (ret < 0) -- cgit v1.2.3 From 29a280025580d72bc0daf6c040518a069870421f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 20:24:57 +0100 Subject: netfilter: nft_osf: simplify init path Remove useless branch to check for errors in nft_parse_register_store(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nft_osf.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 7f61506e5b44..7fec57ff736f 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -63,7 +63,6 @@ static int nft_osf_init(const struct nft_ctx *ctx, { struct nft_osf *priv = nft_expr_priv(expr); u32 flags; - int err; u8 ttl; if (!tb[NFTA_OSF_DREG]) @@ -83,13 +82,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->flags = flags; } - err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, - NULL, NFT_DATA_VALUE, - NFT_OSF_MAXGENRELEN); - if (err < 0) - return err; - - return 0; + return nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, + NFT_OSF_MAXGENRELEN); } static int nft_osf_dump(struct sk_buff *skb, -- cgit v1.2.3 From f04df573faf90bb828a2241b650598c02c074323 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:37 +0100 Subject: netfilter: nft_set_pipapo: constify lookup fn args where possible Those get called from packet path, content must not be modified. No functional changes intended. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 18 ++++++----- net/netfilter/nft_set_pipapo.h | 6 ++-- net/netfilter/nft_set_pipapo_avx2.c | 59 ++++++++++++++++++++++--------------- 3 files changed, 48 insertions(+), 35 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index aa1d9e93a9a0..dedb17a4e07c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -360,7 +360,7 @@ * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, - union nft_pipapo_map_bucket *mt, bool match_only) + const union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; int k, ret = -1; @@ -412,9 +412,9 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - struct nft_pipapo_match *m; - struct nft_pipapo_field *f; bool map_index; int i; @@ -519,11 +519,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *m = priv->clone; unsigned long *res_map, *fill_map = NULL; - struct nft_pipapo_field *f; + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; int i; + m = priv->clone; + res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); if (!res_map) { ret = ERR_PTR(-ENOMEM); @@ -1597,7 +1599,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; - struct nft_pipapo_field *f; + const struct nft_pipapo_field *f; int i, start, rules_fx; start = first_rule; @@ -2039,8 +2041,8 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); - struct nft_pipapo_match *m; - struct nft_pipapo_field *f; + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; int i, r; rcu_read_lock(); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 3842c7341a9f..42464e7c24ac 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -187,7 +187,7 @@ struct nft_pipapo_elem { }; int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, - union nft_pipapo_map_bucket *mt, bool match_only); + const union nft_pipapo_map_bucket *mt, bool match_only); /** * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets @@ -195,7 +195,7 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, * @dst: Area to store result * @data: Input data selecting table buckets */ -static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, +static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f, unsigned long *dst, const u8 *data) { @@ -223,7 +223,7 @@ static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, * @dst: Area to store result * @data: Input data selecting table buckets */ -static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f, +static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f, unsigned long *dst, const u8 *data) { diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index a3a8ddca9918..d08407d589ea 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -212,8 +212,9 @@ static int nft_pipapo_avx2_refill(int offset, unsigned long *map, * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf }; @@ -274,8 +275,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf }; @@ -350,8 +352,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, @@ -445,8 +448,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, @@ -534,8 +538,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, @@ -669,8 +674,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -726,8 +732,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -790,8 +797,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -865,8 +873,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -950,8 +959,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -1042,8 +1052,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { unsigned long bsize = f->bsize; int i, ret = -1, b; @@ -1119,9 +1130,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - struct nft_pipapo_match *m; - struct nft_pipapo_field *f; unsigned long *res, *fill; bool map_index; int i, ret = 0; -- cgit v1.2.3 From 07ace0bbe03b3d8e85869af1dec5e4087b1d57b8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:38 +0100 Subject: netfilter: nft_set_pipapo: do not rely on ZERO_SIZE_PTR pipapo relies on kmalloc(0) returning ZERO_SIZE_PTR (i.e., not NULL but pointer is invalid). Rework this to not call slab allocator when we'd request a 0-byte allocation. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index dedb17a4e07c..625c06b8bc39 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -525,6 +525,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, int i; m = priv->clone; + if (m->bsize_max == 0) + return ret; res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); if (!res_map) { @@ -1367,11 +1369,17 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) src->bsize * sizeof(*dst->lt) * src->groups * NFT_PIPAPO_BUCKETS(src->bb)); - dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL); - if (!dst->mt) - goto out_mt; + if (src->rules > 0) { + dst->mt = kvmalloc_array(src->rules, sizeof(*src->mt), + GFP_KERNEL); + if (!dst->mt) + goto out_mt; + + memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt)); + } else { + dst->mt = NULL; + } - memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt)); src++; dst++; } -- cgit v1.2.3 From aac14d516c2b575af20b426fa04129a28d45c287 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:39 +0100 Subject: netfilter: nft_set_pipapo: shrink data structures The set uses a mix of 'int', 'unsigned int', and size_t. The rule count limit is NFT_PIPAPO_RULE0_MAX, which cannot exceed INT_MAX (a few helpers use 'int' as return type). Add a compile-time assertion for this. Replace size_t usage in structs with unsigned int or u8 where the stored values are smaller. Replace signed-int arguments for lengths with 'unsigned int' where possible. Last, remove lt_aligned member: its set but never read. struct nft_pipapo_match 40 bytes -> 32 bytes struct nft_pipapo_field 56 bytes -> 32 bytes Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 62 ++++++++++++++++++++++++++++-------------- net/netfilter/nft_set_pipapo.h | 29 +++++++------------- 2 files changed, 51 insertions(+), 40 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 625c06b8bc39..b63278b2017a 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -359,11 +359,13 @@ * * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ -int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, +int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, + unsigned long *dst, const union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; - int k, ret = -1; + unsigned int k; + int ret = -1; for (k = 0; k < len; k++) { bitset = map[k]; @@ -631,13 +633,17 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, * * Return: 0 on success, -ENOMEM on allocation failure. */ -static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules) +static int pipapo_resize(struct nft_pipapo_field *f, + unsigned int old_rules, unsigned int rules) { long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt; - size_t new_bucket_size, copy; + unsigned int new_bucket_size, copy; int group, bucket; + if (rules >= NFT_PIPAPO_RULE0_MAX) + return -ENOSPC; + new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG); #ifdef NFT_PIPAPO_ALIGN new_bucket_size = roundup(new_bucket_size, @@ -690,7 +696,7 @@ mt: if (new_lt) { f->bsize = new_bucket_size; - NFT_PIPAPO_LT_ASSIGN(f, new_lt); + f->lt = new_lt; kvfree(old_lt); } @@ -847,8 +853,8 @@ static void pipapo_lt_8b_to_4b(int old_groups, int bsize, */ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) { + unsigned int groups, bb; unsigned long *new_lt; - int groups, bb; size_t lt_size; lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * @@ -898,7 +904,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) f->groups = groups; f->bb = bb; kvfree(f->lt); - NFT_PIPAPO_LT_ASSIGN(f, new_lt); + f->lt = new_lt; } /** @@ -915,7 +921,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules, group, ret, bit_offset = 0; + unsigned int rule = f->rules, group, ret, bit_offset = 0; ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) @@ -1255,8 +1261,14 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, /* Validate */ start_p = start; end_p = end; + + /* some helpers return -1, or 0 >= for valid rule pos, + * so we cannot support more than INT_MAX rules at this time. + */ + BUILD_BUG_ON(NFT_PIPAPO_RULE0_MAX > INT_MAX); + nft_pipapo_for_each_field(f, i, m) { - if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) + if (f->rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; if (memcmp(start_p, end_p, @@ -1362,7 +1374,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new_lt) goto out_lt; - NFT_PIPAPO_LT_ASSIGN(dst, new_lt); + dst->lt = new_lt; memcpy(NFT_PIPAPO_LT_ALIGN(new_lt), NFT_PIPAPO_LT_ALIGN(src->lt), @@ -1433,10 +1445,10 @@ out_scratch: * * Return: Number of rules that originated from the same entry as @first. */ -static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first) +static unsigned int pipapo_rules_same_key(struct nft_pipapo_field *f, unsigned int first) { struct nft_pipapo_elem *e = NULL; /* Keep gcc happy */ - int r; + unsigned int r; for (r = first; r < f->rules; r++) { if (r != first && e != f->mt[r].e) @@ -1489,8 +1501,9 @@ static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first) * 0 1 2 * element pointers: 0x42 0x42 0x44 */ -static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules, - int start, int n, int to_offset, bool is_last) +static void pipapo_unmap(union nft_pipapo_map_bucket *mt, unsigned int rules, + unsigned int start, unsigned int n, + unsigned int to_offset, bool is_last) { int i; @@ -1596,8 +1609,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); + unsigned int rules_f0, first_rule = 0; u64 tstamp = nft_net_tstamp(net); - int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; struct nft_trans_gc *gc; @@ -1608,7 +1621,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const struct nft_pipapo_field *f; - int i, start, rules_fx; + unsigned int i, start, rules_fx; start = first_rule; rules_fx = rules_f0; @@ -1986,7 +1999,7 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; - int rules_f0, first_rule = 0; + unsigned int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; const u8 *data; @@ -2051,7 +2064,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct net *net = read_pnet(&set->net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; - int i, r; + unsigned int i, r; rcu_read_lock(); if (iter->genmask == nft_genmask_cur(net)) @@ -2155,6 +2168,9 @@ static int nft_pipapo_init(const struct nft_set *set, field_count = desc->field_count ? : 1; + BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS > 255); + BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS != NFT_REG32_COUNT); + if (field_count > NFT_PIPAPO_MAX_FIELDS) return -EINVAL; @@ -2176,7 +2192,11 @@ static int nft_pipapo_init(const struct nft_set *set, rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { - int len = desc->field_len[i] ? : set->klen; + unsigned int len = desc->field_len[i] ? : set->klen; + + /* f->groups is u8 */ + BUILD_BUG_ON((NFT_PIPAPO_MAX_BYTES * + BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS_LARGE_SET) >= 256); f->bb = NFT_PIPAPO_GROUP_BITS_INIT; f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f); @@ -2185,7 +2205,7 @@ static int nft_pipapo_init(const struct nft_set *set, f->bsize = 0; f->rules = 0; - NFT_PIPAPO_LT_ASSIGN(f, NULL); + f->lt = NULL; f->mt = NULL; } @@ -2221,7 +2241,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; - int i, r; + unsigned int i, r; for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) ; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 42464e7c24ac..ca64f7505e71 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -70,15 +70,9 @@ #define NFT_PIPAPO_ALIGN_HEADROOM \ (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) #define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) -#define NFT_PIPAPO_LT_ASSIGN(field, x) \ - do { \ - (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ - (field)->lt = (x); \ - } while (0) #else #define NFT_PIPAPO_ALIGN_HEADROOM 0 #define NFT_PIPAPO_LT_ALIGN(lt) (lt) -#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x)) #endif /* NFT_PIPAPO_ALIGN */ #define nft_pipapo_for_each_field(field, index, match) \ @@ -110,22 +104,18 @@ union nft_pipapo_map_bucket { /** * struct nft_pipapo_field - Lookup, mapping tables and related data for a field - * @groups: Amount of bit groups * @rules: Number of inserted rules * @bsize: Size of each bucket in lookup table, in longs + * @groups: Amount of bit groups * @bb: Number of bits grouped together in lookup table buckets * @lt: Lookup table: 'groups' rows of buckets - * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes * @mt: Mapping table: one bucket per rule */ struct nft_pipapo_field { - int groups; - unsigned long rules; - size_t bsize; - int bb; -#ifdef NFT_PIPAPO_ALIGN - unsigned long *lt_aligned; -#endif + unsigned int rules; + unsigned int bsize; + u8 groups; + u8 bb; unsigned long *lt; union nft_pipapo_map_bucket *mt; }; @@ -145,15 +135,15 @@ struct nft_pipapo_scratch { /** * struct nft_pipapo_match - Data used for lookup and matching * @field_count: Amount of fields in set - * @scratch: Preallocated per-CPU maps for partial matching results * @bsize_max: Maximum lookup table bucket size of all fields, in longs + * @scratch: Preallocated per-CPU maps for partial matching results * @rcu: Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { - int field_count; + u8 field_count; + unsigned int bsize_max; struct nft_pipapo_scratch * __percpu *scratch; - size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[] __counted_by(field_count); }; @@ -186,7 +176,8 @@ struct nft_pipapo_elem { struct nft_set_ext ext; }; -int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, +int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, + unsigned long *dst, const union nft_pipapo_map_bucket *mt, bool match_only); /** -- cgit v1.2.3 From 9f439bd6ef4f60c8a37bd9138fa9ed9b5e7ae0d7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:40 +0100 Subject: netfilter: nft_set_pipapo: speed up bulk element insertions Insertions into the set are slow when we try to add many elements. For 800k elements I get: time nft -f pipapo_800k real 19m34.849s user 0m2.390s sys 19m12.828s perf stats: --95.39%--nft_pipapo_insert |--76.60%--pipapo_insert | --76.37%--pipapo_resize | |--72.87%--memcpy_orig | |--1.88%--__free_pages_ok | | --0.89%--free_tail_page_prepare | --1.38%--kvmalloc_node .. --18.56%--pipapo_get.isra.0 |--13.91%--__bitmap_and |--3.01%--pipapo_refill |--0.81%--__kmalloc | --0.74%--__kmalloc_large_node | --0.66%--__alloc_pages .. --0.52%--memset_orig So lots of time is spent in copying exising elements to make space for the next one. Instead of allocating to the exact size of the new rule count, allocate extra slack to reduce alloc/copy/free overhead. After: time nft -f pipapo_800k real 1m54.110s user 0m2.515s sys 1m51.377s --80.46%--nft_pipapo_insert |--73.45%--pipapo_get.isra.0 |--57.63%--__bitmap_and | |--8.52%--pipapo_refill |--3.45%--__kmalloc | --3.05%--__kmalloc_large_node | --2.58%--__alloc_pages --2.59%--memset_orig |--6.51%--pipapo_insert --5.96%--pipapo_resize |--3.63%--memcpy_orig --2.13%--kvmalloc_node The new @rules_alloc fills a hole, so struct size doesn't go up. Also make it so rule removal doesn't shrink unless the free/extra space exceeds two pages. This should be safe as well: When a rule gets removed, the attempt to lower the allocated size is already allowed to fail. Exception: do exact allocations as long as set is very small (less than one page needed). v2: address comments from Stefano: kdoc comment formatting changes remove redundant assignment switch back to PAGE_SIZE Link: https://lore.kernel.org/netfilter-devel/20240213141753.17ef27a6@elisabeth/ Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 83 ++++++++++++++++++++++++++++++++++-------- net/netfilter/nft_set_pipapo.h | 2 + 2 files changed, 69 insertions(+), 16 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index b63278b2017a..6118e4bba2ef 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -621,6 +621,65 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, return &e->priv; } +/** + * pipapo_realloc_mt() - Reallocate mapping table if needed upon resize + * @f: Field containing mapping table + * @old_rules: Amount of existing mapped rules + * @rules: Amount of new rules to map + * + * Return: 0 on success, negative error code on failure. + */ +static int pipapo_realloc_mt(struct nft_pipapo_field *f, + unsigned int old_rules, unsigned int rules) +{ + union nft_pipapo_map_bucket *new_mt = NULL, *old_mt = f->mt; + const unsigned int extra = PAGE_SIZE / sizeof(*new_mt); + unsigned int rules_alloc = rules; + + might_sleep(); + + if (unlikely(rules == 0)) + goto out_free; + + /* growing and enough space left, no action needed */ + if (rules > old_rules && f->rules_alloc > rules) + return 0; + + /* downsize and extra slack has not grown too large */ + if (rules < old_rules) { + unsigned int remove = f->rules_alloc - rules; + + if (remove < (2u * extra)) + return 0; + } + + /* If set needs more than one page of memory for rules then + * allocate another extra page to avoid frequent reallocation. + */ + if (rules > extra && + check_add_overflow(rules, extra, &rules_alloc)) + return -EOVERFLOW; + + new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL); + if (!new_mt) + return -ENOMEM; + + if (old_mt) + memcpy(new_mt, old_mt, min(old_rules, rules) * sizeof(*new_mt)); + + if (rules > old_rules) { + memset(new_mt + old_rules, 0, + (rules - old_rules) * sizeof(*new_mt)); + } +out_free: + f->rules_alloc = rules_alloc; + f->mt = new_mt; + + kvfree(old_mt); + + return 0; +} + /** * pipapo_resize() - Resize lookup or mapping table, or both * @f: Field containing lookup and mapping tables @@ -637,9 +696,8 @@ static int pipapo_resize(struct nft_pipapo_field *f, unsigned int old_rules, unsigned int rules) { long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; - union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt; unsigned int new_bucket_size, copy; - int group, bucket; + int group, bucket, err; if (rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; @@ -682,16 +740,10 @@ static int pipapo_resize(struct nft_pipapo_field *f, } mt: - new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL); - if (!new_mt) { + err = pipapo_realloc_mt(f, old_rules, rules); + if (err) { kvfree(new_lt); - return -ENOMEM; - } - - memcpy(new_mt, f->mt, min(old_rules, rules) * sizeof(*new_mt)); - if (rules > old_rules) { - memset(new_mt + old_rules, 0, - (rules - old_rules) * sizeof(*new_mt)); + return err; } if (new_lt) { @@ -700,9 +752,6 @@ mt: kvfree(old_lt); } - f->mt = new_mt; - kvfree(old_mt); - return 0; } @@ -1382,14 +1431,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) src->groups * NFT_PIPAPO_BUCKETS(src->bb)); if (src->rules > 0) { - dst->mt = kvmalloc_array(src->rules, sizeof(*src->mt), - GFP_KERNEL); + dst->mt = kvmalloc_array(src->rules_alloc, + sizeof(*src->mt), GFP_KERNEL); if (!dst->mt) goto out_mt; memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt)); } else { dst->mt = NULL; + dst->rules_alloc = 0; } src++; @@ -2205,6 +2255,7 @@ static int nft_pipapo_init(const struct nft_set *set, f->bsize = 0; f->rules = 0; + f->rules_alloc = 0; f->lt = NULL; f->mt = NULL; } diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index ca64f7505e71..24cd1ff73f98 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -106,6 +106,7 @@ union nft_pipapo_map_bucket { * struct nft_pipapo_field - Lookup, mapping tables and related data for a field * @rules: Number of inserted rules * @bsize: Size of each bucket in lookup table, in longs + * @rules_alloc: Number of allocated rules, always >= rules * @groups: Amount of bit groups * @bb: Number of bits grouped together in lookup table buckets * @lt: Lookup table: 'groups' rows of buckets @@ -114,6 +115,7 @@ union nft_pipapo_map_bucket { struct nft_pipapo_field { unsigned int rules; unsigned int bsize; + unsigned int rules_alloc; u8 groups; u8 bb; unsigned long *lt; -- cgit v1.2.3 From 5b651783d80b97167ecd27dc6a4408c694873902 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 15 Feb 2024 09:29:30 +0100 Subject: netfilter: nft_set_pipapo: use GFP_KERNEL for insertions An earlier attempt changed this to GFP_KERNEL, but the get helper is also called for get requests from userspace, which uses rcu. Let the caller pass in the kmalloc flags to allow insertions to schedule if needed. Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 6118e4bba2ef..c0ceea068936 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -507,6 +507,7 @@ out: * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask * @tstamp: timestamp to check for expired elements + * @gfp: the type of memory to allocate (see kmalloc). * * This is essentially the same as the lookup function, except that it matches * key data against the uncommitted copy and doesn't use preallocated maps for @@ -517,7 +518,7 @@ out: static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, const u8 *data, u8 genmask, - u64 tstamp) + u64 tstamp, gfp_t gfp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); @@ -530,13 +531,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, if (m->bsize_max == 0) return ret; - res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); + res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp); if (!res_map) { ret = ERR_PTR(-ENOMEM); goto out; } - fill_map = kcalloc(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); + fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp); if (!fill_map) { ret = ERR_PTR(-ENOMEM); goto out; @@ -614,7 +615,8 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, struct nft_pipapo_elem *e; e = pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net), get_jiffies_64()); + nft_genmask_cur(net), get_jiffies_64(), + GFP_ATOMIC); if (IS_ERR(e)) return ERR_CAST(e); @@ -1275,7 +1277,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else end = start; - dup = pipapo_get(net, set, start, genmask, tstamp); + dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1297,7 +1299,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); + dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp, + GFP_KERNEL); } if (PTR_ERR(dup) != -ENOENT) { @@ -1865,7 +1868,8 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); + e = pipapo_get(net, set, data, nft_genmask_next(net), + nft_net_tstamp(net), GFP_KERNEL); if (IS_ERR(e)) return NULL; -- cgit v1.2.3 From 3f801968889459ecae1eab524b039676e6eaa319 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 14 Feb 2024 14:41:02 +0100 Subject: netfilter: move nf_reinject into nfnetlink_queue modules No need to keep this in the core, move it to the nfnetlink_queue module. nf_reroute is moved too, there were no other callers. Signed-off-by: Florian Westphal --- include/linux/netfilter.h | 1 - include/net/netfilter/nf_queue.h | 1 - net/netfilter/nf_queue.c | 106 ----------------------------- net/netfilter/nfnetlink_queue.c | 142 +++++++++++++++++++++++++++++++++++++++ net/netfilter/utils.c | 37 ---------- 5 files changed, 142 insertions(+), 145 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 80900d910992..ffb5e0297eb5 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -370,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index c81021ab07aa..4aeffddb7586 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -35,7 +35,6 @@ struct nf_queue_handler { void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); -void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index e2f334f70281..7f12e56e6e52 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -248,109 +248,3 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } EXPORT_SYMBOL_GPL(nf_queue); - -static unsigned int nf_iterate(struct sk_buff *skb, - struct nf_hook_state *state, - const struct nf_hook_entries *hooks, - unsigned int *index) -{ - const struct nf_hook_entry *hook; - unsigned int verdict, i = *index; - - while (i < hooks->num_hook_entries) { - hook = &hooks->hooks[i]; -repeat: - verdict = nf_hook_entry_hookfn(hook, skb, state); - if (verdict != NF_ACCEPT) { - *index = i; - if (verdict != NF_REPEAT) - return verdict; - goto repeat; - } - i++; - } - - *index = i; - return NF_ACCEPT; -} - -static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) -{ - switch (pf) { -#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE - case NFPROTO_BRIDGE: - return rcu_dereference(net->nf.hooks_bridge[hooknum]); -#endif - case NFPROTO_IPV4: - return rcu_dereference(net->nf.hooks_ipv4[hooknum]); - case NFPROTO_IPV6: - return rcu_dereference(net->nf.hooks_ipv6[hooknum]); - default: - WARN_ON_ONCE(1); - return NULL; - } - - return NULL; -} - -/* Caller must hold rcu read-side lock */ -void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) -{ - const struct nf_hook_entry *hook_entry; - const struct nf_hook_entries *hooks; - struct sk_buff *skb = entry->skb; - const struct net *net; - unsigned int i; - int err; - u8 pf; - - net = entry->state.net; - pf = entry->state.pf; - - hooks = nf_hook_entries_head(net, pf, entry->state.hook); - - i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { - kfree_skb(skb); - nf_queue_entry_free(entry); - return; - } - - hook_entry = &hooks->hooks[i]; - - /* Continue traversal iff userspace said ok... */ - if (verdict == NF_REPEAT) - verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); - - if (verdict == NF_ACCEPT) { - if (nf_reroute(skb, entry) < 0) - verdict = NF_DROP; - } - - if (verdict == NF_ACCEPT) { -next_hook: - ++i; - verdict = nf_iterate(skb, &entry->state, hooks, &i); - } - - switch (verdict & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_STOP: - local_bh_disable(); - entry->state.okfn(entry->state.net, entry->state.sk, skb); - local_bh_enable(); - break; - case NF_QUEUE: - err = nf_queue(skb, &entry->state, i, verdict); - if (err == 1) - goto next_hook; - break; - case NF_STOLEN: - break; - default: - kfree_skb(skb); - } - - nf_queue_entry_free(entry); -} -EXPORT_SYMBOL(nf_reinject); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5cf38fc0a366..00f4bd21c59b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -225,6 +225,148 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) return entry; } +static unsigned int nf_iterate(struct sk_buff *skb, + struct nf_hook_state *state, + const struct nf_hook_entries *hooks, + unsigned int *index) +{ + const struct nf_hook_entry *hook; + unsigned int verdict, i = *index; + + while (i < hooks->num_hook_entries) { + hook = &hooks->hooks[i]; +repeat: + verdict = nf_hook_entry_hookfn(hook, skb, state); + if (verdict != NF_ACCEPT) { + *index = i; + if (verdict != NF_REPEAT) + return verdict; + goto repeat; + } + i++; + } + + *index = i; + return NF_ACCEPT; +} + +static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) +{ + switch (pf) { +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + case NFPROTO_BRIDGE: + return rcu_dereference(net->nf.hooks_bridge[hooknum]); +#endif + case NFPROTO_IPV4: + return rcu_dereference(net->nf.hooks_ipv4[hooknum]); + case NFPROTO_IPV6: + return rcu_dereference(net->nf.hooks_ipv6[hooknum]); + default: + WARN_ON_ONCE(1); + return NULL; + } + + return NULL; +} + +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) +{ +#ifdef CONFIG_INET + const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct iphdr *iph = ip_hdr(skb); + + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) + return ip_route_me_harder(entry->state.net, entry->state.sk, + skb, RTN_UNSPEC); + } +#endif + return 0; +} + +static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) +{ + const struct nf_ipv6_ops *v6ops; + int ret = 0; + + switch (entry->state.pf) { + case AF_INET: + ret = nf_ip_reroute(skb, entry); + break; + case AF_INET6: + v6ops = rcu_dereference(nf_ipv6_ops); + if (v6ops) + ret = v6ops->reroute(skb, entry); + break; + } + return ret; +} + +/* caller must hold rcu read-side lock */ +static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) +{ + const struct nf_hook_entry *hook_entry; + const struct nf_hook_entries *hooks; + struct sk_buff *skb = entry->skb; + const struct net *net; + unsigned int i; + int err; + u8 pf; + + net = entry->state.net; + pf = entry->state.pf; + + hooks = nf_hook_entries_head(net, pf, entry->state.hook); + + i = entry->hook_index; + if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); + nf_queue_entry_free(entry); + return; + } + + hook_entry = &hooks->hooks[i]; + + /* Continue traversal iff userspace said ok... */ + if (verdict == NF_REPEAT) + verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); + + if (verdict == NF_ACCEPT) { + if (nf_reroute(skb, entry) < 0) + verdict = NF_DROP; + } + + if (verdict == NF_ACCEPT) { +next_hook: + ++i; + verdict = nf_iterate(skb, &entry->state, hooks, &i); + } + + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + case NF_STOP: + local_bh_disable(); + entry->state.okfn(entry->state.net, entry->state.sk, skb); + local_bh_enable(); + break; + case NF_QUEUE: + err = nf_queue(skb, &entry->state, i, verdict); + if (err == 1) + goto next_hook; + break; + case NF_STOLEN: + break; + default: + kfree_skb(skb); + } + + nf_queue_entry_free(entry); +} + static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { const struct nf_ct_hook *ct_hook; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index acef4155f0da..008419db815a 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -179,43 +179,6 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, } EXPORT_SYMBOL_GPL(nf_route); -static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) -{ -#ifdef CONFIG_INET - const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - if (!(iph->tos == rt_info->tos && - skb->mark == rt_info->mark && - iph->daddr == rt_info->daddr && - iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, entry->state.sk, - skb, RTN_UNSPEC); - } -#endif - return 0; -} - -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) -{ - const struct nf_ipv6_ops *v6ops; - int ret = 0; - - switch (entry->state.pf) { - case AF_INET: - ret = nf_ip_reroute(skb, entry); - break; - case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->reroute(skb, entry); - break; - } - return ret; -} - /* Only get and check the lengths, not do any hop-by-hop stuff. */ int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) { -- cgit v1.2.3 From 26f4dac11775a1ca24e2605cb30e828d4dbdea93 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Feb 2024 15:31:32 -0800 Subject: netfilter: x_tables: Use unsafe_memcpy() for 0-sized destination The struct xt_entry_target fake flexible array has not be converted to a true flexible array, which is mainly blocked by it being both UAPI and used in the middle of other structures. In order to properly check for 0-sized destinations in memcpy(), an exception must be made for the one place where it is still a destination. Since memcpy() was already skipping checks for 0-sized destinations, using unsafe_memcpy() is no change in behavior. Signed-off-by: Kees Cook Reviewed-by: Simon Horman Signed-off-by: Florian Westphal --- net/netfilter/x_tables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 21624d68314f..da5d929c7c85 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1142,7 +1142,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, if (target->compat_from_user) target->compat_from_user(t->data, ct->data); else - memcpy(t->data, ct->data, tsize - sizeof(*ct)); + unsafe_memcpy(t->data, ct->data, tsize - sizeof(*ct), + /* UAPI 0-sized destination */); tsize += off; t->u.user.target_size = tsize; -- cgit v1.2.3 From e0bb2675fea2783c45bb95d74f00c55156720863 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:29 +0000 Subject: ipv6: annotate data-races around cnf.hop_limit idev->cnf.hop_limit and net->ipv6.devconf_all->hop_limit might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Acked-by: Florian Westphal # for netfilter parts Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/nf_reject_ipv6.c | 4 ++-- net/ipv6/output_core.c | 4 ++-- net/netfilter/nf_synproxy_core.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2b383d92d7f5..2c3f62907958 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -460,7 +460,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip6_dst_hoplimit(dst); dst_release(dst); } else { - set_tun->ttl = net->ipv6.devconf_all->hop_limit; + set_tun->ttl = READ_ONCE(net->ipv6.devconf_all->hop_limit); } #endif } else { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56c3c467f9de..f61d977ac052 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1346,7 +1346,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (val < 0) - val = sock_net(sk)->ipv6.devconf_all->hop_limit; + val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit); break; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e96d79cd34d2..9c9c31268432 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1423,7 +1423,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 196dd4ecb5e2..dedee264b8f6 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); @@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); skb_reset_transport_header(nskb); icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index b5205311f372..806d4b5dd1e6 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst) rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) - hoplimit = idev->cnf.hop_limit; + hoplimit = READ_ONCE(idev->cnf.hop_limit); else - hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); rcu_read_unlock(); } return hoplimit; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index fbbc4fd37349..5b140c12b7df 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -800,7 +800,7 @@ synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb, skb_reset_network_header(skb); iph = skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); - iph->hop_limit = net->ipv6.devconf_all->hop_limit; + iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit); iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; -- cgit v1.2.3