From 4b80ced971b0d118f9a11dd503a5833a5016de92 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 17 Aug 2023 20:28:32 +0200 Subject: netfilter: nf_tables: validate all pending tables We have to validate all tables in the transaction that are in VALIDATE_DO state, the blamed commit below did not move the break statement to its right location so we only validate one table. Moreover, we can't init table->validate to _SKIP when a table object is allocated. If we do, then if a transcaction creates a new table and then fails the transaction, nfnetlink will loop and nft will hang until user cancels the command. Add back the pernet state as a place to stash the last state encountered. This is either _DO (we hit an error during commit validation) or _SKIP (transaction passed all checks). Fixes: 00c320f9b755 ("netfilter: nf_tables: make validation state per table") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3e841e45f2c0..a76a62ebe9c9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1373,7 +1373,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, if (table == NULL) goto err_kzalloc; - table->validate_state = NFT_VALIDATE_SKIP; + table->validate_state = nft_net->validate_state; table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT); if (table->name == NULL) goto err_strdup; @@ -9051,9 +9051,8 @@ static int nf_tables_validate(struct net *net) return -EAGAIN; nft_validate_state_update(table, NFT_VALIDATE_SKIP); + break; } - - break; } return 0; @@ -9799,8 +9798,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } /* 0. Validate ruleset, otherwise roll back for error reporting. */ - if (nf_tables_validate(net) < 0) + if (nf_tables_validate(net) < 0) { + nft_net->validate_state = NFT_VALIDATE_DO; return -EAGAIN; + } err = nft_flow_rule_offload_commit(net); if (err < 0) @@ -10059,6 +10060,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_commit_audit_log(&adl, nft_net->base_seq); nft_gc_seq_end(nft_net, gc_seq); + nft_net->validate_state = NFT_VALIDATE_SKIP; nf_tables_commit_release(net); return 0; @@ -11115,6 +11117,7 @@ static int __net_init nf_tables_init_net(struct net *net) mutex_init(&nft_net->commit_mutex); nft_net->base_seq = 1; nft_net->gc_seq = 0; + nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; } -- cgit v1.2.3 From 2c9f0293280e258606e54ed2b96fa71498432eae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 18 Aug 2023 01:13:31 +0200 Subject: netfilter: nf_tables: flush pending destroy work before netlink notifier Destroy work waits for the RCU grace period then it releases the objects with no mutex held. All releases objects follow this path for transactions, therefore, order is guaranteed and references to top-level objects in the hierarchy remain valid. However, netlink notifier might interfer with pending destroy work. rcu_barrier() is not correct because objects are not release via RCU callback. Flush destroy work before releasing objects from netlink notifier path. Fixes: d4bc8271db21 ("netfilter: nf_tables: netlink notifier might race to release objects") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a76a62ebe9c9..d299e7aa1b96 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11073,7 +11073,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); if (!list_empty(&nf_tables_destroy_list)) - rcu_barrier(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && -- cgit v1.2.3 From 720344340fb9be2765bbaab7b292ece0a4570eae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 18 Aug 2023 01:13:52 +0200 Subject: netfilter: nf_tables: GC transaction race with abort path Abort path is missing a synchronization point with GC transactions. Add GC sequence number hence any GC transaction losing race will be discarded. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d299e7aa1b96..a255456efae4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10337,8 +10337,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); - int ret = __nf_tables_abort(net, action); + unsigned int gc_seq; + int ret; + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); return ret; -- cgit v1.2.3 From 8357bc946a2abc2a10ca40e5a2105d2b4c57515e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Aug 2023 14:33:32 +0200 Subject: netfilter: nf_tables: use correct lock to protect gc_list Use nf_tables_gc_list_lock spinlock, not nf_tables_destroy_list_lock to protect the gc list. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a255456efae4..eb8b1167dced 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9456,9 +9456,9 @@ static void nft_trans_gc_work(struct work_struct *work) struct nft_trans_gc *trans, *next; LIST_HEAD(trans_gc_list); - spin_lock(&nf_tables_destroy_list_lock); + spin_lock(&nf_tables_gc_list_lock); list_splice_init(&nf_tables_gc_list, &trans_gc_list); - spin_unlock(&nf_tables_destroy_list_lock); + spin_unlock(&nf_tables_gc_list_lock); list_for_each_entry_safe(trans, next, &trans_gc_list, list) { list_del(&trans->list); -- cgit v1.2.3 From 5e1be4cdc98c989d5387ce94ff15b5ad06a5b681 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Aug 2023 19:49:52 +0200 Subject: netfilter: nf_tables: fix out of memory error handling Several instances of pipapo_resize() don't propagate allocation failures, this causes a crash when fault injection is enabled for gfp_kernel slabs. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio --- net/netfilter/nft_set_pipapo.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 3757fcc55723..6af9c9ed4b5c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -902,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret, bit_offset = 0; + int rule = f->rules, group, ret, bit_offset = 0; - ret = pipapo_resize(f, f->rules - 1, f->rules); + ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) return ret; + f->rules++; + for (group = 0; group < f->groups; group++) { int i, v; u8 mask; @@ -1052,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f, step++; if (step >= len) { if (!masks) { - pipapo_insert(f, base, 0); + err = pipapo_insert(f, base, 0); + if (err < 0) + return err; masks = 1; } goto out; @@ -1235,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else ret = pipapo_expand(f, start, end, f->groups * f->bb); + if (ret < 0) + return ret; + if (f->bsize > bsize_max) bsize_max = f->bsize; -- cgit v1.2.3 From 8e51830e29e12670b4c10df070a4ea4c9593e961 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Aug 2023 22:03:57 +0200 Subject: netfilter: nf_tables: defer gc run if previous batch is still pending Don't queue more gc work, else we may queue the same elements multiple times. If an element is flagged as dead, this can mean that either the previous gc request was invalidated/discarded by a transaction or that the previous request is still pending in the system work queue. The latter will happen if the gc interval is set to a very low value, e.g. 1ms, and system work queue is backlogged. The sets refcount is 1 if no previous gc requeusts are queued, so add a helper for this and skip gc run if old requests are pending. Add a helper for this and skip the gc run in this case. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Florian Westphal Reviewed-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ net/netfilter/nft_set_hash.c | 3 +++ net/netfilter/nft_set_rbtree.c | 3 +++ 3 files changed, 11 insertions(+) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ffcbdf08380f..dd40c75011d2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -587,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline bool nft_set_gc_is_pending(const struct nft_set *s) +{ + return refcount_read(&s->refs) != 1; +} + static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index cef5df846000..524763659f25 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -326,6 +326,9 @@ static void nft_rhash_gc(struct work_struct *work) nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); + if (nft_set_gc_is_pending(set)) + goto done; + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index f9d4c8fcbbf8..c6435e709231 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -611,6 +611,9 @@ static void nft_rbtree_gc(struct work_struct *work) nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); + if (nft_set_gc_is_pending(set)) + goto done; + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; -- cgit v1.2.3