From bd4a816752bab609dd6d65ae021387beb9e2ddbd Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Wed, 6 Dec 2023 09:36:12 -0800 Subject: net: ipv6: support reporting otherwise unknown prefix flags in RTM_NEWPREFIX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lorenzo points out that we effectively clear all unknown flags from PIO when copying them to userspace in the netlink RTM_NEWPREFIX notification. We could fix this one at a time as new flags are defined, or in one fell swoop - I choose the latter. We could either define 6 new reserved flags (reserved1..6) and handle them individually (and rename them as new flags are defined), or we could simply copy the entire unmodified byte over - I choose the latter. This unfortunately requires some anonymous union/struct magic, so we add a static assert on the struct size for a little extra safety. Cc: David Ahern Cc: Lorenzo Colitti Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/addrconf.h | 12 ++++++++++-- include/net/if_inet6.h | 4 ---- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 82da55101b5a..61ebe723ee4d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -31,17 +31,22 @@ struct prefix_info { __u8 length; __u8 prefix_len; + union __packed { + __u8 flags; + struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) - __u8 onlink : 1, + __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix " #endif + }; + }; __be32 valid; __be32 prefered; __be32 reserved2; @@ -49,6 +54,9 @@ struct prefix_info { struct in6_addr prefix; }; +/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ +static_assert(sizeof(struct prefix_info) == 32); + #include #include #include diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 3e454c4d7ba6..f07642264c1e 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -22,10 +22,6 @@ #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 -/* prefix flags */ -#define IF_PREFIX_ONLINK 0x01 -#define IF_PREFIX_AUTOCONF 0x02 - enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, -- cgit v1.2.3 From 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 5 Dec 2023 18:25:54 +0100 Subject: net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table The referenced change added custom cleanup code to act_ct to delete any callbacks registered on the parent block when deleting the tcf_ct_flow_table instance. However, the underlying issue is that the drivers don't obtain the reference to the tcf_ct_flow_table instance when registering callbacks which means that not only driver callbacks may still be on the table when deleting it but also that the driver can still have pointers to its internal nf_flowtable and can use it concurrently which results either warning in netfilter[0] or use-after-free. Fix the issue by taking a reference to the underlying struct tcf_ct_flow_table instance when registering the callback and release the reference when unregistering. Expose new API required for such reference counting by adding two new callbacks to nf_flowtable_type and implementing them for act_ct flowtable_ct type. This fixes the issue by extending the lifetime of nf_flowtable until all users have unregistered. [0]: [106170.938634] ------------[ cut here ]------------ [106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis try overlay mlx5_core [106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1 [106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00 [106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202 [106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48 [106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000 [106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700 [106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58 [106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000 [106170.951616] FS: 0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000 [106170.952329] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0 [106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [106170.954766] Call Trace: [106170.955057] [106170.955315] ? __warn+0x79/0x120 [106170.955648] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.956172] ? report_bug+0x17c/0x190 [106170.956537] ? handle_bug+0x3c/0x60 [106170.956891] ? exc_invalid_op+0x14/0x70 [106170.957264] ? asm_exc_invalid_op+0x16/0x20 [106170.957666] ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core] [106170.958172] ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core] [106170.958788] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.959339] ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core] [106170.959854] ? mapping_remove+0x154/0x1d0 [mlx5_core] [106170.960342] ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core] [106170.960927] mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core] [106170.961441] mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core] [106170.962001] mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core] [106170.962524] mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core] [106170.963034] mlx5e_flow_put+0x73/0xe0 [mlx5_core] [106170.963506] mlx5e_put_flow_list+0x38/0x70 [mlx5_core] [106170.964002] mlx5e_rep_update_flows+0xec/0x290 [mlx5_core] [106170.964525] mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core] [106170.965056] process_one_work+0x13a/0x2c0 [106170.965443] worker_thread+0x2e5/0x3f0 [106170.965808] ? rescuer_thread+0x410/0x410 [106170.966192] kthread+0xc6/0xf0 [106170.966515] ? kthread_complete_and_exit+0x20/0x20 [106170.966970] ret_from_fork+0x2d/0x50 [106170.967332] ? kthread_complete_and_exit+0x20/0x20 [106170.967774] ret_from_fork_asm+0x11/0x20 [106170.970466] [106170.970726] ---[ end trace 0000000000000000 ]--- Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks") Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netfilter/nf_flow_table.h | 10 ++++++++++ net/sched/act_ct.c | 34 ++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index fe1507c1db82..692d5955911c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -62,6 +62,8 @@ struct nf_flowtable_type { enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); + void (*get)(struct nf_flowtable *ft); + void (*put)(struct nf_flowtable *ft); nf_hookfn *hook; struct module *owner; }; @@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, } list_add_tail(&block_cb->list, &block->cb_list); + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->get) + flow_table->type->get(flow_table); + return 0; unlock: up_write(&flow_table->flow_block_lock); @@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, WARN_ON(true); } up_write(&flow_table->flow_block_lock); + + if (flow_table->type->put) + flow_table->type->put(flow_table); } void flow_offload_route_init(struct flow_offload *flow, diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b3f4a503ee2b..f69c47945175 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_get(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_get_ref(ct_ft); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_put(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_put(ct_ft); +} + static struct nf_flowtable_type flowtable_ct = { .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, + .get = tcf_ct_nf_get, + .put = tcf_ct_nf_put, .owner = THIS_MODULE, }; @@ -337,9 +359,13 @@ err_alloc: return err; } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft) +{ + refcount_inc(&ct_ft->ref); +} + static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { - struct flow_block_cb *block_cb, *tmp_cb; struct tcf_ct_flow_table *ct_ft; struct flow_block *block; @@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) rwork); nf_flow_table_free(&ct_ft->nf_ft); - /* Remove any remaining callbacks before cleanup */ block = &ct_ft->nf_ft.flow_block; down_write(&ct_ft->nf_ft.flow_block_lock); - list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } + WARN_ON(!list_empty(&block->cb_list)); up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); -- cgit v1.2.3