From 4d42cd6bc2ac1b9be50ade13771daec90c9d18b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Feb 2023 10:12:01 -0800 Subject: tls: rx: fix return value for async crypto Gaurav reports that TLS Rx is broken with async crypto accelerators. The commit under Fixes missed updating the retval byte counting logic when updating how records are stored. Even though both before and after the change 'decrypted' was updated inside the main loop, it was completely overwritten when processing the async completions. Now that the rx_list only holds non-zero-copy records we need to add, not overwrite. Reported-and-bisected-by: Gaurav Jain Fixes: cbbdee9918a2 ("tls: rx: async: don't put async zc on the list") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217064 Tested-by: Gaurav Jain Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230227181201.1793772-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 782d3701b86f..021d760f9133 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2127,7 +2127,7 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - decrypted = max(err, 0); + decrypted += max(err, 0); } copied += decrypted; -- cgit v1.2.3 From 880ce5f20033cd6ecb2c0edfe0376c9e45220012 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 14:17:06 +0000 Subject: net: avoid skb end_offset change in __skb_unclone_keeptruesize() Once initial skb->head has been allocated from skb_small_head_cache, we need to make sure to use the same strategy whenever skb->head has to be re-allocated, as found by syzbot [1]. This means kmalloc_reserve() cannot fall back from using skb_small_head_cache to generic (power-of-two) kmem caches. 
It seems that we probably want to rework things in the future, to partially revert following patch, because we no longer use ksize() for skb allocated in TX path. 2b88cba55883 ("net: preserve skb_end_offset() in skb_unclone_keeptruesize()") Ideally, TCP stack should never put payload in skb->head, this effort has to be completed. In the mean time, add a sanity check. [1] BUG: KASAN: invalid-free in slab_free mm/slub.c:3787 [inline] BUG: KASAN: invalid-free in kmem_cache_free+0xee/0x5c0 mm/slub.c:3809 Free of addr ffff88806cdee800 by task syz-executor239/5189 CPU: 0 PID: 5189 Comm: syz-executor239 Not tainted 6.2.0-rc8-syzkaller-02400-gd1fabc68f8e0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x15e/0x45d mm/kasan/report.c:417 kasan_report_invalid_free+0x9b/0x1b0 mm/kasan/report.c:482 ____kasan_slab_free+0x1a5/0x1c0 mm/kasan/common.c:216 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1807 slab_free mm/slub.c:3787 [inline] kmem_cache_free+0xee/0x5c0 mm/slub.c:3809 skb_kfree_head net/core/skbuff.c:857 [inline] skb_kfree_head net/core/skbuff.c:853 [inline] skb_free_head+0x16f/0x1a0 net/core/skbuff.c:872 skb_release_data+0x57a/0x820 net/core/skbuff.c:901 skb_release_all net/core/skbuff.c:966 [inline] __kfree_skb+0x4f/0x70 net/core/skbuff.c:980 tcp_wmem_free_skb include/net/tcp.h:302 [inline] tcp_rtx_queue_purge net/ipv4/tcp.c:3061 [inline] tcp_write_queue_purge+0x617/0xcf0 net/ipv4/tcp.c:3074 tcp_v4_destroy_sock+0x125/0x810 net/ipv4/tcp_ipv4.c:2302 inet_csk_destroy_sock+0x19a/0x440 net/ipv4/inet_connection_sock.c:1195 __tcp_close+0xb96/0xf50 net/ipv4/tcp.c:3021 tcp_close+0x2d/0xc0 net/ipv4/tcp.c:3033 inet_release+0x132/0x270 net/ipv4/af_inet.c:426 
__sock_release+0xcd/0x280 net/socket.c:651 sock_close+0x1c/0x20 net/socket.c:1393 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x23c/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f2511f546c3 Code: c7 c2 c0 ff ff ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 RSP: 002b:00007ffef0103d48 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f2511f546c3 RDX: 0000000000000978 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000003434 R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffef0103d6c R13: 00007ffef0103d80 R14: 00007ffef0103dc0 R15: 0000000000000003 Allocated by task 5189: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] ____kasan_kmalloc mm/kasan/common.c:333 [inline] __kasan_kmalloc+0xa5/0xb0 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc_node_track_caller+0x5b/0xc0 mm/slab_common.c:988 kmalloc_reserve+0xf1/0x230 net/core/skbuff.c:539 pskb_expand_head+0x237/0x1160 net/core/skbuff.c:1995 __skb_unclone_keeptruesize+0x93/0x220 net/core/skbuff.c:2094 skb_unclone_keeptruesize include/linux/skbuff.h:1910 [inline] skb_prepare_for_shift net/core/skbuff.c:3804 [inline] skb_shift+0xef8/0x1e20 net/core/skbuff.c:3877 tcp_skb_shift net/ipv4/tcp_input.c:1538 
[inline] tcp_shift_skb_data net/ipv4/tcp_input.c:1646 [inline] tcp_sacktag_walk+0x93b/0x18a0 net/ipv4/tcp_input.c:1713 tcp_sacktag_write_queue+0x1599/0x31d0 net/ipv4/tcp_input.c:1974 tcp_ack+0x2e9f/0x5a10 net/ipv4/tcp_input.c:3847 tcp_rcv_established+0x667/0x2230 net/ipv4/tcp_input.c:6006 tcp_v4_do_rcv+0x670/0x9b0 net/ipv4/tcp_ipv4.c:1721 sk_backlog_rcv include/net/sock.h:1113 [inline] __release_sock+0x133/0x3b0 net/core/sock.c:2921 release_sock+0x58/0x1b0 net/core/sock.c:3488 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1485 inet_sendmsg+0x9d/0xe0 net/ipv4/af_inet.c:825 sock_sendmsg_nosec net/socket.c:722 [inline] sock_sendmsg+0xde/0x190 net/socket.c:745 sock_write_iter+0x295/0x3d0 net/socket.c:1136 call_write_iter include/linux/fs.h:2189 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff88806cdee800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 0 bytes inside of 1024-byte region [ffff88806cdee800, ffff88806cdeec00) The buggy address belongs to the physical page: page:ffffea0001b37a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x6cde8 head:ffffea0001b37a00 order:3 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 ffff888012441dc0 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1f2a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_MEMALLOC|__GFP_HARDWALL), pid 75, tgid 75 (kworker/u4:4), ts 96369578780, free_ts 
26734162530 prep_new_page mm/page_alloc.c:2531 [inline] get_page_from_freelist+0x119c/0x2ce0 mm/page_alloc.c:4283 __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5549 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2287 alloc_slab_page mm/slub.c:1851 [inline] allocate_slab+0x25f/0x350 mm/slub.c:1998 new_slab mm/slub.c:2051 [inline] ___slab_alloc+0xa91/0x1400 mm/slub.c:3193 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3292 __slab_alloc_node mm/slub.c:3345 [inline] slab_alloc_node mm/slub.c:3442 [inline] __kmem_cache_alloc_node+0x1a4/0x430 mm/slub.c:3491 __do_kmalloc_node mm/slab_common.c:967 [inline] __kmalloc_node_track_caller+0x4b/0xc0 mm/slab_common.c:988 kmalloc_reserve+0xf1/0x230 net/core/skbuff.c:539 __alloc_skb+0x129/0x330 net/core/skbuff.c:608 __netdev_alloc_skb+0x74/0x410 net/core/skbuff.c:672 __netdev_alloc_skb_ip_align include/linux/skbuff.h:3203 [inline] netdev_alloc_skb_ip_align include/linux/skbuff.h:3213 [inline] batadv_iv_ogm_aggregate_new+0x106/0x4e0 net/batman-adv/bat_iv_ogm.c:558 batadv_iv_ogm_queue_add net/batman-adv/bat_iv_ogm.c:670 [inline] batadv_iv_ogm_schedule_buff+0xe6b/0x1450 net/batman-adv/bat_iv_ogm.c:849 batadv_iv_ogm_schedule net/batman-adv/bat_iv_ogm.c:868 [inline] batadv_iv_ogm_schedule net/batman-adv/bat_iv_ogm.c:861 [inline] batadv_iv_send_outstanding_bat_ogm_packet+0x744/0x910 net/batman-adv/bat_iv_ogm.c:1712 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1446 [inline] free_pcp_prepare+0x66a/0xc20 mm/page_alloc.c:1496 free_unref_page_prepare mm/page_alloc.c:3369 [inline] free_unref_page+0x1d/0x490 mm/page_alloc.c:3464 free_contig_range+0xb5/0x180 mm/page_alloc.c:9488 destroy_args+0xa8/0x64c mm/debug_vm_pgtable.c:998 debug_vm_pgtable+0x28de/0x296f mm/debug_vm_pgtable.c:1318 do_one_initcall+0x141/0x790 init/main.c:1306 do_initcall_level init/main.c:1379 [inline] 
do_initcalls init/main.c:1395 [inline] do_basic_setup init/main.c:1414 [inline] kernel_init_freeable+0x6f9/0x782 init/main.c:1634 kernel_init+0x1e/0x1d0 init/main.c:1522 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 Memory state around the buggy address: ffff88806cdee700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88806cdee780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88806cdee800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffff88806cdee880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head") Reported-by: syzbot Signed-off-by: Eric Dumazet Tested-by: Christoph Paasch Signed-off-by: David S. Miller --- net/core/skbuff.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eb7d33b41e71..1a31815104d6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -517,18 +517,16 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, #ifdef HAVE_SKB_SMALL_HEAD_CACHE if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { - - /* skb_small_head_cache has non power of two size, - * likely forcing SLUB to use order-3 pages. - * We deliberately attempt a NOMEMALLOC allocation only. 
- */ obj = kmem_cache_alloc_node(skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); - if (obj) { - *size = SKB_SMALL_HEAD_CACHE_SIZE; + *size = SKB_SMALL_HEAD_CACHE_SIZE; + if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; - } + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); + goto out; } #endif *size = obj_size = kmalloc_size_roundup(obj_size); @@ -2082,6 +2080,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +/* Note: We plan to rework this in linux-6.4 */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { unsigned int saved_end_offset, saved_truesize; @@ -2100,6 +2099,22 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; +#ifdef HAVE_SKB_SMALL_HEAD_CACHE + /* We can not change skb->end if the original or new value + * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). + */ + if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || + skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { + /* We think this path should not be taken. + * Add a temporary trace to warn us just in case. + */ + pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", + saved_end_offset, skb_end_offset(skb)); + WARN_ON_ONCE(1); + return 0; + } +#endif + shinfo = skb_shinfo(skb); /* We are about to change back skb->end, -- cgit v1.2.3 From fb07390463c95e6eef254044d6dde050bfb9807a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 27 Feb 2023 12:23:52 -0300 Subject: net/sched: act_connmark: handle errno on tcf_idr_check_alloc Smatch reports that 'ci' can be used uninitialized. The current code ignores errno coming from tcf_idr_check_alloc, which will lead to the incorrect usage of 'ci'. Handle the errno as it should. 
Fixes: 288864effe33 ("net/sched: act_connmark: transition to percpu stats and rcu") Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8dabfb52ea3d..0d7aee8933c5 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -158,6 +158,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, nparms->zone = parm->zone; ret = 0; + } else { + err = ret; + goto out_free; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); -- cgit v1.2.3 From 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 15:30:24 +0000 Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping() ila_xlat_nl_cmd_get_mapping() generates an empty skb, triggering a recent sanity check [1]. Instead, return an error code, so that user space can get it. 
[1] skb_assert_len WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline] WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 Modules linked in: CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_assert_len include/linux/skbuff.h:2527 [inline] pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 lr : skb_assert_len include/linux/skbuff.h:2527 [inline] lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 sp : ffff80001e0d6c40 x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0 x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00 x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10 x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0 x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600 x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001 x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e Call trace: skb_assert_len include/linux/skbuff.h:2527 [inline] __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 dev_queue_xmit include/linux/netdevice.h:3033 [inline] __netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline] __netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325 netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338 __netlink_sendskb net/netlink/af_netlink.c:1283 [inline] netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292 netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380 nlmsg_unicast include/net/netlink.h:1099 [inline] 
genlmsg_unicast include/net/genetlink.h:433 [inline] genlmsg_reply include/net/genetlink.h:443 [inline] ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493 genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065 netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574 genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x558/0x844 net/socket.c:2479 ___sys_sendmsg net/socket.c:2533 [inline] __sys_sendmsg+0x26c/0x33c net/socket.c:2562 __do_sys_sendmsg net/socket.c:2571 [inline] __se_sys_sendmsg net/socket.c:2569 [inline] __arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52 el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193 el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 irq event stamp: 136484 hardirqs last enabled at (136483): [] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345 hardirqs last disabled at (136484): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405 softirqs last enabled at (136418): [] softirq_handle_end kernel/softirq.c:414 [inline] softirqs last enabled at (136418): [] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600 softirqs last disabled at (136371): [] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 ---[ end trace 0000000000000000 ]--- skb len=0 headroom=0 headlen=0 tailroom=192 mac=(0,0) net=(0,-1) trans=-1 shinfo(txflags=0 nr_frags=0 
gso(size=0 type=0 segs=0)) csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0 dev name=nlmon0 feat=0x0000000000005861 Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ila/ila_xlat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 47447f0241df..bee45dfeb187 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); + ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, -- cgit v1.2.3 From dfd2f0eb2347dbdf391fd5b8255fefc58a745472 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 18:44:36 +0000 Subject: net/sched: flower: fix fl_change() error recovery path The two "goto errout;" paths in fl_change() became wrong after the cited commit. Indeed, we must not call __fl_put() until the net pointer has been set in tcf_exts_init_ex(). This is a minimal fix. We might in the future validate TCA_FLOWER_FLAGS before we allocate @fnew. 
BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:72 [inline] BUG: KASAN: null-ptr-deref in atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline] BUG: KASAN: null-ptr-deref in refcount_read include/linux/refcount.h:147 [inline] BUG: KASAN: null-ptr-deref in __refcount_add_not_zero include/linux/refcount.h:152 [inline] BUG: KASAN: null-ptr-deref in __refcount_inc_not_zero include/linux/refcount.h:227 [inline] BUG: KASAN: null-ptr-deref in refcount_inc_not_zero include/linux/refcount.h:245 [inline] BUG: KASAN: null-ptr-deref in maybe_get_net include/net/net_namespace.h:269 [inline] BUG: KASAN: null-ptr-deref in tcf_exts_get_net include/net/pkt_cls.h:260 [inline] BUG: KASAN: null-ptr-deref in __fl_put net/sched/cls_flower.c:513 [inline] BUG: KASAN: null-ptr-deref in __fl_put+0x13e/0x3b0 net/sched/cls_flower.c:508 Read of size 4 at addr 000000000000014c by task syz-executor548/5082 CPU: 0 PID: 5082 Comm: syz-executor548 Not tainted 6.2.0-syzkaller-05251-g5b7c4cabbb65 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 print_report mm/kasan/report.c:420 [inline] kasan_report+0xec/0x130 mm/kasan/report.c:517 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x141/0x190 mm/kasan/generic.c:189 instrument_atomic_read include/linux/instrumented.h:72 [inline] atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline] refcount_read include/linux/refcount.h:147 [inline] __refcount_add_not_zero include/linux/refcount.h:152 [inline] __refcount_inc_not_zero include/linux/refcount.h:227 [inline] refcount_inc_not_zero include/linux/refcount.h:245 [inline] maybe_get_net include/net/net_namespace.h:269 [inline] tcf_exts_get_net include/net/pkt_cls.h:260 [inline] __fl_put net/sched/cls_flower.c:513 [inline] __fl_put+0x13e/0x3b0 net/sched/cls_flower.c:508 
fl_change+0x101b/0x4ab0 net/sched/cls_flower.c:2341 tc_new_tfilter+0x97c/0x2290 net/sched/cls_api.c:2310 rtnetlink_rcv_msg+0x996/0xd50 net/core/rtnetlink.c:6165 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2574 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x925/0xe30 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:722 [inline] sock_sendmsg+0xde/0x190 net/socket.c:745 ____sys_sendmsg+0x334/0x900 net/socket.c:2504 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2558 __sys_sendmmsg+0x18f/0x460 net/socket.c:2644 __do_sys_sendmmsg net/socket.c:2673 [inline] __se_sys_sendmmsg net/socket.c:2670 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2670 Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Reported-by: syzbot+baabf3efa7c1e57d28b2@syzkaller.appspotmail.com Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Paul Blakey Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/sched/cls_flower.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e960a46b0520..475fe222a855 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2200,8 +2200,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { + kfree(fnew); err = -EINVAL; - goto errout; + goto errout_tb; } } @@ -2226,8 +2227,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } spin_unlock(&tp->lock); - if (err) - goto errout; + if (err) { + kfree(fnew); + goto errout_tb; + } } fnew->handle = handle; @@ -2337,7 +2340,6 @@ errout_mask: fl_mask_put(head, fnew->mask); errout_idr: idr_remove(&head->handle_idr, fnew->handle); -errout: __fl_put(fnew); errout_tb: kfree(tb); -- cgit v1.2.3 From 860e874290fb3be08e966c9c8ffc510c5b0f2bd8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2023 17:09:03 +0100 Subject: netfilter: nft_last: copy content when cloning expression If the ruleset contains last timestamps, restore them accordingly. Otherwise, listing after restoration shows never used items. 
Fixes: 33a24de37e81 ("netfilter: nft_last: move stateful fields out of expression data") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 7f2bda6641bd..8e6d7eaf9dc8 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -105,11 +105,15 @@ static void nft_last_destroy(const struct nft_ctx *ctx, static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) { struct nft_last_priv *priv_dst = nft_expr_priv(dst); + struct nft_last_priv *priv_src = nft_expr_priv(src); priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); if (!priv_dst->last) return -ENOMEM; + priv_dst->last->set = priv_src->last->set; + priv_dst->last->jiffies = priv_src->last->jiffies; + return 0; } -- cgit v1.2.3 From aabef97a35160461e9c576848ded737558d89055 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2023 20:43:02 +0100 Subject: netfilter: nft_quota: copy content when cloning expression If the ruleset contains consumed quota, restore them accordingly. Otherwise, listing after restoration shows never used items. Restore the user-defined quota and flags too. 
Fixes: ed0a0c60f0e5 ("netfilter: nft_quota: move stateful fields out of expression data") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_quota.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 123578e28917..3ba12a7471b0 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -236,12 +236,16 @@ static void nft_quota_destroy(const struct nft_ctx *ctx, static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) { struct nft_quota *priv_dst = nft_expr_priv(dst); + struct nft_quota *priv_src = nft_expr_priv(src); + + priv_dst->quota = priv_src->quota; + priv_dst->flags = priv_src->flags; priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); if (!priv_dst->consumed) return -ENOMEM; - atomic64_set(priv_dst->consumed, 0); + *priv_dst->consumed = *priv_src->consumed; return 0; } -- cgit v1.2.3 From 49c47cc21b5b7a3d8deb18fc57b0aa2ab1286962 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 28 Feb 2023 10:33:44 +0800 Subject: net: tls: fix possible race condition between do_tls_getsockopt_conf() and do_tls_setsockopt_conf() ctx->crypto_send.info is not protected by lock_sock in do_tls_getsockopt_conf(). A race condition between do_tls_getsockopt_conf() and error paths of do_tls_setsockopt_conf() may lead to a use-after-free or null-deref. 
More discussion: https://lore.kernel.org/all/Y/ht6gQL+u6fj3dG@hog/ Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20230228023344.9623-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/tls/tls_main.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 3735cb00905d..b32c112984dd 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -405,13 +405,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aes_gcm_128->iv, cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_128, sizeof(*crypto_info_aes_gcm_128))) @@ -429,13 +427,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aes_gcm_256->iv, cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE, TLS_CIPHER_AES_GCM_256_IV_SIZE); memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_256, sizeof(*crypto_info_aes_gcm_256))) @@ -451,13 +447,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(aes_ccm_128->iv, cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE, TLS_CIPHER_AES_CCM_128_IV_SIZE); memcpy(aes_ccm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128))) rc = -EFAULT; break; @@ -472,13 +466,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(chacha20_poly1305->iv, 
cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE, TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE); memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq, TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, chacha20_poly1305, sizeof(*chacha20_poly1305))) rc = -EFAULT; @@ -493,13 +485,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(sm4_gcm_info->iv, cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE, TLS_CIPHER_SM4_GCM_IV_SIZE); memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq, TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info))) rc = -EFAULT; break; @@ -513,13 +503,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(sm4_ccm_info->iv, cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE, TLS_CIPHER_SM4_CCM_IV_SIZE); memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq, TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info))) rc = -EFAULT; break; @@ -535,13 +523,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aria_gcm_128->iv, cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE, TLS_CIPHER_ARIA_GCM_128_IV_SIZE); memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aria_gcm_128, sizeof(*crypto_info_aria_gcm_128))) @@ -559,13 +545,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aria_gcm_256->iv, cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE, TLS_CIPHER_ARIA_GCM_256_IV_SIZE); memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq, TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aria_gcm_256, 
sizeof(*crypto_info_aria_gcm_256))) @@ -614,11 +598,9 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, if (len < sizeof(value)) return -EINVAL; - lock_sock(sk); value = -EINVAL; if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) value = ctx->rx_no_pad; - release_sock(sk); if (value < 0) return value; @@ -635,6 +617,8 @@ static int do_tls_getsockopt(struct sock *sk, int optname, { int rc = 0; + lock_sock(sk); + switch (optname) { case TLS_TX: case TLS_RX: @@ -651,6 +635,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, rc = -ENOPROTOOPT; break; } + + release_sock(sk); + return rc; } -- cgit v1.2.3 From f3221361dc85d4de22586ce8441ec2c67b454f5d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Feb 2023 16:28:57 -0800 Subject: net: tls: avoid hanging tasks on the tx_lock syzbot sent a hung task report and Eric explains that adversarial receiver may keep RWIN at 0 for a long time, so we are not guaranteed to make forward progress. Thread which took tx_lock and went to sleep may not release tx_lock for hours. Use interruptible sleep where possible and reschedule the work if it can't take the lock. 
Testing: existing selftest passes Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com Fixes: 79ffe6087e91 ("net/tls: add a TX lock") Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/ Cc: stable@vger.kernel.org # wait 4 weeks Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 021d760f9133..635b8bf6b937 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -956,7 +956,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) MSG_CMSG_COMPAT)) return -EOPNOTSUPP; - mutex_lock(&tls_ctx->tx_lock); + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; lock_sock(sk); if (unlikely(msg->msg_controllen)) { @@ -1290,7 +1292,9 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) return -EOPNOTSUPP; - mutex_lock(&tls_ctx->tx_lock); + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; lock_sock(sk); ret = tls_sw_do_sendpage(sk, page, offset, size, flags); release_sock(sk); @@ -2435,11 +2439,19 @@ static void tx_work_handler(struct work_struct *work) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; - mutex_lock(&tls_ctx->tx_lock); - lock_sock(sk); - tls_tx_records(sk, -1); - release_sock(sk); - mutex_unlock(&tls_ctx->tx_lock); + + if (mutex_trylock(&tls_ctx->tx_lock)) { + lock_sock(sk); + tls_tx_records(sk, -1); + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); + } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + /* Someone is holding the tx_lock, they will likely run Tx + * and cancel the work on their way out of the lock section. + * Schedule a long delay just in case. 
+ */ + schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10)); + } } static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx) -- cgit v1.2.3 From 5c1ebbfabcd61142a4551bfc0e51840f9bdae7af Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Wed, 1 Mar 2023 13:32:47 +0000 Subject: net: use indirect calls helpers for sk_exit_memory_pressure() Florian reported a regression and sent a patch with the following changelog: There is a noticeable tcp performance regression (loopback or cross-netns), seen with iperf3 -Z (sendfile mode) when generic retpolines are needed. With SK_RECLAIM_THRESHOLD checks gone number of calls to enter/leave memory pressure happen much more often. For TCP indirect calls are used. We can't remove the if-set-return short-circuit check in tcp_enter_memory_pressure because there are callers other than sk_enter_memory_pressure. Doing a check in the sk wrapper too reduces the indirect calls enough to recover some performance. Before, 0.00-60.00 sec 322 GBytes 46.1 Gbits/sec receiver After: 0.00-60.04 sec 359 GBytes 51.4 Gbits/sec receiver "iperf3 -c $peer -t 60 -Z -f g", connected via veth in another netns. It seems we forgot to upstream this indirect call mitigation we had for years, lets do this instead. [edumazet] - It seems we forgot to upstream this indirect call mitigation we had for years, let's do this instead. - Changed to INDIRECT_CALL_INET_1() to avoid bots reports. 
Fixes: 4890b686f408 ("net: keep sk->sk_forward_alloc as small as possible") Reported-by: Florian Westphal Link: https://lore.kernel.org/netdev/20230227152741.4a53634b@kernel.org/T/ Signed-off-by: Brian Vazquez Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230301133247.2346111-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 341c565dbc26..c25888795390 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2818,7 +2818,8 @@ static void sk_enter_memory_pressure(struct sock *sk) static void sk_leave_memory_pressure(struct sock *sk) { if (sk->sk_prot->leave_memory_pressure) { - sk->sk_prot->leave_memory_pressure(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure, + tcp_leave_memory_pressure, sk); } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; -- cgit v1.2.3 From 02f18662f6c671382345fcb696e808d78f4c194a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 1 Mar 2023 16:44:50 +0100 Subject: ieee802154: Prevent user from crashing the host Avoid crashing the machine by checking info->attrs[NL802154_ATTR_SCAN_TYPE] presence before de-referencing it, which was the primary intent of the blamed patch.
Reported-by: Sanan Hasanov Suggested-by: Eric Dumazet Fixes: a0b6106672b5 ("ieee802154: Convert scan error messages to extack") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20230301154450.547716-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl802154.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 2215f576ee37..d8f4379d4fa6 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1412,7 +1412,7 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - if (!nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE])) { + if (!info->attrs[NL802154_ATTR_SCAN_TYPE]) { NL_SET_ERR_MSG(info->extack, "Malformed request, missing scan type"); return -EINVAL; } -- cgit v1.2.3 From 9781e98a97110f5e76999058368b4be76a788484 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 2 Mar 2023 01:39:13 +0900 Subject: net: caif: Fix use-after-free in cfusbl_device_notify() syzbot reported use-after-free in cfusbl_device_notify() [1]. 
This causes a stack trace like below: BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214 CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] netdev_wait_allrefs_any net/core/dev.c:10227 [inline] netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341 default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 When unregistering a net device, unregister_netdevice_many_notify() sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers with NETDEV_UNREGISTER, and adds the device to the todo list. Later on, devices in the todo list are processed by netdev_run_todo(). netdev_run_todo() waits for the devices' reference count to become 1 while rebroadcasting the NETDEV_UNREGISTER notification. When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple times, the parent device might be freed. This could cause UAF.
Processing NETDEV_UNREGISTER multiple times also causes an imbalance of the reference count for the module. This patch fixes the issue by accepting only the first NETDEV_UNREGISTER notification. Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") CC: sjur.brandeland@stericsson.com Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com Signed-off-by: Jakub Kicinski --- net/caif/caif_usb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ebc202ffdd8d..bf61ea4b8132 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct usb_device *usbdev; int res; + if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) + return 0; + /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) -- cgit v1.2.3 From d900f3d20cc3169ce42ec72acc850e662a4d4db2 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 3 Mar 2023 16:09:46 +0800 Subject: bpf, sockmap: Fix an infinite loop error when len is 0 in tcp_bpf_recvmsg_parser() When the buffer length of the recvmsg system call is 0, we got the following soft lockup problem: watchdog: BUG: soft lockup - CPU#3 stuck for 27s!
[a.out:6149] CPU: 3 PID: 6149 Comm: a.out Kdump: loaded Not tainted 6.2.0+ #30 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:remove_wait_queue+0xb/0xc0 Code: 5e 41 5f c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 41 57 <41> 56 41 55 41 54 55 48 89 fd 53 48 89 f3 4c 8d 6b 18 4c 8d 73 20 RSP: 0018:ffff88811b5978b8 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff88811a7d3780 RCX: ffffffffb7a4d768 RDX: dffffc0000000000 RSI: ffff88811b597908 RDI: ffff888115408040 RBP: 1ffff110236b2f1b R08: 0000000000000000 R09: ffff88811a7d37e7 R10: ffffed10234fa6fc R11: 0000000000000001 R12: ffff88811179b800 R13: 0000000000000001 R14: ffff88811a7d38a8 R15: ffff88811a7d37e0 FS: 00007f6fb5398740(0000) GS:ffff888237180000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000000 CR3: 000000010b6ba002 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_msg_wait_data+0x279/0x2f0 tcp_bpf_recvmsg_parser+0x3c6/0x490 inet_recvmsg+0x280/0x290 sock_recvmsg+0xfc/0x120 ____sys_recvmsg+0x160/0x3d0 ___sys_recvmsg+0xf0/0x180 __sys_recvmsg+0xea/0x1a0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc The logic in tcp_bpf_recvmsg_parser is as follows: msg_bytes_ready: copied = sk_msg_recvmsg(sk, psock, msg, len, flags); if (!copied) { wait data; goto msg_bytes_ready; } In this case, "copied" always is 0, the infinite loop occurs. According to the Linux system call man page, 0 should be returned in this case. Therefore, in tcp_bpf_recvmsg_parser(), if the length is 0, directly return. Also modify several other functions with the same problem. 
Fixes: 1f5be6b3b063 ("udp: Implement udp_bpf_recvmsg() for sockmap") Fixes: 9825d866ce0d ("af_unix: Implement unix_dgram_bpf_recvmsg()") Fixes: c5d2177a72a1 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Cc: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20230303080946.1146638-1-liujian56@huawei.com --- net/ipv4/tcp_bpf.c | 6 ++++++ net/ipv4/udp_bpf.c | 3 +++ net/unix/unix_bpf.c | 3 +++ 3 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index cf26d65ca389..ebf917511937 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -186,6 +186,9 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, flags, addr_len); @@ -244,6 +247,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, flags, addr_len); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index e5dc91d0e079..0735d820e413 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -68,6 +68,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return sk_udp_recvmsg(sk, msg, len, flags, addr_len); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index e9bf15513961..2f9d8271c6ec 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -54,6 +54,9 @@ static int 
unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, struct sk_psock *psock; int copied; + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return __unix_recvmsg(sk, msg, len, flags); -- cgit v1.2.3 From 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 2 Mar 2023 17:48:31 -0800 Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type It seems that change was unintentional, we have userspace code that needs the mark while listening for events like REPLY, DESTROY, etc. Also include 0-marks in requested dumps, as they were before that fix. Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark") Signed-off-by: Ivan Delalande Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c11dff91d52d..bfc3aaa2c872 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,11 +328,12 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct, + bool dump) { u32 mark = READ_ONCE(ct->mark); - if (!mark) + if (!mark && !dump) return 0; if (nla_put_be32(skb, CTA_MARK, htonl(mark))) @@ -343,7 +344,7 @@ nla_put_failure: return -1; } #else -#define ctnetlink_dump_mark(a, b) (0) +#define ctnetlink_dump_mark(a, b, c) (0) #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -548,7 +549,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct, true) < 0 || ctnetlink_dump_secctx(skb, ct) 
< 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -831,8 +832,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if (events & (1 << IPCT_MARK) && - ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -2735,7 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct, true) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) -- cgit v1.2.3 From 4a02426787bf024dafdb79b362285ee325de3f5e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2023 10:58:56 +0100 Subject: netfilter: tproxy: fix deadlock due to missing BH disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xtables packet traverser performs an unconditional local_bh_disable(), but the nf_tables evaluation loop does not. Functions that are called from either xtables or nftables must assume that they can be called in process context. inet_twsk_deschedule_put() assumes that no softirq interrupt can occur. If tproxy is used from nf_tables it's possible that we'll deadlock trying to acquire a lock already held in process context. Add a small helper that takes care of this and use it.
Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/ Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Reported-and-tested-by: Major Dávid Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tproxy.h | 7 +++++++ net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +- net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 82d0e41b76f2..faa108b1ba67 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) return false; } +static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) +{ + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); +} + /* assign a socket to the skb -- consumes sk */ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index b22b2c745c76..69e331799604 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, hp->source, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 929502e51203..52f828bb5a83 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, lport ? 
lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } -- cgit v1.2.3 From 294635a8165a31408a8b3a24f9c74849ca3d8701 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Feb 2023 17:36:07 +0100 Subject: bpf, test_run: fix &xdp_frame misplacement for LIVE_FRAMES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit &xdp_buff and &xdp_frame are bound in a way that xdp_buff->data_hard_start == xdp_frame It's always the case and e.g. xdp_convert_buff_to_frame() relies on this. IOW, the following: for (u32 i = 0; i < 0xdead; i++) { xdpf = xdp_convert_buff_to_frame(&xdp); xdp_convert_frame_to_buff(xdpf, &xdp); } shouldn't ever modify @xdpf's contents or the pointer itself. However, "live packet" code wrongly treats &xdp_frame as part of its context placed *before* the data_hard_start. With such flow, data_hard_start is sizeof(*xdpf) off to the right and no longer points to the XDP frame. Instead of replacing `sizeof(ctx)` with `offsetof(ctx, xdpf)` in several places and praying that there are no more miscalcs left somewhere in the code, unionize ::frm with ::data in a flex array, so that both starts pointing to the actual data_hard_start and the XDP frame actually starts being a part of it, i.e. a part of the headroom, not the context. A nice side effect is that the maximum frame size for this mode gets increased by 40 bytes, as xdp_buff::frame_sz includes everything from data_hard_start (-> includes xdpf already) to the end of XDP/skb shared info. Also update %MAX_PKT_SIZE accordingly in the selftests code. Leave it hardcoded for 64 bit && 4k pages, it can be made more flexible later on. Minor: align `&head->data` with how `head->frm` is assigned for consistency. Minor #2: rename 'frm' to 'frame' in &xdp_page_head while at it for clarity. 
(was found while testing XDP traffic generator on ice, which calls xdp_convert_frame_to_buff() for each XDP frame) Fixes: b530e9e1063e ("bpf: Add "live packet" mode for XDP in BPF_PROG_RUN") Acked-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230224163607.2994755-1-aleksander.lobakin@intel.com Signed-off-by: Martin KaFai Lau --- net/bpf/test_run.c | 19 +++++++++++++------ .../selftests/bpf/prog_tests/xdp_do_redirect.c | 7 ++++--- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6f3d654b3339..f81b24320a36 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -97,8 +97,11 @@ reset: struct xdp_page_head { struct xdp_buff orig_ctx; struct xdp_buff ctx; - struct xdp_frame frm; - u8 data[]; + union { + /* ::data_hard_start starts here */ + DECLARE_FLEX_ARRAY(struct xdp_frame, frame); + DECLARE_FLEX_ARRAY(u8, data); + }; }; struct xdp_test_data { @@ -113,6 +116,10 @@ struct xdp_test_data { u32 frame_cnt; }; +/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE + * must be updated accordingly this gets changed, otherwise BPF selftests + * will fail. 
+ */ #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) #define TEST_XDP_MAX_BATCH 256 @@ -132,8 +139,8 @@ static void xdp_test_run_init_page(struct page *page, void *arg) headroom -= meta_len; new_ctx = &head->ctx; - frm = &head->frm; - data = &head->data; + frm = head->frame; + data = head->data; memcpy(data + headroom, orig_ctx->data_meta, frm_len); xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq); @@ -223,7 +230,7 @@ static void reset_ctx(struct xdp_page_head *head) head->ctx.data = head->orig_ctx.data; head->ctx.data_meta = head->orig_ctx.data_meta; head->ctx.data_end = head->orig_ctx.data_end; - xdp_update_frame_from_buff(&head->ctx, &head->frm); + xdp_update_frame_from_buff(&head->ctx, head->frame); } static int xdp_recv_frames(struct xdp_frame **frames, int nframes, @@ -285,7 +292,7 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, head = phys_to_virt(page_to_phys(page)); reset_ctx(head); ctx = &head->ctx; - frm = &head->frm; + frm = head->frame; xdp->frame_cnt++; act = bpf_prog_run_xdp(prog, ctx); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 2666c84dbd01..7271a18ab3e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -65,12 +65,13 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) } /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) - - * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes + * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM = + * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one. 
*/ #if defined(__s390x__) -#define MAX_PKT_SIZE 3176 +#define MAX_PKT_SIZE 3216 #else -#define MAX_PKT_SIZE 3368 +#define MAX_PKT_SIZE 3408 #endif static void test_max_pkt_size(int fd) { -- cgit v1.2.3 From e539a105f947b9db470fec39fe91d85fe737a432 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 4 Mar 2023 11:26:10 -0800 Subject: net: tls: fix device-offloaded sendpage straddling records Adrien reports that incorrect data is transmitted when a single page straddles multiple records. We would transmit the same data in all iterations of the loop. Reported-by: Adrien Moulin Link: https://lore.kernel.org/all/61481278.42813558.1677845235112.JavaMail.zimbra@corp.free.fr Fixes: c1318b39c7d3 ("tls: Add opt-in zerocopy mode of sendfile()") Tested-by: Adrien Moulin Reviewed-by: Tariq Toukan Acked-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20230304192610.3818098-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 6c593788dc25..a7cc4f9faac2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -508,6 +508,8 @@ handle_error: zc_pfrag.offset = iter_offset.offset; zc_pfrag.size = copy; tls_append_frag(record, &zc_pfrag, copy); + + iter_offset.offset += copy; } else if (copy) { copy = min_t(size_t, copy, pfrag->size - pfrag->offset); -- cgit v1.2.3 From c77737b736ceb50fdf150434347dbd81ec76dbb1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Mar 2023 05:22:54 +0000 Subject: netfilter: conntrack: adopt safer max chain length Customers using GKE 1.25 and 1.26 are facing conntrack issues root caused to commit c9c3b6811f74 ("netfilter: conntrack: make max chain length random"). Even if we assume Uniform Hashing, a bucket often reaches 8 chained items while the load factor of the hash table is smaller than 0.5. With a limit of 16, we reach load factors of 3. With a limit of 32, we reach load factors of 11.
With a limit of 40, we reach load factors of 15. With a limit of 50, we reach load factors of 24. This patch changes MIN_CHAINLEN to 50, to minimize risks. Ideally, we could in the future add a cushion based on expected load factor (2 * nf_conntrack_max / nf_conntrack_buckets), because some setups might expect unusual values. Fixes: c9c3b6811f74 ("netfilter: conntrack: make max chain length random") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7250082e7de5..c6a6a6099b4e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -96,8 +96,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex); #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) #define GC_SCAN_EXPIRED_MAX (64000u / HZ) -#define MIN_CHAINLEN 8u -#define MAX_CHAINLEN (32u - MIN_CHAINLEN) +#define MIN_CHAINLEN 50u +#define MAX_CHAINLEN (80u - MIN_CHAINLEN) static struct conntrack_gc_work conntrack_gc_work; -- cgit v1.2.3 From 7d834b4d1ab66c48e8c0810fdeadaabb80fa2c81 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 7 Mar 2023 00:26:50 +0300 Subject: nfc: change order inside nfc_se_io error path cb_context should be freed on the error path in nfc_se_io as stated by commit 25ff6f8a5a3b ("nfc: fix memory leak of se_io context in nfc_genl_se_io"). Make the error path in nfc_se_io unwind everything in reverse order, i.e. free the cb_context after unlocking the device. 
Suggested-by: Krzysztof Kozlowski Signed-off-by: Fedor Pchelkin Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230306212650.230322-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 348bf561bc9f..b9264e730fd9 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1446,8 +1446,8 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, return rc; error: - kfree(cb_context); device_unlock(&dev->dev); + kfree(cb_context); return rc; } -- cgit v1.2.3 From 37d9df224d1eec1b434fe9ffa40104c756478c29 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Mar 2023 12:04:57 -0800 Subject: ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause I was intending to make all the Netlink Spec code BSD-3-Clause to ease the adoption but it appears that: - I fumbled the uAPI and used "GPL WITH uAPI note" there - it gives people pause as they expect GPL in the kernel As suggested by Chuck re-license under dual. This gives us benefit of full BSD freedom while fulfilling the broad "kernel is under GPL" expectations. 
Link: https://lore.kernel.org/all/20230304120108.05dd44c5@kernel.org/ Link: https://lore.kernel.org/r/20230306200457.3903854-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink-c.yaml | 2 +- Documentation/netlink/genetlink-legacy.yaml | 2 +- Documentation/netlink/genetlink.yaml | 2 +- Documentation/netlink/specs/ethtool.yaml | 2 ++ Documentation/netlink/specs/fou.yaml | 2 ++ Documentation/netlink/specs/netdev.yaml | 2 ++ Documentation/userspace-api/netlink/specs.rst | 3 +++ include/uapi/linux/fou.h | 2 +- include/uapi/linux/netdev.h | 2 +- net/core/netdev-genl-gen.c | 2 +- net/core/netdev-genl-gen.h | 2 +- net/ipv4/fou_nl.c | 2 +- net/ipv4/fou_nl.h | 2 +- tools/include/uapi/linux/netdev.h | 2 +- tools/net/ynl/cli.py | 2 +- tools/net/ynl/lib/__init__.py | 2 +- tools/net/ynl/lib/nlspec.py | 2 +- tools/net/ynl/lib/ynl.py | 2 +- tools/net/ynl/ynl-gen-c.py | 7 ++++--- tools/net/ynl/ynl-regen.sh | 2 +- 20 files changed, 28 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index bbcfa2472b04..f082a5ad7cf1 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-c.yaml# diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 5642925c4ceb..c6b8c77f7d12 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml# diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 62a922755ce2..b2d56ab9e615 100644 --- 
a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml# diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 35c462bce56f..18ecb7d90cbe 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + name: ethtool protocol: genetlink-legacy diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index cca4cf98f03a..cff104288723 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + name: fou protocol: genetlink-legacy diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index ba9ee13cf729..24de747b5344 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + name: netdev doc: diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 32e53328d113..2122e0c4a399 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -24,6 +24,9 @@ YAML specifications can be found under ``Documentation/netlink/specs/`` This document describes details of the schema. See :doc:`intro-specs` for a practical starting guide. +All specs must be licensed under ``GPL-2.0-only OR BSD-3-Clause`` +to allow for easy adoption in user space code. 
+ Compatibility levels ==================== diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index 19ebbef41a63..5041c3598493 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN uapi header */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 588391447bfb..8c4e3e536c04 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 48812ec843f5..9e10802587fc 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel source */ diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index b16dc7e026bb..2c5fc7d1e8a7 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel header */ diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 6c3820f41dd5..5c14fe030eda 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: BSD-3-Clause +// 
SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel source */ diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index b7a68121ce6f..58b1e1ed4b3b 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel header */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 588391447bfb..8c4e3e536c04 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index db410b74d539..ffaa8038aa8c 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import argparse import json diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py index 3c73f59eabab..a2cb8b16d6f1 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/lib/__init__.py @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause from .nlspec import SpecAttr, SpecAttrSet, SpecFamily, SpecOperation from .ynl import YnlFamily diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 9d394e50de23..0a2cfb5862aa 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: 
GPL-2.0 OR BSD-3-Clause import collections import importlib diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 1c7411ee04dc..a842adc8e87e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import functools import os diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 62f8f2c3c56c..c940ca834d3f 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import argparse import collections @@ -2127,12 +2128,12 @@ def main(): _, spec_kernel = find_kernel_root(args.spec) if args.mode == 'uapi': - cw.p('/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */') + cw.p('/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */') else: if args.header: - cw.p('/* SPDX-License-Identifier: BSD-3-Clause */') + cw.p('/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */') else: - cw.p('// SPDX-License-Identifier: BSD-3-Clause') + cw.p('// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause') cw.p("/* Do not edit directly, auto-generated from: */") cw.p(f"/*\t{spec_kernel} */") cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */") diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index 43989ae48ed0..74f5de1c2399 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause TOOL=$(dirname $(realpath $0))/ynl-gen-c.py -- cgit v1.2.3 From ce7ca794712f186da99719e8b4e97bd5ddbb04c3 Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Tue, 7 Mar 2023 11:23:46 +0800 Subject: net/smc: fix fallback failed while sendmsg with fastopen Before determining whether the msg has unsupported options, the sendmsg call is prematurely terminated by the wrong state check. For the application, the general usage of MSG_FASTOPEN looks like: fd = socket(...) /* rather than connect */ sendto(fd, data, len, MSG_FASTOPEN) Hence, we need to check the flag before the state check, because the sock state here is always SMC_INIT when an application tries MSG_FASTOPEN. Once we find unsupported options, fall back to TCP. Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback") Signed-off-by: D. Wythe Signed-off-by: Simon Horman v2 -> v1: Optimize code style Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a4cccdfdc00a..ff6dd86bdc9f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2657,16 +2657,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct smc_sock *smc; - int rc = -EPIPE; + int rc; smc = smc_sk(sk); lock_sock(sk); - if ((sk->sk_state != SMC_ACTIVE) && - (sk->sk_state != SMC_APPCLOSEWAIT1) && - (sk->sk_state != SMC_INIT)) - goto out; + /* SMC does not support connect with fastopen */ if (msg->msg_flags & MSG_FASTOPEN) { + /* not connected yet, fallback */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); if (rc) @@ -2675,6 +2673,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) rc = -EINVAL; goto out; } + } else if ((sk->sk_state != SMC_ACTIVE) && + (sk->sk_state != SMC_APPCLOSEWAIT1) && + (sk->sk_state != SMC_INIT)) { + rc = -EPIPE; + goto out; } if (smc->use_fallback) { -- cgit v1.2.3 From 2aab4b96900272885bc157f8b236abf1cdc02e08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: 
Tue, 7 Mar 2023 16:45:30 +0000 Subject: af_unix: fix struct pid leaks in OOB support syzbot reported a struct pid leak [1]. The issue is that queue_oob() calls maybe_add_creds() which potentially holds a reference on a pid. But skb->destructor is not set (either directly or by calling unix_scm_to_skb()). This means that subsequent kfree_skb() or consume_skb() would leak this reference. In this fix, I chose to fully support scm even for the OOB message. [1] BUG: memory leak unreferenced object 0xffff8881053e7f80 (size 128): comm "syz-executor242", pid 5066, jiffies 4294946079 (age 13.220s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] alloc_pid+0x6a/0x560 kernel/pid.c:180 [] copy_process+0x169f/0x26c0 kernel/fork.c:2285 [] kernel_clone+0xf7/0x610 kernel/fork.c:2684 [] __do_sys_clone+0x7c/0xb0 kernel/fork.c:2825 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: syzbot+7699d9e5635c10253a27@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Rao Shoaib Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230307164530.771896-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 347122c3575e..0b0f18ecce44 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2105,7 +2105,8 @@ out: #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) -static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, + struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = 
unix_sk(other); struct sk_buff *skb; @@ -2116,6 +2117,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (!skb) return err; + err = unix_scm_to_skb(scm, skb, !fds_sent); + if (err < 0) { + kfree_skb(skb); + return err; + } skb_put(skb, 1); err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); @@ -2243,7 +2249,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { - err = queue_oob(sock, msg, other); + err = queue_oob(sock, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; -- cgit v1.2.3 From 649c15c7691e9b13cbe9bf6c65c365350e056067 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 7 Mar 2023 14:37:07 -0300 Subject: net: avoid double iput when sock_alloc_file fails When sock_alloc_file fails to allocate a file, it will call sock_release. __sys_socket_file should then not call sock_release again, otherwise there will be a double free. [ 89.319884] ------------[ cut here ]------------ [ 89.320286] kernel BUG at fs/inode.c:1764! 
[ 89.320656] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 89.321051] CPU: 7 PID: 125 Comm: iou-sqp-124 Not tainted 6.2.0+ #361 [ 89.321535] RIP: 0010:iput+0x1ff/0x240 [ 89.321808] Code: d1 83 e1 03 48 83 f9 02 75 09 48 81 fa 00 10 00 00 77 05 83 e2 01 75 1f 4c 89 ef e8 fb d2 ba 00 e9 80 fe ff ff c3 cc cc cc cc <0f> 0b 0f 0b e9 d0 fe ff ff 0f 0b eb 8d 49 8d b4 24 08 01 00 00 48 [ 89.322760] RSP: 0018:ffffbdd60068bd50 EFLAGS: 00010202 [ 89.323036] RAX: 0000000000000000 RBX: ffff9d7ad3cacac0 RCX: 0000000000001107 [ 89.323412] RDX: 000000000003af00 RSI: 0000000000000000 RDI: ffff9d7ad3cacb40 [ 89.323785] RBP: ffffbdd60068bd68 R08: ffffffffffffffff R09: ffffffffab606438 [ 89.324157] R10: ffffffffacb3dfa0 R11: 6465686361657256 R12: ffff9d7ad3cacb40 [ 89.324529] R13: 0000000080000001 R14: 0000000080000001 R15: 0000000000000002 [ 89.324904] FS: 00007f7b28516740(0000) GS:ffff9d7aeb1c0000(0000) knlGS:0000000000000000 [ 89.325328] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 89.325629] CR2: 00007f0af52e96c0 CR3: 0000000002a02006 CR4: 0000000000770ee0 [ 89.326004] PKRU: 55555554 [ 89.326161] Call Trace: [ 89.326298] [ 89.326419] __sock_release+0xb5/0xc0 [ 89.326632] __sys_socket_file+0xb2/0xd0 [ 89.326844] io_socket+0x88/0x100 [ 89.327039] ? io_issue_sqe+0x6a/0x430 [ 89.327258] io_issue_sqe+0x67/0x430 [ 89.327450] io_submit_sqes+0x1fe/0x670 [ 89.327661] io_sq_thread+0x2e6/0x530 [ 89.327859] ? __pfx_autoremove_wake_function+0x10/0x10 [ 89.328145] ? __pfx_io_sq_thread+0x10/0x10 [ 89.328367] ret_from_fork+0x29/0x50 [ 89.328576] RIP: 0033:0x0 [ 89.328732] Code: Unable to access opcode bytes at 0xffffffffffffffd6. 
[ 89.329073] RSP: 002b:0000000000000000 EFLAGS: 00000202 ORIG_RAX: 00000000000001a9 [ 89.329477] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f7b28637a3d [ 89.329845] RDX: 00007fff4e4318a8 RSI: 00007fff4e4318b0 RDI: 0000000000000400 [ 89.330216] RBP: 00007fff4e431830 R08: 00007fff4e431711 R09: 00007fff4e4318b0 [ 89.330584] R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff4e441b38 [ 89.330950] R13: 0000563835e3e725 R14: 0000563835e40d10 R15: 00007f7b28784040 [ 89.331318] [ 89.331441] Modules linked in: [ 89.331617] ---[ end trace 0000000000000000 ]--- Fixes: da214a475f8b ("net: add __sys_socket_file()") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Jens Axboe Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230307173707.468744-1-cascardo@canonical.com Signed-off-by: Jakub Kicinski --- net/socket.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 6bae8ce7059e..9c92c0e6c4da 100644 --- a/net/socket.c +++ b/net/socket.c @@ -450,7 +450,9 @@ static struct file_system_type sock_fs_type = { * * Returns the &file bound with @sock, implicitly storing it * in sock->file. If dname is %NULL, sets to "". - * On failure the return is a ERR pointer (see linux/err.h). + * + * On failure @sock is released, and an ERR pointer is returned. + * * This function uses GFP_KERNEL internally. 
*/ @@ -1638,7 +1640,6 @@ static struct socket *__sys_socket_create(int family, int type, int protocol) struct file *__sys_socket_file(int family, int type, int protocol) { struct socket *sock; - struct file *file; int flags; sock = __sys_socket_create(family, type, protocol); @@ -1649,11 +1650,7 @@ struct file *__sys_socket_file(int family, int type, int protocol) if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - file = sock_alloc_file(sock, flags, NULL); - if (IS_ERR(file)) - sock_release(sock); - - return file; + return sock_alloc_file(sock, flags, NULL); } int __sys_socket(int family, int type, int protocol) -- cgit v1.2.3