From 9b53a13422162feac7c7ee58e5bc0e0a80a41963 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 20 Jun 2023 13:01:59 -0400 Subject: counter: Fix menuconfig "Counter support" submenu entries disappearance The current placement of the I8254 Kconfig entry results in the disappearance of the "Counter support" submenu items in menuconfig. Move the I8254 above the menuconfig COUNTER entry to restore the intended submenu behavior. Fixes: d428487471ba ("counter: i8254: Introduce the Intel 8254 interface library module") Reported-by: Jarkko Nikula Closes: https://lore.kernel.org/all/32ddaa7b-53a8-d61f-d526-b545bd561337@linux.intel.com/ Reviewed-by: Randy Dunlap Tested-by: Jarkko Nikula Reviewed-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230620170159.556788-1-william.gray@linaro.org/ Signed-off-by: William Breathitt Gray --- drivers/counter/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/counter/Kconfig b/drivers/counter/Kconfig index a61a4b9b8ec6..86536c2cc531 100644 --- a/drivers/counter/Kconfig +++ b/drivers/counter/Kconfig @@ -3,13 +3,6 @@ # Counter devices # -menuconfig COUNTER - tristate "Counter support" - help - This enables counter device support through the Generic Counter - interface. You only need to enable this, if you also want to enable - one or more of the counter device drivers below. - config I8254 tristate select COUNTER @@ -25,6 +18,13 @@ config I8254 If built as a module its name will be i8254. +menuconfig COUNTER + tristate "Counter support" + help + This enables counter device support through the Generic Counter + interface. You only need to enable this, if you also want to enable + one or more of the counter device drivers below. + if COUNTER config 104_QUAD_8 -- cgit v1.2.3 From dfa73c17d55b921e1d4e154976de35317e43a93a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 27 Jun 2023 11:31:38 +0800 Subject: net: xfrm: Fix xfrm_address_filter OOB read We found below OOB crash: [ 44.211730] ================================================================== [ 44.212045] BUG: KASAN: slab-out-of-bounds in memcmp+0x8b/0xb0 [ 44.212045] Read of size 8 at addr ffff88800870f320 by task poc.xfrm/97 [ 44.212045] [ 44.212045] CPU: 0 PID: 97 Comm: poc.xfrm Not tainted 6.4.0-rc7-00072-gdad9774deaf1-dirty #4 [ 44.212045] Call Trace: [ 44.212045] [ 44.212045] dump_stack_lvl+0x37/0x50 [ 44.212045] print_report+0xcc/0x620 [ 44.212045] ? __virt_addr_valid+0xf3/0x170 [ 44.212045] ? memcmp+0x8b/0xb0 [ 44.212045] kasan_report+0xb2/0xe0 [ 44.212045] ? memcmp+0x8b/0xb0 [ 44.212045] kasan_check_range+0x39/0x1c0 [ 44.212045] memcmp+0x8b/0xb0 [ 44.212045] xfrm_state_walk+0x21c/0x420 [ 44.212045] ? __pfx_dump_one_state+0x10/0x10 [ 44.212045] xfrm_dump_sa+0x1e2/0x290 [ 44.212045] ? __pfx_xfrm_dump_sa+0x10/0x10 [ 44.212045] ? __kernel_text_address+0xd/0x40 [ 44.212045] ? kasan_unpoison+0x27/0x60 [ 44.212045] ? mutex_lock+0x60/0xe0 [ 44.212045] ? __pfx_mutex_lock+0x10/0x10 [ 44.212045] ? kasan_save_stack+0x22/0x50 [ 44.212045] netlink_dump+0x322/0x6c0 [ 44.212045] ? __pfx_netlink_dump+0x10/0x10 [ 44.212045] ? mutex_unlock+0x7f/0xd0 [ 44.212045] ? __pfx_mutex_unlock+0x10/0x10 [ 44.212045] __netlink_dump_start+0x353/0x430 [ 44.212045] xfrm_user_rcv_msg+0x3a4/0x410 [ 44.212045] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 44.212045] ? __pfx_xfrm_user_rcv_msg+0x10/0x10 [ 44.212045] ? __pfx_xfrm_dump_sa+0x10/0x10 [ 44.212045] ? __pfx_xfrm_dump_sa_done+0x10/0x10 [ 44.212045] ? __stack_depot_save+0x382/0x4e0 [ 44.212045] ? filter_irq_stacks+0x1c/0x70 [ 44.212045] ? kasan_save_stack+0x32/0x50 [ 44.212045] ? kasan_save_stack+0x22/0x50 [ 44.212045] ? kasan_set_track+0x25/0x30 [ 44.212045] ? __kasan_slab_alloc+0x59/0x70 [ 44.212045] ? kmem_cache_alloc_node+0xf7/0x260 [ 44.212045] ? kmalloc_reserve+0xab/0x120 [ 44.212045] ? __alloc_skb+0xcf/0x210 [ 44.212045] ? netlink_sendmsg+0x509/0x700 [ 44.212045] ? sock_sendmsg+0xde/0xe0 [ 44.212045] ? __sys_sendto+0x18d/0x230 [ 44.212045] ? __x64_sys_sendto+0x71/0x90 [ 44.212045] ? do_syscall_64+0x3f/0x90 [ 44.212045] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] ? netlink_sendmsg+0x509/0x700 [ 44.212045] ? sock_sendmsg+0xde/0xe0 [ 44.212045] ? __sys_sendto+0x18d/0x230 [ 44.212045] ? __x64_sys_sendto+0x71/0x90 [ 44.212045] ? do_syscall_64+0x3f/0x90 [ 44.212045] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] ? kasan_save_stack+0x22/0x50 [ 44.212045] ? kasan_set_track+0x25/0x30 [ 44.212045] ? kasan_save_free_info+0x2e/0x50 [ 44.212045] ? __kasan_slab_free+0x10a/0x190 [ 44.212045] ? kmem_cache_free+0x9c/0x340 [ 44.212045] ? netlink_recvmsg+0x23c/0x660 [ 44.212045] ? sock_recvmsg+0xeb/0xf0 [ 44.212045] ? __sys_recvfrom+0x13c/0x1f0 [ 44.212045] ? __x64_sys_recvfrom+0x71/0x90 [ 44.212045] ? do_syscall_64+0x3f/0x90 [ 44.212045] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] ? copyout+0x3e/0x50 [ 44.212045] netlink_rcv_skb+0xd6/0x210 [ 44.212045] ? __pfx_xfrm_user_rcv_msg+0x10/0x10 [ 44.212045] ? __pfx_netlink_rcv_skb+0x10/0x10 [ 44.212045] ? __pfx_sock_has_perm+0x10/0x10 [ 44.212045] ? mutex_lock+0x8d/0xe0 [ 44.212045] ? __pfx_mutex_lock+0x10/0x10 [ 44.212045] xfrm_netlink_rcv+0x44/0x50 [ 44.212045] netlink_unicast+0x36f/0x4c0 [ 44.212045] ? __pfx_netlink_unicast+0x10/0x10 [ 44.212045] ? netlink_recvmsg+0x500/0x660 [ 44.212045] netlink_sendmsg+0x3b7/0x700 [ 44.212045] ? __pfx_netlink_sendmsg+0x10/0x10 [ 44.212045] ? __pfx_netlink_sendmsg+0x10/0x10 [ 44.212045] sock_sendmsg+0xde/0xe0 [ 44.212045] __sys_sendto+0x18d/0x230 [ 44.212045] ? __pfx___sys_sendto+0x10/0x10 [ 44.212045] ? rcu_core+0x44a/0xe10 [ 44.212045] ? __rseq_handle_notify_resume+0x45b/0x740 [ 44.212045] ? _raw_spin_lock_irq+0x81/0xe0 [ 44.212045] ? __pfx___rseq_handle_notify_resume+0x10/0x10 [ 44.212045] ? __pfx_restore_fpregs_from_fpstate+0x10/0x10 [ 44.212045] ? __pfx_blkcg_maybe_throttle_current+0x10/0x10 [ 44.212045] ? __pfx_task_work_run+0x10/0x10 [ 44.212045] __x64_sys_sendto+0x71/0x90 [ 44.212045] do_syscall_64+0x3f/0x90 [ 44.212045] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] RIP: 0033:0x44b7da [ 44.212045] RSP: 002b:00007ffdc8838548 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 44.212045] RAX: ffffffffffffffda RBX: 00007ffdc8839978 RCX: 000000000044b7da [ 44.212045] RDX: 0000000000000038 RSI: 00007ffdc8838770 RDI: 0000000000000003 [ 44.212045] RBP: 00007ffdc88385b0 R08: 00007ffdc883858c R09: 000000000000000c [ 44.212045] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 44.212045] R13: 00007ffdc8839968 R14: 00000000004c37d0 R15: 0000000000000001 [ 44.212045] [ 44.212045] [ 44.212045] Allocated by task 97: [ 44.212045] kasan_save_stack+0x22/0x50 [ 44.212045] kasan_set_track+0x25/0x30 [ 44.212045] __kasan_kmalloc+0x7f/0x90 [ 44.212045] __kmalloc_node_track_caller+0x5b/0x140 [ 44.212045] kmemdup+0x21/0x50 [ 44.212045] xfrm_dump_sa+0x17d/0x290 [ 44.212045] netlink_dump+0x322/0x6c0 [ 44.212045] __netlink_dump_start+0x353/0x430 [ 44.212045] xfrm_user_rcv_msg+0x3a4/0x410 [ 44.212045] netlink_rcv_skb+0xd6/0x210 [ 44.212045] xfrm_netlink_rcv+0x44/0x50 [ 44.212045] netlink_unicast+0x36f/0x4c0 [ 44.212045] netlink_sendmsg+0x3b7/0x700 [ 44.212045] sock_sendmsg+0xde/0xe0 [ 44.212045] __sys_sendto+0x18d/0x230 [ 44.212045] __x64_sys_sendto+0x71/0x90 [ 44.212045] do_syscall_64+0x3f/0x90 [ 44.212045] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] [ 44.212045] The buggy address belongs to the object at ffff88800870f300 [ 44.212045] which belongs to the cache kmalloc-64 of size 64 [ 44.212045] The buggy address is located 32 bytes inside of [ 44.212045] allocated 36-byte region [ffff88800870f300, ffff88800870f324) [ 44.212045] [ 44.212045] The buggy address belongs to the physical page: [ 44.212045] page:00000000e4de16ee refcount:1 mapcount:0 mapping:000000000 ... [ 44.212045] flags: 0x100000000000200(slab|node=0|zone=1) [ 44.212045] page_type: 0xffffffff() [ 44.212045] raw: 0100000000000200 ffff888004c41640 dead000000000122 0000000000000000 [ 44.212045] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 44.212045] page dumped because: kasan: bad access detected [ 44.212045] [ 44.212045] Memory state around the buggy address: [ 44.212045] ffff88800870f200: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 44.212045] ffff88800870f280: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] >ffff88800870f300: 00 00 00 00 04 fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] ^ [ 44.212045] ffff88800870f380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] ffff88800870f400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] ================================================================== By investigating the code, we find the root cause of this OOB is the lack of checks in xfrm_dump_sa(). The buggy code allows a malicious user to pass arbitrary value of filter->splen/dplen. Hence, with crafted xfrm states, the attacker can achieve 8 bytes heap OOB read, which causes info leak. if (attrs[XFRMA_ADDRESS_FILTER]) { filter = kmemdup(nla_data(attrs[XFRMA_ADDRESS_FILTER]), sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; // NO MORE CHECKS HERE !!! } This patch fixes the OOB by adding necessary boundary checks, just like the code in pfkey_dump() function. Fixes: d3623099d350 ("ipsec: add support of limited SA dump") Signed-off-by: Lin Ma Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c34a2a06ca94..7c91deadc36e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1267,6 +1267,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; + + /* see addr_match(), (prefix length >> 5) << 2 + * will be used to compare xfrm_address_t + */ + if (filter->splen > (sizeof(xfrm_address_t) << 3) || + filter->dplen > (sizeof(xfrm_address_t) << 3)) { + kfree(filter); + return -EINVAL; + } } if (attrs[XFRMA_PROTO]) -- cgit v1.2.3 From 75065a8929069bc93181848818e23f147a73f83a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 27 Jun 2023 11:39:54 +0800 Subject: net: af_key: fix sadb_x_filter validation When running xfrm_state_walk_init(), the xfrm_address_filter being used is okay to have a splen/dplen that equals to sizeof(xfrm_address_t)<<3. This commit replaces >= to > to make sure the boundary checking is correct. Fixes: 37bd22420f85 ("af_key: pfkey_dump needs parameter validation") Signed-off-by: Lin Ma Signed-off-by: Steffen Klassert --- net/key/af_key.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index ede3c6a60353..b4ea4cf9fad4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1848,9 +1848,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; - if ((xfilter->sadb_x_filter_splen >= + if ((xfilter->sadb_x_filter_splen > (sizeof(xfrm_address_t) << 3)) || - (xfilter->sadb_x_filter_dplen >= + (xfilter->sadb_x_filter_dplen > (sizeof(xfrm_address_t) << 3))) { mutex_unlock(&pfk->dump_lock); return -EINVAL; -- cgit v1.2.3 From d1e0e61d617ba17aa516db707aa871387566bbf7 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 30 Jun 2023 16:19:11 +0800 Subject: net: xfrm: Amend XFRMA_SEC_CTX nla_policy structure According to all consumers code of attrs[XFRMA_SEC_CTX], like * verify_sec_ctx_len(), convert to xfrm_user_sec_ctx* * xfrm_state_construct(), call security_xfrm_state_alloc whose prototype is int security_xfrm_state_alloc(.., struct xfrm_user_sec_ctx *sec_ctx); * copy_from_user_sec_ctx(), convert to xfrm_user_sec_ctx * ... It seems that the expected parsing result for XFRMA_SEC_CTX should be structure xfrm_user_sec_ctx, and the current xfrm_sec_ctx is confusing and misleading (Luckily, they happen to have same size 8 bytes). This commit amend the policy structure to xfrm_user_sec_ctx to avoid ambiguity. Fixes: cf5cb79f6946 ("[XFRM] netlink: Establish an attribute policy") Signed-off-by: Lin Ma Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 8cbf45a8bcdc..655fe4ff8621 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -108,7 +108,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, - [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) }, [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7c91deadc36e..fdc0c17122b6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3024,7 +3024,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, - [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) }, [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, -- cgit v1.2.3 From 57010b8ece2821a1fdfdba2197d14a022f3769db Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 4 Jul 2023 08:53:49 +0800 Subject: xfrm: Silence warnings triggerable by bad packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the elimination of inner modes, a couple of warnings that were previously unreachable can now be triggered by malformed inbound packets. Fix this by: 1. Moving the setting of skb->protocol into the decap functions. 2. Returning -EINVAL when unexpected protocol is seen. Reported-by: Maciej Żenczykowski Fixes: 5f24f41e8ea6 ("xfrm: Remove inner/outer modes from input path") Signed-off-by: Herbert Xu Reviewed-by: Maciej Żenczykowski Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 815b38080401..d5ee96789d4b 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int optlen = 0; int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { struct ip_beet_phdr *ph; int phlen; @@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; + skb->protocol = htons(ETH_P_IPV6); + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int size = sizeof(struct ipv6hdr); int err; + skb->protocol = htons(ETH_P_IPV6); + err = skb_cow_head(skb, size + skb->mac_len); if (err) goto out; @@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, return xfrm6_remove_tunnel_encap(x, skb); break; } + return -EINVAL; } WARN_ON_ONCE(1); @@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: - skb->protocol = htons(ETH_P_IP); - break; - case IPPROTO_IPV6: - skb->protocol = htons(ETH_P_IPV6); - break; - default: - WARN_ON_ONCE(1); - break; - } - return xfrm_inner_mode_encap_remove(x, skb); } -- cgit v1.2.3 From 53223f2ed1ef5c90dad814daaaefea4e68a933c8 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 10 Jul 2023 17:40:51 +0800 Subject: xfrm: fix slab-use-after-free in decode_session6 When the xfrm device is set to the qdisc of the sfb type, the cb field of the sent skb may be modified during enqueuing. Then, slab-use-after-free may occur when the xfrm device sends IPv6 packets. The stack information is as follows: BUG: KASAN: slab-use-after-free in decode_session6+0x103f/0x1890 Read of size 1 at addr ffff8881111458ef by task swapper/3/0 CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.4.0-next-20230707 #409 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 Call Trace: dump_stack_lvl+0xd9/0x150 print_address_description.constprop.0+0x2c/0x3c0 kasan_report+0x11d/0x130 decode_session6+0x103f/0x1890 __xfrm_decode_session+0x54/0xb0 xfrmi_xmit+0x173/0x1ca0 dev_hard_start_xmit+0x187/0x700 sch_direct_xmit+0x1a3/0xc30 __qdisc_run+0x510/0x17a0 __dev_queue_xmit+0x2215/0x3b10 neigh_connected_output+0x3c2/0x550 ip6_finish_output2+0x55a/0x1550 ip6_finish_output+0x6b9/0x1270 ip6_output+0x1f1/0x540 ndisc_send_skb+0xa63/0x1890 ndisc_send_rs+0x132/0x6f0 addrconf_rs_timer+0x3f1/0x870 call_timer_fn+0x1a0/0x580 expire_timers+0x29b/0x4b0 run_timer_softirq+0x326/0x910 __do_softirq+0x1d4/0x905 irq_exit_rcu+0xb7/0x120 sysvec_apic_timer_interrupt+0x97/0xc0 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:intel_idle_hlt+0x23/0x30 Code: 1f 84 00 00 00 00 00 f3 0f 1e fa 41 54 41 89 d4 0f 1f 44 00 00 66 90 0f 1f 44 00 00 0f 00 2d c4 9f ab 00 0f 1f 44 00 00 fb f4 44 89 e0 41 5c c3 66 0f 1f 44 00 00 f3 0f 1e fa 41 54 41 89 d4 RSP: 0018:ffffc90000197d78 EFLAGS: 00000246 RAX: 00000000000a83c3 RBX: ffffe8ffffd09c50 RCX: ffffffff8a22d8e5 RDX: 0000000000000001 RSI: ffffffff8d3f8080 RDI: ffffe8ffffd09c50 RBP: ffffffff8d3f8080 R08: 0000000000000001 R09: ffffed1026ba6d9d R10: ffff888135d36ceb R11: 0000000000000001 R12: 0000000000000001 R13: ffffffff8d3f8100 R14: 0000000000000001 R15: 0000000000000000 cpuidle_enter_state+0xd3/0x6f0 cpuidle_enter+0x4e/0xa0 do_idle+0x2fe/0x3c0 cpu_startup_entry+0x18/0x20 start_secondary+0x200/0x290 secondary_startup_64_no_verify+0x167/0x16b Allocated by task 939: kasan_save_stack+0x22/0x40 kasan_set_track+0x25/0x30 __kasan_slab_alloc+0x7f/0x90 kmem_cache_alloc_node+0x1cd/0x410 kmalloc_reserve+0x165/0x270 __alloc_skb+0x129/0x330 inet6_ifa_notify+0x118/0x230 __ipv6_ifa_notify+0x177/0xbe0 addrconf_dad_completed+0x133/0xe00 addrconf_dad_work+0x764/0x1390 process_one_work+0xa32/0x16f0 worker_thread+0x67d/0x10c0 kthread+0x344/0x440 ret_from_fork+0x1f/0x30 The buggy address belongs to the object at ffff888111145800 which belongs to the cache skbuff_small_head of size 640 The buggy address is located 239 bytes inside of freed 640-byte region [ffff888111145800, ffff888111145a80) As commit f855691975bb ("xfrm6: Fix the nexthdr offset in _decode_session6.") showed, xfrm_decode_session was originally intended only for the receive path. IP6CB(skb)->nhoff is not set during transmission. Therefore, set the cb field in the skb to 0 before sending packets. Fixes: f855691975bb ("xfrm6: Fix the nexthdr offset in _decode_session6.") Signed-off-by: Zhengchao Shao Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index a3319965470a..b86474084690 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -537,8 +537,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IPV6): - xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + xfrm_decode_session(skb, &fl, AF_INET6); if (!dst) { fl.u.ip6.flowi6_oif = dev->ifindex; fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; @@ -552,8 +552,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) } break; case htons(ETH_P_IP): - xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + xfrm_decode_session(skb, &fl, AF_INET); if (!dst) { struct rtable *rt; -- cgit v1.2.3 From 9fd41f1ba638938c9a1195d09bc6fa3be2712f25 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 10 Jul 2023 17:40:52 +0800 Subject: ip6_vti: fix slab-use-after-free in decode_session6 When ipv6_vti device is set to the qdisc of the sfb type, the cb field of the sent skb may be modified during enqueuing. Then, slab-use-after-free may occur when ipv6_vti device sends IPv6 packets. The stack information is as follows: BUG: KASAN: slab-use-after-free in decode_session6+0x103f/0x1890 Read of size 1 at addr ffff88802e08edc2 by task swapper/0/0 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.4.0-next-20230707-00001-g84e2cad7f979 #410 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 Call Trace: dump_stack_lvl+0xd9/0x150 print_address_description.constprop.0+0x2c/0x3c0 kasan_report+0x11d/0x130 decode_session6+0x103f/0x1890 __xfrm_decode_session+0x54/0xb0 vti6_tnl_xmit+0x3e6/0x1ee0 dev_hard_start_xmit+0x187/0x700 sch_direct_xmit+0x1a3/0xc30 __qdisc_run+0x510/0x17a0 __dev_queue_xmit+0x2215/0x3b10 neigh_connected_output+0x3c2/0x550 ip6_finish_output2+0x55a/0x1550 ip6_finish_output+0x6b9/0x1270 ip6_output+0x1f1/0x540 ndisc_send_skb+0xa63/0x1890 ndisc_send_rs+0x132/0x6f0 addrconf_rs_timer+0x3f1/0x870 call_timer_fn+0x1a0/0x580 expire_timers+0x29b/0x4b0 run_timer_softirq+0x326/0x910 __do_softirq+0x1d4/0x905 irq_exit_rcu+0xb7/0x120 sysvec_apic_timer_interrupt+0x97/0xc0 Allocated by task 9176: kasan_save_stack+0x22/0x40 kasan_set_track+0x25/0x30 __kasan_slab_alloc+0x7f/0x90 kmem_cache_alloc_node+0x1cd/0x410 kmalloc_reserve+0x165/0x270 __alloc_skb+0x129/0x330 netlink_sendmsg+0x9b1/0xe30 sock_sendmsg+0xde/0x190 ____sys_sendmsg+0x739/0x920 ___sys_sendmsg+0x110/0x1b0 __sys_sendmsg+0xf7/0x1c0 do_syscall_64+0x39/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 9176: kasan_save_stack+0x22/0x40 kasan_set_track+0x25/0x30 kasan_save_free_info+0x2b/0x40 ____kasan_slab_free+0x160/0x1c0 slab_free_freelist_hook+0x11b/0x220 kmem_cache_free+0xf0/0x490 skb_free_head+0x17f/0x1b0 skb_release_data+0x59c/0x850 consume_skb+0xd2/0x170 netlink_unicast+0x54f/0x7f0 netlink_sendmsg+0x926/0xe30 sock_sendmsg+0xde/0x190 ____sys_sendmsg+0x739/0x920 ___sys_sendmsg+0x110/0x1b0 __sys_sendmsg+0xf7/0x1c0 do_syscall_64+0x39/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff88802e08ed00 which belongs to the cache skbuff_small_head of size 640 The buggy address is located 194 bytes inside of freed 640-byte region [ffff88802e08ed00, ffff88802e08ef80) As commit f855691975bb ("xfrm6: Fix the nexthdr offset in _decode_session6.") showed, xfrm_decode_session was originally intended only for the receive path. IP6CB(skb)->nhoff is not set during transmission. Therefore, set the cb field in the skb to 0 before sending packets. Fixes: f855691975bb ("xfrm6: Fix the nexthdr offset in _decode_session6.") Signed-off-by: Zhengchao Shao Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 10b222865d46..73c85d4e0e9c 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -568,12 +568,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) vti6_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; - xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + xfrm_decode_session(skb, &fl, AF_INET6); break; case htons(ETH_P_IP): - xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + xfrm_decode_session(skb, &fl, AF_INET); break; default: goto tx_err; -- cgit v1.2.3 From 6018a266279b1a75143c7c0804dd08a5fc4c3e0b Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 10 Jul 2023 17:40:53 +0800 Subject: ip_vti: fix potential slab-use-after-free in decode_session6 When ip_vti device is set to the qdisc of the sfb type, the cb field of the sent skb may be modified during enqueuing. Then, slab-use-after-free may occur when ip_vti device sends IPv6 packets. As commit f855691975bb ("xfrm6: Fix the nexthdr offset in _decode_session6.") showed, xfrm_decode_session was originally intended only for the receive path. IP6CB(skb)->nhoff is not set during transmission. Therefore, set the cb field in the skb to 0 before sending packets. Fixes: f855691975bb ("xfrm6: Fix the nexthdr offset in _decode_session6.") Signed-off-by: Zhengchao Shao Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 53bfd8af6920..d1e7d0ceb7ed 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -287,12 +287,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): - xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + xfrm_decode_session(skb, &fl, AF_INET); break; case htons(ETH_P_IPV6): - xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + xfrm_decode_session(skb, &fl, AF_INET6); break; default: goto tx_err; -- cgit v1.2.3 From d8630f050d3fd2079f8617dd6c00c6509109c755 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Fri, 23 Jun 2023 14:50:42 +0200 Subject: interconnect: qcom: Add support for mask-based BCMs Some BCMs aren't directly associated with the data path (i.e. ACV) and therefore don't communicate using BW. Instead, they are simply enabled/disabled with a simple bit mask. Add support for these. Origin commit retrieved from: https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/2d1573e0206998151b342e6b52a4c0f7234d7e36 Signed-off-by: Mike Tipton [narmstrong: removed copyright change from original commit] Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230619-topic-sm8550-upstream-interconnect-mask-vote-v2-1-709474b151cc@linaro.org Fixes: fafc114a468e ("interconnect: qcom: Add SM8450 interconnect provider driver") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/bcm-voter.c | 5 +++++ drivers/interconnect/qcom/icc-rpmh.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c index 8f385f9c2dd3..d5f2a6b5376b 100644 --- a/drivers/interconnect/qcom/bcm-voter.c +++ b/drivers/interconnect/qcom/bcm-voter.c @@ -83,6 +83,11 @@ static void bcm_aggregate(struct qcom_icc_bcm *bcm) temp = agg_peak[bucket] * bcm->vote_scale; bcm->vote_y[bucket] = bcm_div(temp, bcm->aux_data.unit); + + if (bcm->enable_mask && (bcm->vote_x[bucket] || bcm->vote_y[bucket])) { + bcm->vote_x[bucket] = 0; + bcm->vote_y[bucket] = bcm->enable_mask; + } } if (bcm->keepalive && bcm->vote_x[QCOM_ICC_BUCKET_AMC] == 0 && diff --git a/drivers/interconnect/qcom/icc-rpmh.h b/drivers/interconnect/qcom/icc-rpmh.h index 04391c1ba465..7843d8864d6b 100644 --- a/drivers/interconnect/qcom/icc-rpmh.h +++ b/drivers/interconnect/qcom/icc-rpmh.h @@ -81,6 +81,7 @@ struct qcom_icc_node { * @vote_x: aggregated threshold values, represents sum_bw when @type is bw bcm * @vote_y: aggregated threshold values, represents peak_bw when @type is bw bcm * @vote_scale: scaling factor for vote_x and vote_y + * @enable_mask: optional mask to send as vote instead of vote_x/vote_y * @dirty: flag used to indicate whether the bcm needs to be committed * @keepalive: flag used to indicate whether a keepalive is required * @aux_data: auxiliary data used when calculating threshold values and @@ -97,6 +98,7 @@ struct qcom_icc_bcm { u64 vote_x[QCOM_ICC_NUM_BUCKETS]; u64 vote_y[QCOM_ICC_NUM_BUCKETS]; u64 vote_scale; + u32 enable_mask; bool dirty; bool keepalive; struct bcm_db aux_data; -- cgit v1.2.3 From be02db24cf840bc0fdfbecc78ad803619dd143e6 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 23 Jun 2023 14:50:43 +0200 Subject: interconnect: qcom: sm8450: add enable_mask for bcm nodes Set the proper enable_mask to nodes requiring such value to be used instead of a bandwidth when voting. The masks were copied from the downstream implementation at [1]. [1] https://git.codelinaro.org/clo/la/kernel/msm-5.10/-/blob/KERNEL.PLATFORM.1.0.r2-05600-WAIPIOLE.0/drivers/interconnect/qcom/waipio.c Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230619-topic-sm8550-upstream-interconnect-mask-vote-v2-2-709474b151cc@linaro.org Fixes: fafc114a468e ("interconnect: qcom: Add SM8450 interconnect provider driver") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sm8450.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/interconnect/qcom/sm8450.c b/drivers/interconnect/qcom/sm8450.c index 2d7a8e7b85ec..e64c214b4020 100644 --- a/drivers/interconnect/qcom/sm8450.c +++ b/drivers/interconnect/qcom/sm8450.c @@ -1337,6 +1337,7 @@ static struct qcom_icc_node qns_mem_noc_sf_disp = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", + .enable_mask = 0x8, .num_nodes = 1, .nodes = { &ebi }, }; @@ -1349,6 +1350,7 @@ static struct qcom_icc_bcm bcm_ce0 = { static struct qcom_icc_bcm bcm_cn0 = { .name = "CN0", + .enable_mask = 0x1, .keepalive = true, .num_nodes = 55, .nodes = { &qnm_gemnoc_cnoc, &qnm_gemnoc_pcie, @@ -1383,6 +1385,7 @@ static struct qcom_icc_bcm bcm_cn0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .enable_mask = 0x1, .num_nodes = 2, .nodes = { &qxm_nsp, &qns_nsp_gemnoc }, }; @@ -1403,6 +1406,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_mm1 = { .name = "MM1", + .enable_mask = 0x1, .num_nodes = 12, .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp, &qnm_camnoc_sf, &qnm_mdp, @@ -1445,6 +1449,7 @@ static struct qcom_icc_bcm bcm_sh0 = { static struct qcom_icc_bcm bcm_sh1 = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 7, .nodes = { &alm_gpu_tcu, &alm_sys_tcu, &qnm_nsp_gemnoc, &qnm_pcie, @@ -1461,6 +1466,7 @@ static struct qcom_icc_bcm bcm_sn0 = { static struct qcom_icc_bcm bcm_sn1 = { .name = "SN1", + .enable_mask = 0x1, .num_nodes = 4, .nodes = { &qhm_gic, &qxm_pimem, &xm_gic, &qns_gemnoc_gc }, @@ -1492,6 +1498,7 @@ static struct qcom_icc_bcm bcm_sn7 = { static struct qcom_icc_bcm bcm_acv_disp = { .name = "ACV", + .enable_mask = 0x1, .num_nodes = 1, .nodes = { &ebi_disp }, }; @@ -1510,6 +1517,7 @@ static struct qcom_icc_bcm bcm_mm0_disp = { static struct qcom_icc_bcm bcm_mm1_disp = { .name = "MM1", + .enable_mask = 0x1, .num_nodes = 3, .nodes = { &qnm_mdp_disp, &qnm_rot_disp, &qns_mem_noc_sf_disp }, @@ -1523,6 +1531,7 @@ static struct qcom_icc_bcm bcm_sh0_disp = { static struct qcom_icc_bcm bcm_sh1_disp = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 1, .nodes = { &qnm_pcie_disp }, }; -- cgit v1.2.3 From 0dc82bd9e4627065dbc6ac8468296aa18f13c840 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 23 Jun 2023 14:50:44 +0200 Subject: interconnect: qcom: sm8550: add enable_mask for bcm nodes Set the proper enable_mask to nodes requiring such value to be used instead of a bandwidth when voting. The masks were copied from the downstream implementation at [1]. [1] https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/blob/kernel.lnx.5.15.r1-rel/drivers/interconnect/qcom/kalama.c Reviewed-by: Konrad Dybcio Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20230619-topic-sm8550-upstream-interconnect-mask-vote-v2-3-709474b151cc@linaro.org Fixes: e6f0d6a30f73 ("interconnect: qcom: Add SM8550 interconnect provider driver") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sm8550.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c index d823ba988ef6..0864ed285375 100644 --- a/drivers/interconnect/qcom/sm8550.c +++ b/drivers/interconnect/qcom/sm8550.c @@ -1473,6 +1473,7 @@ static struct qcom_icc_node qns_mem_noc_sf_cam_ife_2 = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", + .enable_mask = 0x8, .num_nodes = 1, .nodes = { &ebi }, }; @@ -1485,6 +1486,7 @@ static struct qcom_icc_bcm bcm_ce0 = { static struct qcom_icc_bcm bcm_cn0 = { .name = "CN0", + .enable_mask = 0x1, .keepalive = true, .num_nodes = 54, .nodes = { &qsm_cfg, &qhs_ahb2phy0, @@ -1524,6 +1526,7 @@ static struct qcom_icc_bcm bcm_cn1 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .enable_mask = 0x1, .num_nodes = 2, .nodes = { &qxm_nsp, &qns_nsp_gemnoc }, }; @@ -1549,6 +1552,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_mm1 = { .name = "MM1", + .enable_mask = 0x1, .num_nodes = 8, .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp, &qnm_camnoc_sf, &qnm_vapss_hcp, @@ -1589,6 +1593,7 @@ static struct qcom_icc_bcm bcm_sh0 = { static struct qcom_icc_bcm bcm_sh1 = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 13, .nodes = { &alm_gpu_tcu, &alm_sys_tcu, &chm_apps, &qnm_gpu, @@ -1608,6 +1613,7 @@ static struct qcom_icc_bcm bcm_sn0 = { static struct qcom_icc_bcm bcm_sn1 = { .name = "SN1", + .enable_mask = 0x1, .num_nodes = 3, .nodes = { &qhm_gic, &xm_gic, &qns_gemnoc_gc }, @@ -1633,6 +1639,7 @@ static struct qcom_icc_bcm bcm_sn7 = { static struct qcom_icc_bcm bcm_acv_disp = { .name = "ACV", + .enable_mask = 0x1, .num_nodes = 1, .nodes = { &ebi_disp }, }; @@ -1657,12 +1664,14 @@ static struct qcom_icc_bcm bcm_sh0_disp = { static struct qcom_icc_bcm bcm_sh1_disp = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 2, .nodes = { &qnm_mnoc_hf_disp, &qnm_pcie_disp }, }; static struct qcom_icc_bcm bcm_acv_cam_ife_0 = { .name = "ACV", + .enable_mask = 0x0, .num_nodes = 1, .nodes = { &ebi_cam_ife_0 }, }; @@ -1681,6 +1690,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_0 = { static struct qcom_icc_bcm bcm_mm1_cam_ife_0 = { .name = "MM1", + .enable_mask = 0x1, .num_nodes = 4, .nodes = { &qnm_camnoc_hf_cam_ife_0, &qnm_camnoc_icp_cam_ife_0, &qnm_camnoc_sf_cam_ife_0, &qns_mem_noc_sf_cam_ife_0 }, @@ -1694,6 +1704,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_0 = { static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 3, .nodes = { &qnm_mnoc_hf_cam_ife_0, &qnm_mnoc_sf_cam_ife_0, &qnm_pcie_cam_ife_0 }, @@ -1701,6 +1712,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = { static struct qcom_icc_bcm bcm_acv_cam_ife_1 = { .name = "ACV", + .enable_mask = 0x0, .num_nodes = 1, .nodes = { &ebi_cam_ife_1 }, }; @@ -1719,6 +1731,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_1 = { static struct qcom_icc_bcm bcm_mm1_cam_ife_1 = { .name = "MM1", + .enable_mask = 0x1, .num_nodes = 4, .nodes = { &qnm_camnoc_hf_cam_ife_1, &qnm_camnoc_icp_cam_ife_1, &qnm_camnoc_sf_cam_ife_1, &qns_mem_noc_sf_cam_ife_1 }, @@ -1732,6 +1745,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_1 = { static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 3, .nodes = { &qnm_mnoc_hf_cam_ife_1, &qnm_mnoc_sf_cam_ife_1, &qnm_pcie_cam_ife_1 }, @@ -1739,6 +1753,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = { static struct qcom_icc_bcm bcm_acv_cam_ife_2 = { .name = "ACV", + .enable_mask = 0x0, .num_nodes = 1, .nodes = { &ebi_cam_ife_2 }, }; @@ -1757,6 +1772,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_2 = { static struct qcom_icc_bcm bcm_mm1_cam_ife_2 = { .name = "MM1", + .enable_mask = 0x1, .num_nodes = 4, .nodes = { &qnm_camnoc_hf_cam_ife_2, &qnm_camnoc_icp_cam_ife_2, &qnm_camnoc_sf_cam_ife_2, &qns_mem_noc_sf_cam_ife_2 }, @@ -1770,6 +1786,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_2 = { static struct qcom_icc_bcm bcm_sh1_cam_ife_2 = { .name = "SH1", + .enable_mask = 0x1, .num_nodes = 3, .nodes = { &qnm_mnoc_hf_cam_ife_2, &qnm_mnoc_sf_cam_ife_2, &qnm_pcie_cam_ife_2 }, -- cgit v1.2.3 From 3cb11fe244d516f757c1022cfa971528d525fe65 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 23 Jun 2023 14:50:45 +0200 Subject: interconnect: qcom: sa8775p: add enable_mask for bcm nodes Set the proper enable_mask the ACV node requiring such value to be used instead of a bandwidth when voting. The masks was copied from the downstream implementation at [1]. [1] https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/blob/kernel.lnx.5.15.r32-rel/drivers/interconnect/qcom/lemans.c Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230619-topic-sm8550-upstream-interconnect-mask-vote-v2-4-709474b151cc@linaro.org Fixes: 3655a63f9661 ("interconnect: qcom: add a driver for sa8775p") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sa8775p.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sa8775p.c b/drivers/interconnect/qcom/sa8775p.c index da21cc31a580..f56538669de0 100644 --- a/drivers/interconnect/qcom/sa8775p.c +++ b/drivers/interconnect/qcom/sa8775p.c @@ -1873,6 +1873,7 @@ static struct qcom_icc_node srvc_snoc = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", + .enable_mask = 0x8, .num_nodes = 1, .nodes = { &ebi }, }; -- cgit v1.2.3 From 37540db221e1ca94d9a57632238d1a62043205b3 Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczyński Date: Thu, 13 Jul 2023 12:18:39 -0500 Subject: MAINTAINERS: Add Manivannan Sadhasivam as DesignWare PCIe driver maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Manivannan has been actively reviewing patches and testing changes related to the DesignWare core driver and other DWC-based PCIe drivers for a while now. Add Manivannan as a maintainer for the Synopsys DesignWare driver to make his role and contributions official. Thank you Manivannan! For all the help with DWC! Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Acked-by: Manivannan Sadhasivam --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3be1bdfe8ecc..76bdef7ba1c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16285,6 +16285,7 @@ F: drivers/pci/controller/dwc/pci-exynos.c PCI DRIVER FOR SYNOPSYS DESIGNWARE M: Jingoo Han M: Gustavo Pimentel +M: Manivannan Sadhasivam L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml -- cgit v1.2.3 From 6bc471b6c3aeaa7b95d1b86a1bb8d91a3c341fa5 Mon Sep 17 00:00:00 2001 From: Alisa Roman Date: Wed, 14 Jun 2023 18:52:43 +0300 Subject: iio: adc: ad7192: Fix ac excitation feature AC excitation enable feature exposed to user on AD7192, allowing a bit which should be 0 to be set. This feature is specific only to AD7195. AC excitation attribute moved accordingly. In the AD7195 documentation, the AC excitation enable bit is on position 22 in the Configuration register. ACX macro changed to match correct register and bit. Note that the fix tag is for the commit that moved the driver out of staging. Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging") Signed-off-by: Alisa Roman Cc: stable@vger.kernel.org Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20230614155242.160296-1-alisa.roman@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 8685e0b58a83..7bc3ebfe8081 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -62,7 +62,6 @@ #define AD7192_MODE_STA_MASK BIT(20) /* Status Register transmission Mask */ #define AD7192_MODE_CLKSRC(x) (((x) & 0x3) << 18) /* Clock Source Select */ #define AD7192_MODE_SINC3 BIT(15) /* SINC3 Filter Select */ -#define AD7192_MODE_ACX BIT(14) /* AC excitation enable(AD7195 only)*/ #define AD7192_MODE_ENPAR BIT(13) /* Parity Enable */ #define AD7192_MODE_CLKDIV BIT(12) /* Clock divide by 2 (AD7190/2 only)*/ #define AD7192_MODE_SCYCLE BIT(11) /* Single cycle conversion */ @@ -91,6 +90,7 @@ /* Configuration Register Bit Designations (AD7192_REG_CONF) */ #define AD7192_CONF_CHOP BIT(23) /* CHOP enable */ +#define AD7192_CONF_ACX BIT(22) /* AC excitation enable(AD7195 only) */ #define AD7192_CONF_REFSEL BIT(20) /* REFIN1/REFIN2 Reference Select */ #define AD7192_CONF_CHAN(x) ((x) << 8) /* Channel select */ #define AD7192_CONF_CHAN_MASK (0x7FF << 8) /* Channel select mask */ @@ -472,7 +472,7 @@ static ssize_t ad7192_show_ac_excitation(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct ad7192_state *st = iio_priv(indio_dev); - return sysfs_emit(buf, "%d\n", !!(st->mode & AD7192_MODE_ACX)); + return sysfs_emit(buf, "%d\n", !!(st->conf & AD7192_CONF_ACX)); } static ssize_t ad7192_show_bridge_switch(struct device *dev, @@ -513,13 +513,13 @@ static ssize_t ad7192_set(struct device *dev, ad_sd_write_reg(&st->sd, AD7192_REG_GPOCON, 1, st->gpocon); break; - case AD7192_REG_MODE: + case AD7192_REG_CONF: if (val) - st->mode |= AD7192_MODE_ACX; + st->conf |= AD7192_CONF_ACX; else - st->mode &= ~AD7192_MODE_ACX; + st->conf &= ~AD7192_CONF_ACX; - ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode); + ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, st->conf); break; default: ret = -EINVAL; @@ -579,12 +579,11 @@ static IIO_DEVICE_ATTR(bridge_switch_en, 0644, static IIO_DEVICE_ATTR(ac_excitation_en, 0644, ad7192_show_ac_excitation, ad7192_set, - AD7192_REG_MODE); + AD7192_REG_CONF); static struct attribute *ad7192_attributes[] = { &iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr, &iio_dev_attr_bridge_switch_en.dev_attr.attr, - &iio_dev_attr_ac_excitation_en.dev_attr.attr, NULL }; @@ -595,6 +594,7 @@ static const struct attribute_group ad7192_attribute_group = { static struct attribute *ad7195_attributes[] = { &iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr, &iio_dev_attr_bridge_switch_en.dev_attr.attr, + &iio_dev_attr_ac_excitation_en.dev_attr.attr, NULL }; -- cgit v1.2.3 From d47b9b84292706784482a661324bbc178153781f Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 13 Jun 2023 12:34:36 +0300 Subject: iio: light: bu27034: Fix scale format The driver is expecting accuracy of NANOs for intensity scale in raw_write. The IIO core is however defaulting to MICROs. This leads the raw-write of smallest scales to never succeed as correct selector(s) are not found. Fix this by implementing the .write_raw_get_fmt callback to use NANO accuracy for writes of IIO_CHAN_INFO_SCALE. Signed-off-by: Matti Vaittinen Fixes: e52afbd61039 ("iio: light: ROHM BU27034 Ambient Light Sensor") Link: https://lore.kernel.org/r/5369117315cf05b88cf0ccb87373fd77190f6ca2.1686648422.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/rohm-bu27034.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/rohm-bu27034.c b/drivers/iio/light/rohm-bu27034.c index e63ef5789cde..bf3de853a811 100644 --- a/drivers/iio/light/rohm-bu27034.c +++ b/drivers/iio/light/rohm-bu27034.c @@ -575,7 +575,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan, return -EINVAL; if (chan == BU27034_CHAN_ALS) { - if (val == 0 && val2 == 1000) + if (val == 0 && val2 == 1000000) return 0; return -EINVAL; @@ -587,7 +587,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan, goto unlock_out; ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel, - val, val2 * 1000, &gain_sel); + val, val2, &gain_sel); if (ret) { /* * Could not support scale with given time. Need to change time. @@ -624,7 +624,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan, /* Can we provide requested scale with this time? */ ret = iio_gts_find_gain_sel_for_scale_using_time( - &data->gts, new_time_sel, val, val2 * 1000, + &data->gts, new_time_sel, val, val2, &gain_sel); if (ret) continue; @@ -1217,6 +1217,21 @@ static int bu27034_read_raw(struct iio_dev *idev, } } +static int bu27034_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_INT_TIME: + return IIO_VAL_INT_PLUS_MICRO; + default: + return -EINVAL; + } +} + static int bu27034_write_raw(struct iio_dev *idev, struct iio_chan_spec const *chan, int val, int val2, long mask) @@ -1267,6 +1282,7 @@ static int bu27034_read_avail(struct iio_dev *idev, static const struct iio_info bu27034_info = { .read_raw = &bu27034_read_raw, .write_raw = &bu27034_write_raw, + .write_raw_get_fmt = &bu27034_write_raw_get_fmt, .read_avail = &bu27034_read_avail, }; -- cgit v1.2.3 From 096649cd7cb0fc1c8f684829f816d938ad1eb808 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 13 Jun 2023 12:34:55 +0300 Subject: iio: light: bu27008: Fix scale format The driver is expecting accuracy of NANOs for intensity scale in raw_write. The IIO core is however defaulting to MICROs. This leads the raw-write of smallest scales to never succeed as correct selector(s) are not found. Fix this by implementing the .write_raw_get_fmt callback to use NANO accuracy for writes of IIO_CHAN_INFO_SCALE. Fixes: 41ff93d14f78 ("iio: light: ROHM BU27008 color sensor") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/e4778b74cde41431f77bc8dd88ec18605da0b400.1686648422.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/rohm-bu27008.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/rohm-bu27008.c b/drivers/iio/light/rohm-bu27008.c index 489902bed7f0..80eb14ea8193 100644 --- a/drivers/iio/light/rohm-bu27008.c +++ b/drivers/iio/light/rohm-bu27008.c @@ -633,7 +633,7 @@ static int bu27008_try_find_new_time_gain(struct bu27008_data *data, int val, for (i = 0; i < data->gts.num_itime; i++) { new_time_sel = data->gts.itime_table[i].sel; ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, - new_time_sel, val, val2 * 1000, gain_sel); + new_time_sel, val, val2, gain_sel); if (!ret) break; } @@ -662,7 +662,7 @@ static int bu27008_set_scale(struct bu27008_data *data, goto unlock_out; ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel, - val, val2 * 1000, &gain_sel); + val, val2, &gain_sel); if (ret) { ret = bu27008_try_find_new_time_gain(data, val, val2, &gain_sel); if (ret) @@ -677,6 +677,21 @@ unlock_out: return ret; } +static int bu27008_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_INT_TIME: + return IIO_VAL_INT_PLUS_MICRO; + default: + return -EINVAL; + } +} + static int bu27008_write_raw(struct iio_dev *idev, struct iio_chan_spec const *chan, int val, int val2, long mask) @@ -756,6 +771,7 @@ static int bu27008_update_scan_mode(struct iio_dev *idev, static const struct iio_info bu27008_info = { .read_raw = &bu27008_read_raw, .write_raw = &bu27008_write_raw, + .write_raw_get_fmt = &bu27008_write_raw_get_fmt, .read_avail = &bu27008_read_avail, .update_scan_mode = bu27008_update_scan_mode, .validate_trigger = iio_validate_own_trigger, -- cgit v1.2.3 From 95fb1e7b23bc82130016daefa02a87e83185ca95 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 13 Jun 2023 12:35:12 +0300 Subject: iio: light: bu27008: Fix intensity data type The intensity data from bu27008 is unsigned. The type of the scan data was incorrectly marked as signed resulting large intensity values to be interpreted as negative ones. Fix the scan data type. Fixes: 41ff93d14f78 ("iio: light: ROHM BU27008 color sensor") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/240a7ca5fc1b76da20d81f930d00f31a54b1fdf8.1686648422.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/rohm-bu27008.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/rohm-bu27008.c b/drivers/iio/light/rohm-bu27008.c index 80eb14ea8193..b50bf8973d9a 100644 --- a/drivers/iio/light/rohm-bu27008.c +++ b/drivers/iio/light/rohm-bu27008.c @@ -190,7 +190,7 @@ static const struct iio_itime_sel_mul bu27008_itimes[] = { .address = BU27008_REG_##data##_LO, \ .scan_index = BU27008_##color, \ .scan_type = { \ - .sign = 's', \ + .sign = 'u', \ .realbits = 16, \ .storagebits = 16, \ .endianness = IIO_LE, \ -- cgit v1.2.3 From a41e19cc0d6b6a445a4133170b90271e4a2553dc Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Mon, 19 Jun 2023 16:12:39 +0200 Subject: iio: adc: ina2xx: avoid NULL pointer dereference on OF device match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The affected lines were resulting in a NULL pointer dereference on our platform because the device tree contained the following list of compatible strings: power-sensor@40 { compatible = "ti,ina232", "ti,ina231"; ... }; Since the driver doesn't declare a compatible string "ti,ina232", the OF matching succeeds on "ti,ina231". But the I2C device ID info is populated via the first compatible string, cf. modalias population in of_i2c_get_board_info(). Since there is no "ina232" entry in the legacy I2C device ID table either, the struct i2c_device_id *id pointer in the probe function is NULL. Fix this by using the already populated type variable instead, which points to the proper driver data. Since the name is also wanted, add a generic one to the ina2xx_config table. Signed-off-by: Alvin Šipraga Fixes: c43a102e67db ("iio: ina2xx: add support for TI INA2xx Power Monitors") Link: https://lore.kernel.org/r/20230619141239.2257392-1-alvin@pqrs.dk Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ina2xx-adc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 213526c1592f..aea83f369437 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -124,6 +124,7 @@ static const struct regmap_config ina2xx_regmap_config = { enum ina2xx_ids { ina219, ina226 }; struct ina2xx_config { + const char *name; u16 config_default; int calibration_value; int shunt_voltage_lsb; /* nV */ @@ -155,6 +156,7 @@ struct ina2xx_chip_info { static const struct ina2xx_config ina2xx_config[] = { [ina219] = { + .name = "ina219", .config_default = INA219_CONFIG_DEFAULT, .calibration_value = 4096, .shunt_voltage_lsb = 10000, @@ -164,6 +166,7 @@ static const struct ina2xx_config ina2xx_config[] = { .chip_id = ina219, }, [ina226] = { + .name = "ina226", .config_default = INA226_CONFIG_DEFAULT, .calibration_value = 2048, .shunt_voltage_lsb = 2500, @@ -996,7 +999,7 @@ static int ina2xx_probe(struct i2c_client *client) /* Patch the current config register with default. */ val = chip->config->config_default; - if (id->driver_data == ina226) { + if (type == ina226) { ina226_set_average(chip, INA226_DEFAULT_AVG, &val); ina226_set_int_time_vbus(chip, INA226_DEFAULT_IT, &val); ina226_set_int_time_vshunt(chip, INA226_DEFAULT_IT, &val); @@ -1015,7 +1018,7 @@ static int ina2xx_probe(struct i2c_client *client) } indio_dev->modes = INDIO_DIRECT_MODE; - if (id->driver_data == ina226) { + if (type == ina226) { indio_dev->channels = ina226_channels; indio_dev->num_channels = ARRAY_SIZE(ina226_channels); indio_dev->info = &ina226_info; @@ -1024,7 +1027,7 @@ static int ina2xx_probe(struct i2c_client *client) indio_dev->num_channels = ARRAY_SIZE(ina219_channels); indio_dev->info = &ina219_info; } - indio_dev->name = id->name; + indio_dev->name = id ? id->name : chip->config->name; ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev, &ina2xx_setup_ops); -- cgit v1.2.3 From 48faabfb3634e519fc49ef01525448ad2ba96751 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 Jul 2023 10:05:12 +0200 Subject: dt-bindings: iio: adi,ad74115: remove ref from -nanoamp dtschema v2023.06 comes with support for properties with -nanoamp suffix, thus bindings should not have a ref for it: adi,ad74115.yaml: properties:adi,ext1-burnout-current-nanoamp: '$ref' should not be valid under {'const': '$ref'} Cc: Cosmin Tanislav Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230712080512.94964-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml index 72d2e910f206..2594fa192f93 100644 --- a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml +++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml @@ -216,7 +216,6 @@ properties: description: Whether to enable burnout current for EXT1. adi,ext1-burnout-current-nanoamp: - $ref: /schemas/types.yaml#/definitions/uint32 description: Burnout current in nanoamps to be applied to EXT1. enum: [0, 50, 500, 1000, 10000] @@ -233,7 +232,6 @@ properties: description: Whether to enable burnout current for EXT2. adi,ext2-burnout-current-nanoamp: - $ref: /schemas/types.yaml#/definitions/uint32 description: Burnout current in nanoamps to be applied to EXT2. enum: [0, 50, 500, 1000, 10000] default: 0 @@ -249,7 +247,6 @@ properties: description: Whether to enable burnout current for VIOUT. adi,viout-burnout-current-nanoamp: - $ref: /schemas/types.yaml#/definitions/uint32 description: Burnout current in nanoamps to be applied to VIOUT. enum: [0, 1000, 10000] default: 0 -- cgit v1.2.3 From 272ffb925e2020000863748867d91a2407d3e8e9 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 20 Jun 2023 13:01:59 -0400 Subject: counter: Fix menuconfig "Counter support" submenu entries disappearance The current placement of the I8254 Kconfig entry results in the disappearance of the "Counter support" submenu items in menuconfig. Move the I8254 above the menuconfig COUNTER entry to restore the intended submenu behavior. Fixes: d428487471ba ("counter: i8254: Introduce the Intel 8254 interface library module") Reported-by: Jarkko Nikula Closes: https://lore.kernel.org/all/32ddaa7b-53a8-d61f-d526-b545bd561337@linux.intel.com/ Reviewed-by: Randy Dunlap Tested-by: Jarkko Nikula Reviewed-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230620170159.556788-1-william.gray@linaro.org/ Signed-off-by: William Breathitt Gray --- drivers/counter/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/counter/Kconfig b/drivers/counter/Kconfig index bca21df51168..62962ae84b77 100644 --- a/drivers/counter/Kconfig +++ b/drivers/counter/Kconfig @@ -3,13 +3,6 @@ # Counter devices # -menuconfig COUNTER - tristate "Counter support" - help - This enables counter device support through the Generic Counter - interface. You only need to enable this, if you also want to enable - one or more of the counter device drivers below. - config I8254 tristate select COUNTER @@ -25,6 +18,13 @@ config I8254 If built as a module its name will be i8254. +menuconfig COUNTER + tristate "Counter support" + help + This enables counter device support through the Generic Counter + interface. You only need to enable this, if you also want to enable + one or more of the counter device drivers below. + if COUNTER config 104_QUAD_8 -- cgit v1.2.3 From 1b95e817916069ec45a7f259d088fd1c091a8cc6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 11 Jul 2023 17:40:39 +0800 Subject: nvme: fix possible hang when removing a controller during error recovery Error recovery can be interrupted by controller removal, then the controller is left as quiesced, and IO hang can be caused. Fix the issue by unquiescing controller unconditionally when removing namespaces. This way is reasonable and safe given forward progress can be made when removing namespaces. Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reported-by: Chunguang Xu Closes: https://lore.kernel.org/linux-nvme/cover.1685350577.git.chunguang.xu@shopee.com/ Cc: stable@vger.kernel.org Signed-off-by: Ming Lei Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 37b6fa746662..f3a01b79148c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3933,6 +3933,12 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) */ nvme_mpath_clear_ctrl_paths(ctrl); + /* + * Unquiesce io queues so any pending IO won't hang, especially + * those submitted from scan work + */ + nvme_unquiesce_io_queues(ctrl); + /* prevent racing with ns scanning */ flush_work(&ctrl->scan_work); @@ -3942,10 +3948,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) * removing the namespaces' disks; fail all the queues now to avoid * potentially having to clean up the failed sync later. */ - if (ctrl->state == NVME_CTRL_DEAD) { + if (ctrl->state == NVME_CTRL_DEAD) nvme_mark_namespaces_dead(ctrl); - nvme_unquiesce_io_queues(ctrl); - } /* this is a no-op when called from the controller reset handler */ nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO); -- cgit v1.2.3 From 99dc264014d5aed66ee37ddf136a38b5a2b1b529 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 11 Jul 2023 17:40:40 +0800 Subject: nvme-tcp: fix potential unbalanced freeze & unfreeze Move start_freeze into nvme_tcp_configure_io_queues(), and there is at least two benefits: 1) fix unbalanced freeze and unfreeze, since re-connection work may fail or be broken by removal 2) IO during error recovery can be failfast quickly because nvme fabrics unquiesces queues after teardown. One side-effect is that !mpath request may timeout during connecting because of queue topo change, but that looks not one big deal: 1) same problem exists with current code base 2) compared with !mpath, mpath use case is dominant Fixes: 2875b0aecabe ("nvme-tcp: fix controller reset hang during traffic") Cc: stable@vger.kernel.org Signed-off-by: Ming Lei Tested-by: Yi Zhang Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3e7dd6f91832..fb24cd8ac46c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1868,6 +1868,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_cleanup_connect_q; if (!new) { + nvme_start_freeze(ctrl); nvme_unquiesce_io_queues(ctrl); if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { /* @@ -1876,6 +1877,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) * to be safe. */ ret = -ENODEV; + nvme_unfreeze(ctrl); goto out_wait_freeze_timed_out; } blk_mq_update_nr_hw_queues(ctrl->tagset, @@ -1980,7 +1982,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, if (ctrl->queue_count <= 1) return; nvme_quiesce_admin_queue(ctrl); - nvme_start_freeze(ctrl); nvme_quiesce_io_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); -- cgit v1.2.3 From 29b434d1e49252b3ad56ad3197e47fafff5356a1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 11 Jul 2023 17:40:41 +0800 Subject: nvme-rdma: fix potential unbalanced freeze & unfreeze Move start_freeze into nvme_rdma_configure_io_queues(), and there is at least two benefits: 1) fix unbalanced freeze and unfreeze, since re-connection work may fail or be broken by removal 2) IO during error recovery can be failfast quickly because nvme fabrics unquiesces queues after teardown. One side-effect is that !mpath request may timeout during connecting because of queue topo change, but that looks not one big deal: 1) same problem exists with current code base 2) compared with !mpath, mpath use case is dominant Fixes: 9f98772ba307 ("nvme-rdma: fix controller reset hang during traffic") Cc: stable@vger.kernel.org Signed-off-by: Ming Lei Tested-by: Yi Zhang Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d433b2ec07a6..337a624a537c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -883,6 +883,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_cleanup_tagset; if (!new) { + nvme_start_freeze(&ctrl->ctrl); nvme_unquiesce_io_queues(&ctrl->ctrl); if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { /* @@ -891,6 +892,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) * to be safe. */ ret = -ENODEV; + nvme_unfreeze(&ctrl->ctrl); goto out_wait_freeze_timed_out; } blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset, @@ -940,7 +942,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { if (ctrl->ctrl.queue_count > 1) { - nvme_start_freeze(&ctrl->ctrl); nvme_quiesce_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); -- cgit v1.2.3 From 507397d19b5a296aa339f7a1bd16284f668a1906 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 18 Jul 2023 10:02:18 +0300 Subject: iio: frequency: admv1013: propagate errors from regulator_get_voltage() The regulator_get_voltage() function returns negative error codes. This function saves it to an unsigned int and then does some range checking and, since the error code falls outside the correct range, it returns -EINVAL. Beyond the messiness, this is bad because the regulator_get_voltage() function can return -EPROBE_DEFER and it's important to propagate that back properly so it can be handled. Fixes: da35a7b526d9 ("iio: frequency: admv1013: add support for ADMV1013") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/ce75aac3-2aba-4435-8419-02e59fdd862b@moroto.mountain Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/admv1013.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index 9bf8337806fc..8c8e0bbfc99f 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -344,9 +344,12 @@ static int admv1013_update_quad_filters(struct admv1013_state *st) static int admv1013_update_mixer_vgate(struct admv1013_state *st) { - unsigned int vcm, mixer_vgate; + unsigned int mixer_vgate; + int vcm; vcm = regulator_get_voltage(st->reg); + if (vcm < 0) + return vcm; if (vcm < 1800000) mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100; -- cgit v1.2.3 From b2a69969908fcaf68596dfc04369af0fe2e1d2f7 Mon Sep 17 00:00:00 2001 From: Milan Zamazal Date: Wed, 19 Jul 2023 10:32:08 +0200 Subject: iio: core: Prevent invalid memory access when there is no parent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 813665564b3d ("iio: core: Convert to use firmware node handle instead of OF node") switched the kind of nodes to use for label retrieval in device registration. Probably an unwanted change in that commit was that if the device has no parent then NULL pointer is accessed. This is what happens in the stock IIO dummy driver when a new entry is created in configfs: # mkdir /sys/kernel/config/iio/devices/dummy/foo BUG: kernel NULL pointer dereference, address: ... ... Call Trace: __iio_device_register iio_dummy_probe Since there seems to be no reason to make a parent device of an IIO dummy device mandatory, let’s prevent the invalid memory access in __iio_device_register when the parent device is NULL. With this change, the IIO dummy driver works fine with configfs. Fixes: 813665564b3d ("iio: core: Convert to use firmware node handle instead of OF node") Reviewed-by: Andy Shevchenko Signed-off-by: Milan Zamazal Link: https://lore.kernel.org/r/20230719083208.88149-1-mzamazal@redhat.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index c117f50d0cf3..adcba832e6fa 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1888,7 +1888,7 @@ static const struct iio_buffer_setup_ops noop_ring_setup_ops; int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); - struct fwnode_handle *fwnode; + struct fwnode_handle *fwnode = NULL; int ret; if (!indio_dev->info) @@ -1899,7 +1899,8 @@ int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod) /* If the calling driver did not initialize firmware node, do it here */ if (dev_fwnode(&indio_dev->dev)) fwnode = dev_fwnode(&indio_dev->dev); - else + /* The default dummy IIO device has no parent */ + else if (indio_dev->dev.parent) fwnode = dev_fwnode(indio_dev->dev.parent); device_set_node(&indio_dev->dev, fwnode); -- cgit v1.2.3 From 09738ccbc4148c62d6c8c4644ff4a099d57f49ad Mon Sep 17 00:00:00 2001 From: George Stark Date: Fri, 21 Jul 2023 13:23:08 +0300 Subject: iio: adc: meson: fix core clock enable/disable moment Enable core clock at probe stage and disable it at remove stage. Core clock is responsible for turning on/off the entire SoC module so it should be on before the first module register is touched and be off at very last moment. Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs") Signed-off-by: George Stark Link: https://lore.kernel.org/r/20230721102413.255726-2-gnstark@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index af6bfcc19075..eb78a6f17fd0 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -916,12 +916,6 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev) goto err_vref; } - ret = clk_prepare_enable(priv->core_clk); - if (ret) { - dev_err(dev, "failed to enable core clk\n"); - goto err_core_clk; - } - regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1); regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0, MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval); @@ -948,8 +942,6 @@ err_adc_clk: regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3, MESON_SAR_ADC_REG3_ADC_EN, 0); meson_sar_adc_set_bandgap(indio_dev, false); - clk_disable_unprepare(priv->core_clk); -err_core_clk: regulator_disable(priv->vref); err_vref: meson_sar_adc_unlock(indio_dev); @@ -977,8 +969,6 @@ static void meson_sar_adc_hw_disable(struct iio_dev *indio_dev) meson_sar_adc_set_bandgap(indio_dev, false); - clk_disable_unprepare(priv->core_clk); - regulator_disable(priv->vref); if (!ret) @@ -1211,7 +1201,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (IS_ERR(priv->clkin)) return dev_err_probe(dev, PTR_ERR(priv->clkin), "failed to get clkin\n"); - priv->core_clk = devm_clk_get(dev, "core"); + priv->core_clk = devm_clk_get_enabled(dev, "core"); if (IS_ERR(priv->core_clk)) return dev_err_probe(dev, PTR_ERR(priv->core_clk), "failed to get core clk\n"); @@ -1294,15 +1284,26 @@ static int meson_sar_adc_remove(struct platform_device *pdev) static int meson_sar_adc_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct meson_sar_adc_priv *priv = iio_priv(indio_dev); meson_sar_adc_hw_disable(indio_dev); + clk_disable_unprepare(priv->core_clk); + return 0; } static int meson_sar_adc_resume(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct meson_sar_adc_priv *priv = iio_priv(indio_dev); + int ret; + + ret = clk_prepare_enable(priv->core_clk); + if (ret) { + dev_err(dev, "failed to enable core clk\n"); + return ret; + } return meson_sar_adc_hw_enable(indio_dev); } -- cgit v1.2.3 From 6811694eb2f6b7a4e97be2029edc7dd6a39460f8 Mon Sep 17 00:00:00 2001 From: Alejandro Tafalla Date: Fri, 14 Jul 2023 17:31:26 +0200 Subject: iio: imu: lsm6dsx: Fix mount matrix retrieval The function lsm6dsx_get_acpi_mount_matrix should return an error when ACPI support is not enabled to allow executing iio_read_mount_matrix in the probe function. Fixes: dc3d25f22b88 ("iio: imu: lsm6dsx: Add ACPI mount matrix retrieval") Signed-off-by: Alejandro Tafalla Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/20230714153132.27265-1-atafalla@dnyon.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 6a18b363cf73..b6e6b1df8a61 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2687,7 +2687,7 @@ unknown_format: static int lsm6dsx_get_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation) { - return false; + return -EOPNOTSUPP; } #endif -- cgit v1.2.3 From aead78125a987f48944bff2001f61df72b95afc4 Mon Sep 17 00:00:00 2001 From: Anh Tuan Phan Date: Sun, 16 Jul 2023 22:44:56 +0700 Subject: tools/counter: Makefile: Replace rmdir by rm to avoid make,clean failure Use rm -df instead of rmdir -p since rmdir requires the directory exist so it causes "make -C tools clean" failed if someone only builds other tools but not counter. Fixes: 228354ed692f ("tools/counter: Makefile: Remove lingering 'include' directories on make clean") Signed-off-by: Anh Tuan Phan Link: https://lore.kernel.org/r/d4080db5-1825-2848-079a-8bb674d8ee44@gmail.com/ Signed-off-by: William Breathitt Gray --- tools/counter/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/counter/Makefile b/tools/counter/Makefile index a0f4cab71fe5..b2c2946f44c9 100644 --- a/tools/counter/Makefile +++ b/tools/counter/Makefile @@ -40,7 +40,8 @@ $(OUTPUT)counter_example: $(COUNTER_EXAMPLE) clean: rm -f $(ALL_PROGRAMS) rm -rf $(OUTPUT)include/linux/counter.h - rmdir -p $(OUTPUT)include/linux + rm -df $(OUTPUT)include/linux + rm -df $(OUTPUT)include find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete install: $(ALL_PROGRAMS) -- cgit v1.2.3 From c5097b9869a136349d8404715dc8aabb7570a762 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Jul 2023 10:26:10 +0200 Subject: Revert "PCI: dwc: Wait for link up only if link is started" This reverts commit da56a1bfbab55189595e588f1d984bdfb5cf5924. Bjorn Andersson, Fabio Estevam, Xiaolei Wang, and Jon Hunter reported that da56a1bfbab5 ("PCI: dwc: Wait for link up only if link is started") broke controller probing by returning an error in case the link does not come up during host initialisation, for example when the slot is empty. As explained in commit 886a9c134755 ("PCI: dwc: Move link handling into common code") and as indicated by the comment "Ignore errors, the link may come up later" in the code, waiting for link up and ignoring errors is the intended behaviour: Let's standardize this to succeed as there are usecases where devices (and the link) appear later even without hotplug. For example, a reconfigured FPGA device. Reverting the offending commit specifically fixes a regression on Qualcomm platforms like the Lenovo ThinkPad X13s which no longer reach the interconnect sync state if a slot does not have a device populated (e.g. an optional modem). Note that enabling asynchronous probing by default as was done for Qualcomm platforms by commit c0e1eb441b1d ("PCI: qcom: Enable async probe by default"), should take care of any related boot time concerns. Finally, note that the intel-gw driver is the only driver currently not providing a .start_link() callback and instead starts the link in its .host_init() callback, which may avoid an additional one-second timeout during probe by making the link-up wait conditional. If anyone cares, that can be done in a follow-up patch with a proper motivation. [bhelgaas: add Fabio Estevam, Xiaolei Wang, Jon Hunter reports] Fixes: da56a1bfbab5 ("PCI: dwc: Wait for link up only if link is started") Link: https://lore.kernel.org/r/20230706082610.26584-1-johan+linaro@kernel.org Reported-by: Bjorn Andersson Signed-off-by: Johan Hovold Reported-by: Fabio Estevam Link: https://lore.kernel.org/r/20230704122635.1362156-1-festevam@gmail.com/ Reported-by: Xiaolei Wang Link: https://lore.kernel.org/r/20230705010624.3912934-1-xiaolei.wang@windriver.com/ Reported-by: Jon Hunter Link: https://lore.kernel.org/r/6ca287a1-6c7c-7b90-9022-9e73fb82b564@nvidia.com Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: Sajid Dalvi Cc: Ajay Agarwal --- drivers/pci/controller/dwc/pcie-designware-host.c | 13 ++++--------- drivers/pci/controller/dwc/pcie-designware.c | 20 +++++++------------- drivers/pci/controller/dwc/pcie-designware.h | 1 - 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index cf61733bf78d..9952057c8819 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -485,20 +485,15 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) if (ret) goto err_remove_edma; - if (dw_pcie_link_up(pci)) { - dw_pcie_print_link_status(pci); - } else { + if (!dw_pcie_link_up(pci)) { ret = dw_pcie_start_link(pci); if (ret) goto err_remove_edma; - - if (pci->ops && pci->ops->start_link) { - ret = dw_pcie_wait_for_link(pci); - if (ret) - goto err_stop_link; - } } + /* Ignore errors, the link may come up later */ + dw_pcie_wait_for_link(pci); + bridge->sysdata = pp; ret = pci_host_probe(bridge); diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index c87848cd8686..1f2ee71da4da 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -644,20 +644,9 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index) dw_pcie_writel_atu(pci, dir, index, PCIE_ATU_REGION_CTRL2, 0); } -void dw_pcie_print_link_status(struct dw_pcie *pci) -{ - u32 offset, val; - - offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); - val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA); - - dev_info(pci->dev, "PCIe Gen.%u x%u link up\n", - FIELD_GET(PCI_EXP_LNKSTA_CLS, val), - FIELD_GET(PCI_EXP_LNKSTA_NLW, val)); -} - int dw_pcie_wait_for_link(struct dw_pcie *pci) { + u32 offset, val; int retries; /* Check if the link is up or not */ @@ -673,7 +662,12 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci) return -ETIMEDOUT; } - dw_pcie_print_link_status(pci); + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA); + + dev_info(pci->dev, "PCIe Gen.%u x%u link up\n", + FIELD_GET(PCI_EXP_LNKSTA_CLS, val), + FIELD_GET(PCI_EXP_LNKSTA_NLW, val)); return 0; } diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index 615660640801..79713ce075cc 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -429,7 +429,6 @@ void dw_pcie_setup(struct dw_pcie *pci); void dw_pcie_iatu_detect(struct dw_pcie *pci); int dw_pcie_edma_detect(struct dw_pcie *pci); void dw_pcie_edma_remove(struct dw_pcie *pci); -void dw_pcie_print_link_status(struct dw_pcie *pci); static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val) { -- cgit v1.2.3 From 238353088e9b28d61f58994aa058d736fc306614 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 25 Jul 2023 11:58:26 +0100 Subject: scripts/kallsyms: Fix build failure by setting errno before calling getline() getline() returns -1 at EOF as well as on error. It also doesn't set errno to 0 on success, so initialize it to 0 before using errno to check for an error condition. See the paragraph here [1]: For some system calls and library functions (e.g., getpriority(2)), -1 is a valid return on success. In such cases, a successful return can be distinguished from an error return by setting errno to zero before the call, and then, if the call returns a status that indicates that an error may have occurred, checking to see if errno has a nonzero value. Bear has a bug [2] that launches processes with errno set and causes the following build failure: $ bear -- make LLVM=1 ... LD .tmp_vmlinux.kallsyms1 NM .tmp_vmlinux.kallsyms1.syms KSYMS .tmp_vmlinux.kallsyms1.S read_symbol: Invalid argument [1]: https://linux.die.net/man/3/errno [2]: https://github.com/rizsotto/Bear/issues/469 Fixes: 1c975da56a6f ("scripts/kallsyms: remove KSYM_NAME_LEN_BUFFER") Reviewed-by: Miguel Ojeda Signed-off-by: James Clark Signed-off-by: Masahiro Yamada --- scripts/kallsyms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 16c87938b316..653b92f6d4c8 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -129,6 +129,7 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) ssize_t readlen; struct sym_entry *sym; + errno = 0; readlen = getline(buf, buf_len, in); if (readlen < 0) { if (errno) { -- cgit v1.2.3 From 8a4629055ef55177b5b63dab1ecce676bd8cccdd Mon Sep 17 00:00:00 2001 From: Yiyuan Guo Date: Fri, 30 Jun 2023 22:37:19 +0800 Subject: iio: cros_ec: Fix the allocation size for cros_ec_command The struct cros_ec_command contains several integer fields and a trailing array. An allocation size neglecting the integer fields can lead to buffer overrun. Reviewed-by: Tzung-Bi Shih Signed-off-by: Yiyuan Guo Fixes: 974e6f02e27e ("iio: cros_ec_sensors_core: Add common functions for the ChromeOS EC Sensor Hub.") Link: https://lore.kernel.org/r/20230630143719.1513906-1-yguoaz@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index 943e9e14d1e9..b72d39fc2434 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -253,7 +253,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev, platform_set_drvdata(pdev, indio_dev); state->ec = ec->ec_dev; - state->msg = devm_kzalloc(&pdev->dev, + state->msg = devm_kzalloc(&pdev->dev, sizeof(*state->msg) + max((u16)sizeof(struct ec_params_motion_sense), state->ec->max_response), GFP_KERNEL); if (!state->msg) -- cgit v1.2.3 From 00374d9b6d9f932802b55181be9831aa948e5b7c Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 21 Jul 2023 22:51:03 +0800 Subject: xfrm: add NULL check in xfrm_update_ae_params Normally, x->replay_esn and x->preplay_esn should be allocated at xfrm_alloc_replay_state_esn(...) in xfrm_state_construct(...), hence the xfrm_update_ae_params(...) is okay to update them. However, the current implementation of xfrm_new_ae(...) allows a malicious user to directly dereference a NULL pointer and crash the kernel like below. BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 8253067 P4D 8253067 PUD 8e0e067 PMD 0 Oops: 0002 [#1] PREEMPT SMP KASAN NOPTI CPU: 0 PID: 98 Comm: poc.npd Not tainted 6.4.0-rc7-00072-gdad9774deaf1 #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.o4 RIP: 0010:memcpy_orig+0xad/0x140 Code: e8 4c 89 5f e0 48 8d 7f e0 73 d2 83 c2 20 48 29 d6 48 29 d7 83 fa 10 72 34 4c 8b 06 4c 8b 4e 08 c RSP: 0018:ffff888008f57658 EFLAGS: 00000202 RAX: 0000000000000000 RBX: ffff888008bd0000 RCX: ffffffff8238e571 RDX: 0000000000000018 RSI: ffff888007f64844 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff888008f57818 R13: ffff888007f64aa4 R14: 0000000000000000 R15: 0000000000000000 FS: 00000000014013c0(0000) GS:ffff88806d600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000054d8000 CR4: 00000000000006f0 Call Trace: ? __die+0x1f/0x70 ? page_fault_oops+0x1e8/0x500 ? __pfx_is_prefetch.constprop.0+0x10/0x10 ? __pfx_page_fault_oops+0x10/0x10 ? _raw_spin_unlock_irqrestore+0x11/0x40 ? fixup_exception+0x36/0x460 ? _raw_spin_unlock_irqrestore+0x11/0x40 ? exc_page_fault+0x5e/0xc0 ? asm_exc_page_fault+0x26/0x30 ? xfrm_update_ae_params+0xd1/0x260 ? memcpy_orig+0xad/0x140 ? __pfx__raw_spin_lock_bh+0x10/0x10 xfrm_update_ae_params+0xe7/0x260 xfrm_new_ae+0x298/0x4e0 ? __pfx_xfrm_new_ae+0x10/0x10 ? __pfx_xfrm_new_ae+0x10/0x10 xfrm_user_rcv_msg+0x25a/0x410 ? __pfx_xfrm_user_rcv_msg+0x10/0x10 ? __alloc_skb+0xcf/0x210 ? stack_trace_save+0x90/0xd0 ? filter_irq_stacks+0x1c/0x70 ? __stack_depot_save+0x39/0x4e0 ? __kasan_slab_free+0x10a/0x190 ? kmem_cache_free+0x9c/0x340 ? netlink_recvmsg+0x23c/0x660 ? sock_recvmsg+0xeb/0xf0 ? __sys_recvfrom+0x13c/0x1f0 ? __x64_sys_recvfrom+0x71/0x90 ? do_syscall_64+0x3f/0x90 ? entry_SYSCALL_64_after_hwframe+0x72/0xdc ? copyout+0x3e/0x50 netlink_rcv_skb+0xd6/0x210 ? __pfx_xfrm_user_rcv_msg+0x10/0x10 ? __pfx_netlink_rcv_skb+0x10/0x10 ? __pfx_sock_has_perm+0x10/0x10 ? mutex_lock+0x8d/0xe0 ? __pfx_mutex_lock+0x10/0x10 xfrm_netlink_rcv+0x44/0x50 netlink_unicast+0x36f/0x4c0 ? __pfx_netlink_unicast+0x10/0x10 ? netlink_recvmsg+0x500/0x660 netlink_sendmsg+0x3b7/0x700 This Null-ptr-deref bug is assigned CVE-2023-3772. And this commit adds additional NULL check in xfrm_update_ae_params to fix the NPD. Fixes: d8647b79c3b7 ("xfrm: Add user interface for esn and big anti-replay windows") Signed-off-by: Lin Ma Reviewed-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fdc0c17122b6..8f74dde4a55f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -628,7 +628,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH]; - if (re) { + if (re && x->replay_esn && x->preplay_esn) { struct xfrm_replay_state_esn *replay_esn; replay_esn = nla_data(re); memcpy(x->replay_esn, replay_esn, -- cgit v1.2.3 From 5e2424708da7207087934c5c75211e8584d553a0 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 23 Jul 2023 15:41:10 +0800 Subject: xfrm: add forgotten nla_policy for XFRMA_MTIMER_THRESH The previous commit 4e484b3e969b ("xfrm: rate limit SA mapping change message to user space") added one additional attribute named XFRMA_MTIMER_THRESH and described its type at compat_policy (net/xfrm/xfrm_compat.c). However, the author forgot to also describe the nla_policy at xfrma_policy (net/xfrm/xfrm_user.c). Hence, this suppose NLA_U32 (4 bytes) value can be faked as empty (0 bytes) by a malicious user, which leads to 4 bytes overflow read and heap information leak when parsing nlattrs. To exploit this, one malicious user can spray the SLUB objects and then leverage this 4 bytes OOB read to leak the heap data into x->mapping_maxage (see xfrm_update_ae_params(...)), and leak it to userspace via copy_to_user_state_extra(...). The above bug is assigned CVE-2023-3773. To fix it, this commit just completes the nla_policy description for XFRMA_MTIMER_THRESH, which enforces the length check and avoids such OOB read. Fixes: 4e484b3e969b ("xfrm: rate limit SA mapping change message to user space") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8f74dde4a55f..f06d6deb58dd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3044,6 +3044,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK] = { .type = NLA_U32 }, [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, + [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); -- cgit v1.2.3 From 186b169cf1e4be85aa212a893ea783a543400979 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 19 Jul 2023 12:02:41 +0300 Subject: RDMA/umem: Set iova in ODP flow Fixing the ODP registration flow to set the iova correctly. The calculation in ib_umem_num_dma_blocks() function assumes the iova of the umem is set correctly. When iova is not set, the calculation in ib_umem_num_dma_blocks() is equivalent to length/page_size, which is true only when memory is aligned. For unaligned memory, iova must be set for the ALIGN() in the ib_umem_num_dma_blocks() to take effect and return a correct value. mlx5_ib uses ib_umem_num_dma_blocks() to decide the mkey size to use for the MR. Without this fix, when registering unaligned ODP MR, a wrong size mkey might be chosen and this might cause the UMR to fail. UMR would fail over insufficient size to update the mkey translation: infiniband mlx5_0: dump_cqe:273:(pid 0): dump error cqe 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 0f 00 78 06 25 00 00 58 00 da ac d2 infiniband mlx5_0: mlx5_ib_post_send_wait:806:(pid 20311): reg umr failed (6) infiniband mlx5_0: pagefault_real_mr:661:(pid 20311): Failed to update mkey page tables Fixes: f0093fb1a7cb ("RDMA/mlx5: Move mlx5_ib_cont_pages() to the creation of the mlx5_ib_mr") Fixes: a665aca89a41 ("RDMA/umem: Split ib_umem_num_pages() into ib_umem_num_dma_blocks()") Signed-off-by: Artemy Kovalyov Signed-off-by: Michael Guralnik Link: https://lore.kernel.org/r/3d4be7ca2155bf239dd8c00a2d25974a92c26ab8.1689757344.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 755a9c57db6f..f9ab671c8eda 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -85,6 +85,8 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, dma_addr_t mask; int i; + umem->iova = va = virt; + if (umem->is_odp) { unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift); @@ -100,7 +102,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, */ pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); - umem->iova = va = virt; /* The best result is the smallest page size that results in the minimum * number of required pages. Compute the largest page size that could * work based on VA address bits that don't change. -- cgit v1.2.3 From 785c00993dc4c4bb2f7b0f3a3f29c03a6f7aab2e Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 27 Jul 2023 09:43:15 +0800 Subject: platform/x86/amd/pmf: Fix unsigned comparison with less than zero The return value from the call to amd_pmf_get_pprof_modes() is int. However, the return value is being assigned to an unsigned char variable 'mode', so making 'mode' an int. silence the warning: ./drivers/platform/x86/amd/pmf/sps.c:183:5-9: WARNING: Unsigned expression compared with zero: mode < 0 Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5995 Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230727014315.51375-1-yang.lee@linux.alibaba.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/sps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index ab69d517a36a..a70e67749be3 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -176,7 +176,8 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf) int amd_pmf_power_slider_update_event(struct amd_pmf_dev *dev) { - u8 mode, flag = 0; + u8 flag = 0; + int mode; int src; mode = amd_pmf_get_pprof_modes(dev); -- cgit v1.2.3 From 1cd0302be5645420f73090aee26fa787287e1096 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 28 Jul 2023 12:13:45 +0100 Subject: ACPI: scan: Create platform device for CS35L56 The ACPI device CSC3556 is a Cirrus Logic CS35L56 mono amplifier which is used in multiples, and can be connected either to I2C or SPI. There will be multiple instances under the same Device() node. Add it to ignore_serial_bus_ids and handle it in the serial-multi-instantiate driver. There can be a 5th I2cSerialBusV2, but this is an alias address and doesn't represent a real device. Ignore this by having a dummy 5th entry in the serial-multi-instantiate instance list with the name of a non-existent driver, on the same pattern as done for bsg2150. Signed-off-by: Simon Trimmer Signed-off-by: Richard Fitzgerald Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230728111345.7224-1-rf@opensource.cirrus.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/acpi/scan.c | 1 + drivers/platform/x86/serial-multi-instantiate.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 5b145f1aaa1b..87e385542576 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1714,6 +1714,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) {"BSG1160", }, {"BSG2150", }, {"CSC3551", }, + {"CSC3556", }, {"INT33FE", }, {"INT3515", }, /* Non-conforming _HID for Cirrus Logic already released */ diff --git a/drivers/platform/x86/serial-multi-instantiate.c b/drivers/platform/x86/serial-multi-instantiate.c index 2c2abf69f049..8158e3cf5d6d 100644 --- a/drivers/platform/x86/serial-multi-instantiate.c +++ b/drivers/platform/x86/serial-multi-instantiate.c @@ -329,6 +329,19 @@ static const struct smi_node cs35l41_hda = { .bus_type = SMI_AUTO_DETECT, }; +static const struct smi_node cs35l56_hda = { + .instances = { + { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 }, + { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 }, + { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 }, + { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 }, + /* a 5th entry is an alias address, not a real device */ + { "cs35l56-hda_dummy_dev" }, + {} + }, + .bus_type = SMI_AUTO_DETECT, +}; + /* * Note new device-ids must also be added to ignore_serial_bus_ids in * drivers/acpi/scan.c: acpi_device_enumeration_by_parent(). @@ -337,6 +350,7 @@ static const struct acpi_device_id smi_acpi_ids[] = { { "BSG1160", (unsigned long)&bsg1160_data }, { "BSG2150", (unsigned long)&bsg2150_data }, { "CSC3551", (unsigned long)&cs35l41_hda }, + { "CSC3556", (unsigned long)&cs35l56_hda }, { "INT3515", (unsigned long)&int3515_data }, /* Non-conforming _HID for Cirrus Logic already released */ { "CLSA0100", (unsigned long)&cs35l41_hda }, -- cgit v1.2.3 From 175544ad48cbf56affeef2a679c6a4d4fb1e2881 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 28 Jul 2023 21:59:24 -0700 Subject: scsi: storvsc: Fix handling of virtual Fibre Channel timeouts Hyper-V provides the ability to connect Fibre Channel LUNs to the host system and present them in a guest VM as a SCSI device. I/O to the vFC device is handled by the storvsc driver. The storvsc driver includes a partial integration with the FC transport implemented in the generic portion of the Linux SCSI subsystem so that FC attributes can be displayed in /sys. However, the partial integration means that some aspects of vFC don't work properly. Unfortunately, a full and correct integration isn't practical because of limitations in what Hyper-V provides to the guest. In particular, in the context of Hyper-V storvsc, the FC transport timeout function fc_eh_timed_out() causes a kernel panic because it can't find the rport and dereferences a NULL pointer. The original patch that added the call from storvsc_eh_timed_out() to fc_eh_timed_out() is faulty in this regard. In many cases a timeout is due to a transient condition, so the situation can be improved by just continuing to wait like with other I/O requests issued by storvsc, and avoiding the guaranteed panic. For a permanent failure, continuing to wait may result in a hung thread instead of a panic, which again may be better. So fix the panic by removing the storvsc call to fc_eh_timed_out(). This allows storvsc to keep waiting for a response. The change has been tested by users who experienced a panic in fc_eh_timed_out() due to transient timeouts, and it solves their problem. In the future we may want to deprecate the vFC functionality in storvsc since it can't be fully fixed. But it has current users for whom it is working well enough, so it should probably stay for a while longer. Fixes: 3930d7309807 ("scsi: storvsc: use default I/O timeout handler for FC devices") Cc: stable@vger.kernel.org Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1690606764-79669-1-git-send-email-mikelley@microsoft.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index f2823218670a..047ffaf7d42a 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1674,10 +1674,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) */ static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd) { -#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) - if (scmnd->device->host->transportt == fc_transport_template) - return fc_eh_timed_out(scmnd); -#endif return SCSI_EH_RESET_TIMER; } -- cgit v1.2.3 From 5a43b07a87835660f91d88a4db11abfea8c523b7 Mon Sep 17 00:00:00 2001 From: Karan Tilak Kumar Date: Thu, 27 Jul 2023 12:39:19 -0700 Subject: scsi: fnic: Replace return codes in fnic_clean_pending_aborts() fnic_clean_pending_aborts() was returning a non-zero value irrespective of failure or success. This caused the caller of this function to assume that the device reset had failed, even though it would succeed in most cases. As a consequence, a successful device reset would escalate to host reset. Reviewed-by: Sesidhar Baddela Tested-by: Karan Tilak Kumar Signed-off-by: Karan Tilak Kumar Link: https://lore.kernel.org/r/20230727193919.2519-1-kartilak@cisco.com Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic.h | 2 +- drivers/scsi/fnic/fnic_scsi.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index d82de34f6fd7..e51e92f932fa 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -27,7 +27,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.6.0.54" +#define DRV_VERSION "1.6.0.55" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 26dbd347156e..be89ce96df46 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2139,7 +2139,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, bool new_sc) { - int ret = SUCCESS; + int ret = 0; struct fnic_pending_aborts_iter_data iter_data = { .fnic = fnic, .lun_dev = lr_sc->device, @@ -2159,9 +2159,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, /* walk again to check, if IOs are still pending in fw */ if (fnic_is_abts_pending(fnic, lr_sc)) - ret = FAILED; + ret = 1; clean_pending_aborts_end: + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s: exit status: %d\n", __func__, ret); return ret; } -- cgit v1.2.3 From 8366d1f1249a0d0bba41d0bd1298d63e5d34c7f7 Mon Sep 17 00:00:00 2001 From: Alexandra Diupina Date: Fri, 28 Jul 2023 15:35:21 +0300 Subject: scsi: 53c700: Check that command slot is not NULL Add a check for the command slot value to avoid dereferencing a NULL pointer. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Co-developed-by: Vladimir Telezhnikov Signed-off-by: Vladimir Telezhnikov Signed-off-by: Alexandra Diupina Link: https://lore.kernel.org/r/20230728123521.18293-1-adiupina@astralinux.ru Signed-off-by: Martin K. Petersen --- drivers/scsi/53c700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index e1e4f9d10887..857be0f3ae5b 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1598,7 +1598,7 @@ NCR_700_intr(int irq, void *dev_id) printk("scsi%d (%d:%d) PHASE MISMATCH IN SEND MESSAGE %d remain, return %p[%04x], phase %s\n", host->host_no, pun, lun, count, (void *)temp, temp - hostdata->pScript, sbcl_to_string(NCR_700_readb(host, SBCL_REG))); #endif resume_offset = hostdata->pScript + Ent_SendMessagePhaseMismatch; - } else if(dsp >= to32bit(&slot->pSG[0].ins) && + } else if (slot && dsp >= to32bit(&slot->pSG[0].ins) && dsp <= to32bit(&slot->pSG[NCR_700_SG_SEGMENTS].ins)) { int data_transfer = NCR_700_readl(host, DBC_REG) & 0xffffff; int SGcount = (dsp - to32bit(&slot->pSG[0].ins))/sizeof(struct NCR_700_SG_List); -- cgit v1.2.3 From 9426d3cef5000824e5f24f80ed5f42fb935f2488 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Mon, 24 Jul 2023 14:25:40 -0400 Subject: scsi: core: Fix legacy /proc parsing buffer overflow (lightly modified commit message mostly by Linus Torvalds) The parsing code for /proc/scsi/scsi is disgusting and broken. We should have just used 'sscanf()' or something simple like that, but the logic may actually predate our kernel sscanf library routine for all I know. It certainly predates both git and BK histories. And we can't change it to be something sane like that now, because the string matching at the start is done case-insensitively, and the separator parsing between numbers isn't done at all, so *any* separator will work, including a possible terminating NUL character. This interface is root-only, and entirely for legacy use, so there is absolutely no point in trying to tighten up the parsing. Because any separator has traditionally worked, it's entirely possible that people have used random characters rather than the suggested space. So don't bother to try to pretty it up, and let's just make a minimal patch that can be back-ported and we can forget about this whole sorry thing for another two decades. Just make it at least not read past the end of the supplied data. Link: https://lore.kernel.org/linux-scsi/b570f5fe-cb7c-863a-6ed9-f6774c219b88@cybernetics.com/ Cc: Linus Torvalds Cc: Martin K Petersen Cc: James Bottomley Cc: Willy Tarreau Cc: stable@kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Tony Battersby Signed-off-by: Martin K Petersen --- drivers/scsi/scsi_proc.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 4a6eb1741be0..41f23cd0bfb4 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -406,7 +406,7 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, size_t length, loff_t *ppos) { int host, channel, id, lun; - char *buffer, *p; + char *buffer, *end, *p; int err; if (!buf || length > PAGE_SIZE) @@ -421,10 +421,14 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, goto out; err = -EINVAL; - if (length < PAGE_SIZE) - buffer[length] = '\0'; - else if (buffer[PAGE_SIZE-1]) - goto out; + if (length < PAGE_SIZE) { + end = buffer + length; + *end = '\0'; + } else { + end = buffer + PAGE_SIZE - 1; + if (*end) + goto out; + } /* * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi @@ -433,10 +437,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, if (!strncmp("scsi add-single-device", buffer, 22)) { p = buffer + 23; - host = simple_strtoul(p, &p, 0); - channel = simple_strtoul(p + 1, &p, 0); - id = simple_strtoul(p + 1, &p, 0); - lun = simple_strtoul(p + 1, &p, 0); + host = (p < end) ? simple_strtoul(p, &p, 0) : 0; + channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; + id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; + lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; err = scsi_add_single_device(host, channel, id, lun); @@ -447,10 +451,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, } else if (!strncmp("scsi remove-single-device", buffer, 25)) { p = buffer + 26; - host = simple_strtoul(p, &p, 0); - channel = simple_strtoul(p + 1, &p, 0); - id = simple_strtoul(p + 1, &p, 0); - lun = simple_strtoul(p + 1, &p, 0); + host = (p < end) ? simple_strtoul(p, &p, 0) : 0; + channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; + id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; + lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; err = scsi_remove_single_device(host, channel, id, lun); } -- cgit v1.2.3 From 982c3aca8bac8ae38acdc940e4f1ecec3bffc623 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 31 Jul 2023 14:38:26 +0300 Subject: xfrm: delete offloaded policy The policy memory was released but not HW driver data. Add call to xfrm_dev_policy_delete(), so drivers will have a chance to release their resources. Fixes: 919e43fad516 ("xfrm: add an interface to offload policy") Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f06d6deb58dd..ad01997c3aa9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2345,6 +2345,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid); } } else { + xfrm_dev_policy_delete(xp); xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err != 0) -- cgit v1.2.3 From f3ec2b5d879ef5bbcb24678914641343cb6399a2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 31 Jul 2023 14:38:27 +0300 Subject: xfrm: don't skip free of empty state in acquire policy In destruction flow, the assignment of NULL to xso->dev caused to skip of xfrm_dev_state_free() call, which was called in xfrm_state_put(to_put) routine. Instead of open-coded variant of xfrm_dev_state_delete() and xfrm_dev_state_free(), let's use them directly. Fixes: f8a70afafc17 ("xfrm: add TX datapath support for IPsec packet offload mode") Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/xfrm_state.c | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 151ca95dd08d..363c7d510554 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; + xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; netdev_put(dev, &xso->dev_tracker); } } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 49e63eea841d..bda5327bf34d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1324,12 +1324,8 @@ found: struct xfrm_dev_offload *xso = &x->xso; if (xso->type == XFRM_DEV_OFFLOAD_PACKET) { - xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); - xso->dir = 0; - netdev_put(xso->dev, &xso->dev_tracker); - xso->dev = NULL; - xso->real_dev = NULL; - xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; + xfrm_dev_state_delete(x); + xfrm_dev_state_free(x); } #endif x->km.state = XFRM_STATE_DEAD; -- cgit v1.2.3 From 688b419c57c13637d95d7879e165fff3dec581eb Mon Sep 17 00:00:00 2001 From: August Wikerfors Date: Wed, 16 Nov 2022 18:17:27 +0100 Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for Samsung PM9B1 256G and 512G The Samsung PM9B1 512G SSD found in some Lenovo Yoga 7 14ARB7 laptop units reports eui as 0001000200030004 when resuming from s2idle, causing the device to be removed with this error in dmesg: nvme nvme0: identifiers changed for nsid 1 To fix this, add a quirk to ignore namespace identifiers for this device. Signed-off-by: August Wikerfors Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index baf69af7ea78..2f57da12d983 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3402,7 +3402,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */ - .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */ -- cgit v1.2.3 From e9d699af3f65d62cf195f0e7a039400093ab2af2 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Thu, 27 Jul 2023 18:01:10 +0800 Subject: drm/bridge: it6505: Check power state with it6505->powered in IRQ handler On system resume, the driver might call it6505_poweron directly if the runtime PM hasn't been enabled. In such case, pm_runtime_get_if_in_use will always return 0 because dev->power.runtime_status stays at RPM_SUSPENDED, and the IRQ will never be handled. Use it6505->powered from the driver struct fixes this because it always gets updated when it6505_poweron is called. Fixes: 5eb9a4314053 ("drm/bridge: it6505: Guard bridge power in IRQ handler") Signed-off-by: Pin-yen Lin Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230727100131.2338127-1-treapking@chromium.org --- drivers/gpu/drm/bridge/ite-it6505.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index 504d51c42f79..aadb396508c5 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -2517,9 +2517,11 @@ static irqreturn_t it6505_int_threaded_handler(int unused, void *data) }; int int_status[3], i; - if (it6505->enable_drv_hold || pm_runtime_get_if_in_use(dev) <= 0) + if (it6505->enable_drv_hold || !it6505->powered) return IRQ_HANDLED; + pm_runtime_get_sync(dev); + int_status[0] = it6505_read(it6505, INT_STATUS_01); int_status[1] = it6505_read(it6505, INT_STATUS_02); int_status[2] = it6505_read(it6505, INT_STATUS_03); -- cgit v1.2.3 From 1cb9e2ef66d53b020842b18762e30d0eb4384de8 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 22 Jun 2023 17:20:17 +0200 Subject: drm/nouveau/gr: enable memory loads on helper invocation on all channels We have a lurking bug where Fragment Shader Helper Invocations can't load from memory. But this is actually required in OpenGL and is causing random hangs or failures in random shaders. It is unknown how widespread this issue is, but shaders hitting this can end up with infinite loops. We enable those only on all Kepler and newer GPUs where we use our own Firmware. Nvidia's firmware provides a way to set a kernelspace controlled list of mmio registers in the gr space from push buffers via MME macros. v2: drop code for gm200 and newer. Cc: Ben Skeggs Cc: David Airlie Cc: nouveau@lists.freedesktop.org Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Karol Herbst Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230622152017.2512101-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 4 +++- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 10 ++++++++++ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 1 + 6 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 00dbeda7e346..de161e7a04aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *); extern const struct gf100_grctx_func gk110_grctx; void gk110_grctx_generate_r419eb0(struct gf100_gr *); +void gk110_grctx_generate_r419f78(struct gf100_gr *); extern const struct gf100_grctx_func gk110b_grctx; extern const struct gf100_grctx_func gk208_grctx; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 94233d0119df..52a234b1ef01 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -906,7 +906,9 @@ static void gk104_grctx_generate_r419f78(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); + + /* bit 3 set disables loads in fp helper invocations, we need it enabled */ + nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 4391458e1fb2..3acdd9eeb74a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr) nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000); } +void +gk110_grctx_generate_r419f78(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + /* bit 3 set disables loads in fp helper invocations, we need it enabled */ + nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000); +} + const struct gf100_grctx_func gk110_grctx = { .main = gf100_grctx_generate_main, @@ -854,4 +863,5 @@ gk110_grctx = { .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, .r418800 = gk104_grctx_generate_r418800, .r419eb0 = gk110_grctx_generate_r419eb0, + .r419f78 = gk110_grctx_generate_r419f78, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index 7b9a34f9ec3c..5597e87624ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -103,4 +103,5 @@ gk110b_grctx = { .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, .r418800 = gk104_grctx_generate_r418800, .r419eb0 = gk110_grctx_generate_r419eb0, + .r419f78 = gk110_grctx_generate_r419f78, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index c78d07a8bb7d..612656496541 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -568,4 +568,5 @@ gk208_grctx = { .dist_skip_table = gf117_grctx_generate_dist_skip_table, .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, .r418800 = gk104_grctx_generate_r418800, + .r419f78 = gk110_grctx_generate_r419f78, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index beac66eb2a80..9906974ac3f0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -988,4 +988,5 @@ gm107_grctx = { .r406500 = gm107_grctx_generate_r406500, .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, .r419e00 = gm107_grctx_generate_r419e00, + .r419f78 = gk110_grctx_generate_r419f78, }; -- cgit v1.2.3 From e4060dad253352382b20420d8ef98daab24dbc17 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 28 Jul 2023 18:58:57 -0400 Subject: drm/nouveau/nvkm/dp: Add workaround to fix DP 1.3+ DPCD issues Currently we use the drm_dp_dpcd_read_caps() helper in the DRM side of nouveau in order to read the DPCD of a DP connector, which makes sure we do the right thing and also check for extended DPCD caps. However, it turns out we're not currently doing this on the nvkm side since we don't have access to the drm_dp_aux structure there - which means that the DRM side of the driver and the NVKM side can end up with different DPCD capabilities for the same connector. Ideally in order to fix this, we just want to use the drm_dp_read_dpcd_caps() helper in nouveau. That's not currently possible though, and is going to depend on having a bunch of the DP code moved out of nvkm and into the DRM side of things as part of the GSP enablement work. Until then however, let's workaround this problem by porting a copy of drm_dp_read_dpcd_caps() into NVKM - which should fix this issue. Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/211 Link: https://patchwork.freedesktop.org/patch/msgid/20230728225858.350581-1-lyude@redhat.com (cherry picked from commit cc4adf3a7323212f303bc9ff0f96346c44fcba06 in drm-misc-next) Cc: # 6.3+ Signed-off-by: Karol Herbst --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 48 ++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 40c8ea43c42f..b8ac66b4a2c4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -26,6 +26,8 @@ #include "head.h" #include "ior.h" +#include + #include #include #include @@ -634,6 +636,50 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp) return outp->dp.rates != 0; } +/* XXX: This is a big fat hack, and this is just drm_dp_read_dpcd_caps() + * converted to work inside nvkm. This is a temporary holdover until we start + * passing the drm_dp_aux device through NVKM + */ +static int +nvkm_dp_read_dpcd_caps(struct nvkm_outp *outp) +{ + struct nvkm_i2c_aux *aux = outp->dp.aux; + u8 dpcd_ext[DP_RECEIVER_CAP_SIZE]; + int ret; + + ret = nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, DP_RECEIVER_CAP_SIZE); + if (ret < 0) + return ret; + + /* + * Prior to DP1.3 the bit represented by + * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved. + * If it is set DP_DPCD_REV at 0000h could be at a value less than + * the true capability of the panel. The only way to check is to + * then compare 0000h and 2200h. + */ + if (!(outp->dp.dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT)) + return 0; + + ret = nvkm_rdaux(aux, DP_DP13_DPCD_REV, dpcd_ext, sizeof(dpcd_ext)); + if (ret < 0) + return ret; + + if (outp->dp.dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) { + OUTP_DBG(outp, "Extended DPCD rev less than base DPCD rev (%d > %d)\n", + outp->dp.dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]); + return 0; + } + + if (!memcmp(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext))) + return 0; + + memcpy(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)); + + return 0; +} + void nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr) { @@ -689,7 +735,7 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr) memset(outp->dp.lttpr, 0x00, sizeof(outp->dp.lttpr)); } - if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, sizeof(outp->dp.dpcd))) { + if (!nvkm_dp_read_dpcd_caps(outp)) { const u8 rates[] = { 0x1e, 0x14, 0x0a, 0x06, 0 }; const u8 *rate; int rate_max; -- cgit v1.2.3 From 583893a66d731f5da010a3fa38a0460e05f0149b Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Wed, 2 Aug 2023 06:11:49 -0500 Subject: thunderbolt: Fix Thunderbolt 3 display flickering issue on 2nd hot plug onwards Previously, on unplug events, the TMU mode was disabled first followed by the Time Synchronization Handshake, irrespective of whether the tb_switch_tmu_rate_write() API was successful or not. However, this caused a problem with Thunderbolt 3 (TBT3) devices, as the TSPacketInterval bits were always enabled by default, leading the host router to assume that the device router's TMU was already enabled and preventing it from initiating the Time Synchronization Handshake. As a result, TBT3 monitors experienced display flickering from the second hot plug onwards. To address this issue, we have modified the code to only disable the Time Synchronization Handshake during TMU disable if the tb_switch_tmu_rate_write() function is successful. This ensures that the TBT3 devices function correctly and eliminates the display flickering issue. Co-developed-by: Sanath S Signed-off-by: Sanath S Signed-off-by: Sanjay R Mehta Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tmu.c b/drivers/thunderbolt/tmu.c index 1269f417515b..0dfd1e083994 100644 --- a/drivers/thunderbolt/tmu.c +++ b/drivers/thunderbolt/tmu.c @@ -579,7 +579,9 @@ int tb_switch_tmu_disable(struct tb_switch *sw) * uni-directional mode and we don't want to change it's TMU * mode. */ - tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]); + ret = tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]); + if (ret) + return ret; tb_port_tmu_time_sync_disable(up); ret = tb_port_tmu_time_sync_disable(down); -- cgit v1.2.3 From 5a78d5db9c90c9dc84212f40a5f2687b7cafc8ec Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Aug 2023 21:09:51 +0200 Subject: gpio: sim: mark the GPIO chip as a one that can sleep Simulated chips use a mutex for synchronization in driver callbacks so they must not be called from interrupt context. Set the can_sleep field of the GPIO chip to true to force users to only use threaded irqs. Fixes: cb8c474e79be ("gpio: sim: new testing module") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- drivers/gpio/gpio-sim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 8b49b0abacd5..f1f6f1c32987 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -429,6 +429,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) gc->set_config = gpio_sim_set_config; gc->to_irq = gpio_sim_to_irq; gc->free = gpio_sim_free; + gc->can_sleep = true; ret = devm_gpiochip_add_data(dev, gc, chip); if (ret) -- cgit v1.2.3 From 421dabcad1c69e02a41c0d601aefbc29ee3f5368 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Aug 2023 16:33:48 +0200 Subject: drm/nouveau: remove unused tu102_gr_load() function tu102_gr_load() is completely unused and can be removed to address this warning: drivers/gpu/drm/nouveau/dispnv50/disp.c:2517:1: error: no previous prototype for 'nv50_display_create' Another patch was sent in the meantime to mark the function static but that would just cause a different warning about an unused function. Fixes: 1cd97b5490c8 ("drm/nouveau/gr/tu102-: use sw_veid_bundle_init from firmware") Link: https://lore.kernel.org/all/CACO55tuaNOYphHyB9+ygi9AnXVuF49etsW7x2X5K5iEtFNAAyw@mail.gmail.com/ Link: https://lore.kernel.org/all/20230417210310.2443152-1-arnd@kernel.org/ Signed-off-by: Arnd Bergmann Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/20230803143358.13563-1-arnd@kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c index 3b6c8100a242..a7775aa18541 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c @@ -206,19 +206,6 @@ tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack); } -int -tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif) -{ - int ret; - - ret = gm200_gr_load(gr, ver, fwif); - if (ret) - return ret; - - return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid, - &gr->bundle_veid); -} - static const struct gf100_gr_fwif tu102_gr_fwif[] = { { 0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr }, -- cgit v1.2.3 From 4fdfaef71fced490835145631a795497646f4555 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Wed, 2 Aug 2023 13:32:41 -0400 Subject: IB/hfi1: Fix possible panic during hotplug remove During hotplug remove it is possible that the update counters work might be pending, and may run after memory has been freed. Cancel the update counters work before freeing memory. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Douglas Miller Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/169099756100.3927190.15284930454106475280.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9dbb89e9f4af..baaa4406d5e6 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12307,6 +12307,7 @@ static void free_cntrs(struct hfi1_devdata *dd) if (dd->synth_stats_timer.function) del_timer_sync(&dd->synth_stats_timer); + cancel_work_sync(&dd->update_cntr_work); ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { kfree(ppd->cntrs); -- cgit v1.2.3 From 3c6bd1b7e2043fb00ce6b622709d176609431406 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 2 Aug 2023 10:52:22 +0200 Subject: Revert "drm/bridge: lt9611: Do not generate HFP/HBP/HSA and EOT packet" This reverts commit 8ddce13ae696 ("drm/bridge: lt9611: Do not generate HFP/HBP/HSA and EOT packet") to fix display regression on the Dragonboard 845c (SDM845) devboard. There's a mismatch on the real action of the following flags: - MIPI_DSI_MODE_VIDEO_NO_HSA - MIPI_DSI_MODE_VIDEO_NO_HFP - MIPI_DSI_MODE_VIDEO_NO_HBP which leads to a non-working display on qcom platforms. Cc: Marek Vasut Cc: Robert Foss Cc: Jagan Teki Cc: Dmitry Baryshkov Cc: Abhinav Kumar Fixes: 8ddce13ae696 ("drm/bridge: lt9611: Do not generate HFP/HBP/HSA and EOT packet") Reported-by: Amit Pundir Closes: https://lore.kernel.org/r/CAMi1Hd0TD=2z_=bcDrht3H_wiLvAFcv8Z-U_r_KUOoeMc6UMjw@mail.gmail.com/ Tested-by: Amit Pundir Acked-by: Dmitry Baryshkov #fix db845c [narmstrong: fixed commit message format] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230802-revert-do-not-generate-hfp-hbp-hsa-eot-packet-v1-1-f8a20084e15a@linaro.org --- drivers/gpu/drm/bridge/lontium-lt9611.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 5163e5224aad..9663601ce098 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -774,9 +774,7 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611, dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA | - MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP | - MIPI_DSI_MODE_NO_EOT_PACKET; + MIPI_DSI_MODE_VIDEO_HSE; ret = devm_mipi_dsi_attach(dev, dsi); if (ret < 0) { -- cgit v1.2.3 From 30c694fd4a99fbbc4115d180156ca01b60953371 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 4 Aug 2023 10:34:30 +0200 Subject: regulator: da9063: better fix null deref with partial DT Two versions of the original patch were sent but V1 was merged instead of V2 due to a mistake. So update to V2. The advantage of V2 is that it completely avoids dereferencing the pointer, even just to take the address, which may fix problems with some compilers. Both versions work on my gcc 9.4 but use the safer one. Fixes: 98e2dd5f7a8b ("regulator: da9063: fix null pointer deref with partial DT config") Signed-off-by: Martin Fuzzey Tested-by: Benjamin Bara Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230804083514.1887124-1-martin.fuzzey@flowbird.group Signed-off-by: Mark Brown --- drivers/regulator/da9063-regulator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c index dfd5ec9f75c9..a0621665a6d2 100644 --- a/drivers/regulator/da9063-regulator.c +++ b/drivers/regulator/da9063-regulator.c @@ -778,9 +778,6 @@ static int da9063_check_xvp_constraints(struct regulator_config *config) const struct notification_limit *uv_l = &constr->under_voltage_limits; const struct notification_limit *ov_l = &constr->over_voltage_limits; - if (!config->init_data) /* No config in DT, pointers will be invalid */ - return 0; - /* make sure that only one severity is used to clarify if unchanged, enabled or disabled */ if ((!!uv_l->prot + !!uv_l->err + !!uv_l->warn) > 1) { dev_err(config->dev, "%s: at most one voltage monitoring severity allowed!\n", @@ -1031,9 +1028,12 @@ static int da9063_regulator_probe(struct platform_device *pdev) config.of_node = da9063_reg_matches[id].of_node; config.regmap = da9063->regmap; - ret = da9063_check_xvp_constraints(&config); - if (ret) - return ret; + /* Checking constraints requires init_data from DT. */ + if (config.init_data) { + ret = da9063_check_xvp_constraints(&config); + if (ret) + return ret; + } regl->rdev = devm_regulator_register(&pdev->dev, ®l->desc, &config); -- cgit v1.2.3 From 4270d2b4845e820b274702bfc2a7140f69e4d19d Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 12 Jul 2023 08:57:22 +0000 Subject: usb: typec: tcpm: Fix response to vsafe0V event Do not transition to SNK_UNATTACHED state when receiving vsafe0v event while in SNK_HARD_RESET_WAIT_VBUS. Ignore VBUS off events as well as in some platforms VBUS off can be signalled more than once. [143515.364753] Requesting mux state 1, usb-role 2, orientation 2 [143515.365520] pending state change SNK_HARD_RESET_SINK_OFF -> SNK_HARD_RESET_SINK_ON @ 650 ms [rev3 HARD_RESET] [143515.632281] CC1: 0 -> 0, CC2: 3 -> 0 [state SNK_HARD_RESET_SINK_OFF, polarity 1, disconnected] [143515.637214] VBUS on [143515.664985] VBUS off [143515.664992] state change SNK_HARD_RESET_SINK_OFF -> SNK_HARD_RESET_WAIT_VBUS [rev3 HARD_RESET] [143515.665564] VBUS VSAFE0V [143515.665566] state change SNK_HARD_RESET_WAIT_VBUS -> SNK_UNATTACHED [rev3 HARD_RESET] Fixes: 28b43d3d746b ("usb: typec: tcpm: Introduce vsafe0v for vbus") Cc: Signed-off-by: Badhri Jagan Sridharan Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230712085722.1414743-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 829d75ebab42..cc1d83926497 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5349,6 +5349,10 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) /* Do nothing, vbus drop expected */ break; + case SNK_HARD_RESET_WAIT_VBUS: + /* Do nothing, its OK to receive vbus off events */ + break; + default: if (port->pwr_role == TYPEC_SINK && port->attached) tcpm_set_state(port, SNK_UNATTACHED, tcpm_wait_for_discharge(port)); @@ -5395,6 +5399,9 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) case SNK_DEBOUNCED: /*Do nothing, still waiting for VSAFE5V for connect */ break; + case SNK_HARD_RESET_WAIT_VBUS: + /* Do nothing, its OK to receive vbus off events */ + break; default: if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled) tcpm_set_state(port, SNK_UNATTACHED, 0); -- cgit v1.2.3 From 5a5ccd61cfd76156cb3e0373c300c509d05448ce Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 26 Jul 2023 02:09:02 +0000 Subject: usb: typec: altmodes/displayport: Signal hpd when configuring pin assignment When connecting to some DisplayPort partners, the initial status update after entering DisplayPort Alt Mode notifies that the DFP_D/UFP_D is not in the connected state. This leads to sending a configure message that keeps the device in USB mode. The port partner then sets DFP_D/UFP_D to the connected state and HPD to high in the same Attention message. Currently, the HPD signal is dropped in order to handle configuration. This patch saves changes to the HPD signal when the device chooses to configure during dp_altmode_status_update, and invokes sysfs_notify if necessary for HPD after configuring. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230726020903.1409072-1-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 66de880b28d0..cdf8261e22db 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -60,6 +60,7 @@ struct dp_altmode { enum dp_state state; bool hpd; + bool pending_hpd; struct mutex lock; /* device lock */ struct work_struct work; @@ -144,8 +145,13 @@ static int dp_altmode_status_update(struct dp_altmode *dp) dp->state = DP_STATE_EXIT; } else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) { ret = dp_altmode_configure(dp, con); - if (!ret) + if (!ret) { dp->state = DP_STATE_CONFIGURE; + if (dp->hpd != hpd) { + dp->hpd = hpd; + dp->pending_hpd = true; + } + } } else { if (dp->hpd != hpd) { drm_connector_oob_hotplug_event(dp->connector_fwnode); @@ -161,6 +167,16 @@ static int dp_altmode_configured(struct dp_altmode *dp) { sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration"); sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment"); + /* + * If the DFP_D/UFP_D sends a change in HPD when first notifying the + * DisplayPort driver that it is connected, then we wait until + * configuration is complete to signal HPD. + */ + if (dp->pending_hpd) { + drm_connector_oob_hotplug_event(dp->connector_fwnode); + sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd"); + dp->pending_hpd = false; + } return dp_altmode_notify(dp); } -- cgit v1.2.3 From 348359e7c232adc153ed7ec9a157f22d68d29860 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 18 Jul 2023 23:40:05 +0200 Subject: usb: typec: nb7vpq904m: Add an error handling path in nb7vpq904m_probe() In case of error in the nb7vpq904m_probe() probe function, some resources need to be freed, as already done in the remove function. Add the missing error handling path and adjust code accordingly. Fixes: 88d8f3ac9c67 ("usb: typec: add support for the nb7vpq904m Type-C Linear Redriver") Signed-off-by: Christophe JAILLET Reviewed-by: Neil Armstrong Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/9118954765821ea9f1179883602b4eca63e91749.1689716381.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/nb7vpq904m.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/usb/typec/mux/nb7vpq904m.c b/drivers/usb/typec/mux/nb7vpq904m.c index 80e580d50129..4d1122d95013 100644 --- a/drivers/usb/typec/mux/nb7vpq904m.c +++ b/drivers/usb/typec/mux/nb7vpq904m.c @@ -463,16 +463,18 @@ static int nb7vpq904m_probe(struct i2c_client *client) ret = nb7vpq904m_register_bridge(nb7); if (ret) - return ret; + goto err_disable_gpio; sw_desc.drvdata = nb7; sw_desc.fwnode = dev->fwnode; sw_desc.set = nb7vpq904m_sw_set; nb7->sw = typec_switch_register(dev, &sw_desc); - if (IS_ERR(nb7->sw)) - return dev_err_probe(dev, PTR_ERR(nb7->sw), - "Error registering typec switch\n"); + if (IS_ERR(nb7->sw)) { + ret = dev_err_probe(dev, PTR_ERR(nb7->sw), + "Error registering typec switch\n"); + goto err_disable_gpio; + } retimer_desc.drvdata = nb7; retimer_desc.fwnode = dev->fwnode; @@ -480,12 +482,21 @@ static int nb7vpq904m_probe(struct i2c_client *client) nb7->retimer = typec_retimer_register(dev, &retimer_desc); if (IS_ERR(nb7->retimer)) { - typec_switch_unregister(nb7->sw); - return dev_err_probe(dev, PTR_ERR(nb7->retimer), - "Error registering typec retimer\n"); + ret = dev_err_probe(dev, PTR_ERR(nb7->retimer), + "Error registering typec retimer\n"); + goto err_switch_unregister; } return 0; + +err_switch_unregister: + typec_switch_unregister(nb7->sw); + +err_disable_gpio: + gpiod_set_value(nb7->enable_gpio, 0); + regulator_disable(nb7->vcc_supply); + + return ret; } static void nb7vpq904m_remove(struct i2c_client *client) -- cgit v1.2.3 From ef7c4d8a90c64bac294363c6f67eb98246a162a2 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 31 Jul 2023 16:12:10 +0300 Subject: usb: typec: mux: intel: Add dependency on USB_COMMON This fixes an undefined reference to `usb_debug_root' issue when USB_COMMON is not enabled. Fixes: 0a453dc9f260 ("usb: typec: intel_pmc_mux: Expose IOM port status to debugfs") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/lkml/c3bb8781-676d-2448-cfbb-62e29f1f570b@infradead.org/ Cc: Rajat Khandelwal Signed-off-by: Heikki Krogerus Tested-by: Randy Dunlap # build-tested Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20230731131210.43158-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/mux/Kconfig b/drivers/usb/typec/mux/Kconfig index 784b9d8107e9..65da61150ba7 100644 --- a/drivers/usb/typec/mux/Kconfig +++ b/drivers/usb/typec/mux/Kconfig @@ -29,6 +29,7 @@ config TYPEC_MUX_INTEL_PMC tristate "Intel PMC mux control" depends on ACPI depends on INTEL_SCU_IPC + select USB_COMMON select USB_ROLE_SWITCH help Driver for USB muxes controlled by Intel PMC FW. Intel PMC FW can -- cgit v1.2.3 From 65dadb2beeb7360232b09ebc4585b54475dfee06 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 29 Jul 2023 10:59:38 -0400 Subject: USB: Gadget: core: Help prevent panic during UVC unconfigure Avichal Rakesh reported a kernel panic that occurred when the UVC gadget driver was removed from a gadget's configuration. The panic involves a somewhat complicated interaction between the kernel driver and a userspace component (as described in the Link tag below), but the analysis did make one thing clear: The Gadget core should accomodate gadget drivers calling usb_gadget_deactivate() as part of their unbind procedure. Currently this doesn't work. gadget_unbind_driver() calls driver->unbind() while holding the udc->connect_lock mutex, and usb_gadget_deactivate() attempts to acquire that mutex, which will result in a deadlock. The simple fix is for gadget_unbind_driver() to release the mutex when invoking the ->unbind() callback. There is no particular reason for it to be holding the mutex at that time, and the mutex isn't held while the ->bind() callback is invoked. So we'll drop the mutex before performing the unbind callback and reacquire it afterward. We'll also add a couple of comments to usb_gadget_activate() and usb_gadget_deactivate(). Because they run in process context they must not be called from a gadget driver's ->disconnect() callback, which (according to the kerneldoc for struct usb_gadget_driver in include/linux/usb/gadget.h) may run in interrupt context. This may help prevent similar bugs from arising in the future. Reported-and-tested-by: Avichal Rakesh Signed-off-by: Alan Stern Fixes: 286d9975a838 ("usb: gadget: udc: core: Prevent soft_connect_store() race") Link: https://lore.kernel.org/linux-usb/4d7aa3f4-22d9-9f5a-3d70-1bd7148ff4ba@google.com/ Cc: Badhri Jagan Sridharan Cc: Link: https://lore.kernel.org/r/48b2f1f1-0639-46bf-bbfc-98cb05a24914@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index cd58f2a4e7f3..7d49d8a0b00c 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -822,6 +822,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_disconnect); * usb_gadget_activate() is called. For example, user mode components may * need to be activated before the system can talk to hosts. * + * This routine may sleep; it must not be called in interrupt context + * (such as from within a gadget driver's disconnect() callback). + * * Returns zero on success, else negative errno. */ int usb_gadget_deactivate(struct usb_gadget *gadget) @@ -860,6 +863,8 @@ EXPORT_SYMBOL_GPL(usb_gadget_deactivate); * This routine activates gadget which was previously deactivated with * usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed. * + * This routine may sleep; it must not be called in interrupt context. + * * Returns zero on success, else negative errno. */ int usb_gadget_activate(struct usb_gadget *gadget) @@ -1638,7 +1643,11 @@ static void gadget_unbind_driver(struct device *dev) usb_gadget_disable_async_callbacks(udc); if (gadget->irq) synchronize_irq(gadget->irq); + mutex_unlock(&udc->connect_lock); + udc->driver->unbind(gadget); + + mutex_lock(&udc->connect_lock); usb_gadget_udc_stop_locked(udc); mutex_unlock(&udc->connect_lock); -- cgit v1.2.3 From 8e21a620c7e6e00347ade1a6ed4967b359eada5a Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 1 Aug 2023 14:33:52 +0530 Subject: usb: common: usb-conn-gpio: Prevent bailing out if initial role is none Currently if we bootup a device without cable connected, then usb-conn-gpio won't call set_role() because last_role is same as current role. This happens since last_role gets initialised to zero during the probe. To avoid this, add a new flag initial_detection into struct usb_conn_info, which prevents bailing out during initial detection. Cc: # 5.4 Fixes: 4602f3bff266 ("usb: common: add USB GPIO based connection detection driver") Signed-off-by: Prashanth K Tested-by: AngeloGioacchino Del Regno Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/1690880632-12588-1-git-send-email-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/usb-conn-gpio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c index 766005d20bae..501e8bc9738e 100644 --- a/drivers/usb/common/usb-conn-gpio.c +++ b/drivers/usb/common/usb-conn-gpio.c @@ -42,6 +42,7 @@ struct usb_conn_info { struct power_supply_desc desc; struct power_supply *charger; + bool initial_detection; }; /* @@ -86,11 +87,13 @@ static void usb_conn_detect_cable(struct work_struct *work) dev_dbg(info->dev, "role %s -> %s, gpios: id %d, vbus %d\n", usb_role_string(info->last_role), usb_role_string(role), id, vbus); - if (info->last_role == role) { + if (!info->initial_detection && info->last_role == role) { dev_warn(info->dev, "repeated role: %s\n", usb_role_string(role)); return; } + info->initial_detection = false; + if (info->last_role == USB_ROLE_HOST && info->vbus) regulator_disable(info->vbus); @@ -258,6 +261,7 @@ static int usb_conn_probe(struct platform_device *pdev) device_set_wakeup_capable(&pdev->dev, true); /* Perform initial detection */ + info->initial_detection = true; usb_conn_queue_dwork(info, 0); return 0; -- cgit v1.2.3 From a6ff6e7a9dd69364547751db0f626a10a6d628d2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 2 Aug 2023 13:49:02 -0400 Subject: usb-storage: alauda: Fix uninit-value in alauda_check_media() Syzbot got KMSAN to complain about access to an uninitialized value in the alauda subdriver of usb-storage: BUG: KMSAN: uninit-value in alauda_transport+0x462/0x57f0 drivers/usb/storage/alauda.c:1137 CPU: 0 PID: 12279 Comm: usb-storage Not tainted 5.3.0-rc7+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x13a/0x2b0 mm/kmsan/kmsan_report.c:108 __msan_warning+0x73/0xe0 mm/kmsan/kmsan_instr.c:250 alauda_check_media+0x344/0x3310 drivers/usb/storage/alauda.c:460 The problem is that alauda_check_media() doesn't verify that its USB transfer succeeded before trying to use the received data. What should happen if the transfer fails isn't entirely clear, but a reasonably conservative approach is to pretend that no media is present. A similar problem exists in a usb_stor_dbg() call in alauda_get_media_status(). In this case, when an error occurs the call is redundant, because usb_stor_ctrl_transfer() already will print a debugging message. Finally, unrelated to the uninitialized memory access, is the fact that alauda_check_media() performs DMA to a buffer on the stack. Fortunately usb-storage provides a general purpose DMA-able buffer for uses like this. We'll use it instead. Reported-and-tested-by: syzbot+e7d46eb426883fb97efd@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000007d25ff059457342d@google.com/T/ Suggested-by: Christophe JAILLET Signed-off-by: Alan Stern Fixes: e80b0fade09e ("[PATCH] USB Storage: add alauda support") Cc: Link: https://lore.kernel.org/r/693d5d5e-f09b-42d0-8ed9-1f96cd30bcce@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/alauda.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c index 5e912dd29b4c..115f05a6201a 100644 --- a/drivers/usb/storage/alauda.c +++ b/drivers/usb/storage/alauda.c @@ -318,7 +318,8 @@ static int alauda_get_media_status(struct us_data *us, unsigned char *data) rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, command, 0xc0, 0, 1, data, 2); - usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]); + if (rc == USB_STOR_XFER_GOOD) + usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]); return rc; } @@ -454,9 +455,14 @@ static int alauda_init_media(struct us_data *us) static int alauda_check_media(struct us_data *us) { struct alauda_info *info = (struct alauda_info *) us->extra; - unsigned char status[2]; + unsigned char *status = us->iobuf; + int rc; - alauda_get_media_status(us, status); + rc = alauda_get_media_status(us, status); + if (rc != USB_STOR_XFER_GOOD) { + status[0] = 0xF0; /* Pretend there's no media */ + status[1] = 0; + } /* Check for no media or door open */ if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10) -- cgit v1.2.3 From 3ddaa6a274578e23745b7466346fc2650df8f959 Mon Sep 17 00:00:00 2001 From: Elson Roy Serrao Date: Tue, 1 Aug 2023 12:26:58 -0700 Subject: usb: dwc3: Properly handle processing of pending events If dwc3 is runtime suspended we defer processing the event buffer until resume, by setting the pending_events flag. Set this flag before triggering resume to avoid race with the runtime resume callback. While handling the pending events, in addition to checking the event buffer we also need to process it. Handle this by explicitly calling dwc3_thread_interrupt(). Also balance the runtime pm get() operation that triggered this processing. Cc: stable@vger.kernel.org Fixes: fc8bb91bc83e ("usb: dwc3: implement runtime PM") Signed-off-by: Elson Roy Serrao Acked-by: Thinh Nguyen Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230801192658.19275-1-quic_eserrao@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5fd067151fbf..858fe4c299b7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4455,9 +4455,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) u32 count; if (pm_runtime_suspended(dwc->dev)) { + dwc->pending_events = true; + /* + * Trigger runtime resume. The get() function will be balanced + * after processing the pending events in dwc3_process_pending + * events(). + */ pm_runtime_get(dwc->dev); disable_irq_nosync(dwc->irq_gadget); - dwc->pending_events = true; return IRQ_HANDLED; } @@ -4718,6 +4723,8 @@ void dwc3_gadget_process_pending_events(struct dwc3 *dwc) { if (dwc->pending_events) { dwc3_interrupt(dwc->irq_gadget, dwc->ev_buf); + dwc3_thread_interrupt(dwc->irq_gadget, dwc->ev_buf); + pm_runtime_put(dwc->dev); dwc->pending_events = false; enable_irq(dwc->irq_gadget); } -- cgit v1.2.3 From 596a5123cc782d458b057eb3837e66535cd0befa Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 22 Jun 2023 14:59:12 +0300 Subject: thunderbolt: Fix memory leak in tb_handle_dp_bandwidth_request() The memory allocated in tb_queue_dp_bandwidth_request() needs to be released once the request is handled to avoid leaking it. Fixes: 6ce3563520be ("thunderbolt: Add support for DisplayPort bandwidth allocation mode") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 62b26b7998fd..3fb4553a6442 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1964,6 +1964,8 @@ unlock: pm_runtime_mark_last_busy(&tb->dev); pm_runtime_put_autosuspend(&tb->dev); + + kfree(ev); } static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port) -- cgit v1.2.3 From adb9743d6a08778b78d62d16b4230346d3508986 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Sun, 25 Jun 2023 15:49:37 +0000 Subject: binder: fix memory leak in binder_init() In binder_init(), the destruction of binder_alloc_shrinker_init() is not performed in the wrong path, which will cause memory leaks. So this commit introduces binder_alloc_shrinker_exit() and calls it in the wrong path to fix that. Signed-off-by: Qi Zheng Acked-by: Carlos Llamas Fixes: f2517eb76f1f ("android: binder: Add global lru shrinker to binder") Cc: stable Link: https://lore.kernel.org/r/20230625154937.64316-1-qi.zheng@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 1 + drivers/android/binder_alloc.c | 6 ++++++ drivers/android/binder_alloc.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 486c8271cab7..d720f93d8b19 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6617,6 +6617,7 @@ err_init_binder_device_failed: err_alloc_device_names_failed: debugfs_remove_recursive(binder_debugfs_dir_entry_root); + binder_alloc_shrinker_exit(); return ret; } diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 662a2a2e2e84..e3db8297095a 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -1087,6 +1087,12 @@ int binder_alloc_shrinker_init(void) return ret; } +void binder_alloc_shrinker_exit(void) +{ + unregister_shrinker(&binder_shrinker); + list_lru_destroy(&binder_alloc_lru); +} + /** * check_buffer() - verify that buffer/offset is safe to access * @alloc: binder_alloc for this proc diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 138d1d5af9ce..dc1e2b01dd64 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -129,6 +129,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, int pid); extern void binder_alloc_init(struct binder_alloc *alloc); extern int binder_alloc_shrinker_init(void); +extern void binder_alloc_shrinker_exit(void); extern void binder_alloc_vma_close(struct binder_alloc *alloc); extern struct binder_buffer * binder_alloc_prepare_to_free(struct binder_alloc *alloc, -- cgit v1.2.3 From 101bd907b4244a726980ee67f95ed9cafab6ff7a Mon Sep 17 00:00:00 2001 From: Ricky WU Date: Tue, 25 Jul 2023 09:10:54 +0000 Subject: misc: rtsx: judge ASPM Mode to set PETXCFG Reg ASPM Mode is ASPM_MODE_CFG need to judge the value of clkreq_0 to set HIGH or LOW, if the ASPM Mode is ASPM_MODE_REG always set to HIGH during the initialization. Cc: stable@vger.kernel.org Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/52906c6836374c8cb068225954c5543a@realtek.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rts5227.c | 2 +- drivers/misc/cardreader/rts5228.c | 18 ------------------ drivers/misc/cardreader/rts5249.c | 3 +-- drivers/misc/cardreader/rts5260.c | 18 ------------------ drivers/misc/cardreader/rts5261.c | 18 ------------------ drivers/misc/cardreader/rtsx_pcr.c | 5 ++++- 6 files changed, 6 insertions(+), 58 deletions(-) diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index d676cf63a966..3dae5e3a1697 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -195,7 +195,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) } } - if (option->force_clkreq_0) + if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); else diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c index cfebad51d1d8..f4ab09439da7 100644 --- a/drivers/misc/cardreader/rts5228.c +++ b/drivers/misc/cardreader/rts5228.c @@ -435,17 +435,10 @@ static void rts5228_init_from_cfg(struct rtsx_pcr *pcr) option->ltr_enabled = false; } } - - if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN - | PM_L1_1_EN | PM_L1_2_EN)) - option->force_clkreq_0 = false; - else - option->force_clkreq_0 = true; } static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) { - struct rtsx_cr_option *option = &pcr->option; rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1, CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); @@ -476,17 +469,6 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) else rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); - /* - * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced - * to drive low, and we forcibly request clock. - */ - if (option->force_clkreq_0) - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); - else - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); - rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB); if (pcr->rtd3_en) { diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index 91d240dd68fa..47ab72a43256 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -327,12 +327,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) } } - /* * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced * to drive low, and we forcibly request clock. */ - if (option->force_clkreq_0) + if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG) rtsx_pci_write_register(pcr, PETXCFG, FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); else diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 9b42b20a3e5a..79b18f6f73a8 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -517,17 +517,10 @@ static void rts5260_init_from_cfg(struct rtsx_pcr *pcr) option->ltr_enabled = false; } } - - if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN - | PM_L1_1_EN | PM_L1_2_EN)) - option->force_clkreq_0 = false; - else - option->force_clkreq_0 = true; } static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) { - struct rtsx_cr_option *option = &pcr->option; /* Set mcu_cnt to 7 to ensure data can be sampled properly */ rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07); @@ -546,17 +539,6 @@ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) rts5260_init_hw(pcr); - /* - * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced - * to drive low, and we forcibly request clock. - */ - if (option->force_clkreq_0) - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); - else - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); - rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); return 0; diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c index b1e76030cafd..94af6bf8a25a 100644 --- a/drivers/misc/cardreader/rts5261.c +++ b/drivers/misc/cardreader/rts5261.c @@ -498,17 +498,10 @@ static void rts5261_init_from_cfg(struct rtsx_pcr *pcr) option->ltr_enabled = false; } } - - if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN - | PM_L1_1_EN | PM_L1_2_EN)) - option->force_clkreq_0 = false; - else - option->force_clkreq_0 = true; } static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) { - struct rtsx_cr_option *option = &pcr->option; u32 val; rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, @@ -554,17 +547,6 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) else rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); - /* - * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced - * to drive low, and we forcibly request clock. - */ - if (option->force_clkreq_0) - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); - else - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); - rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB); if (pcr->rtd3_en) { diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 32b7783e9d4f..a3f4b52bb159 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1326,8 +1326,11 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) return err; } - if (pcr->aspm_mode == ASPM_MODE_REG) + if (pcr->aspm_mode == ASPM_MODE_REG) { rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30); + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + } /* No CD interrupt if probing driver with card inserted. * So we need to initialize pcr->card_exist here. -- cgit v1.2.3 From 77107b08f0f29c6e9d02c2e4bfcd6e1e0c57bdd5 Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Thu, 27 Jul 2023 11:04:35 +0200 Subject: misc: tps6594-esm: Disable ESM for rev 1 PMIC Due to a silicon bug, ESM on TPS6594 PMIC revision 1 is not working properly. This patch keeps SOC ESM disabled for such PMIC. Fixes: 875fdd0787e4 ("misc: tps6594-esm: Add driver for TI TPS6594 ESM") Co-developed-by: Julien Panis Signed-off-by: Julien Panis Signed-off-by: Esteban Blanc Link: https://lore.kernel.org/r/20230726-tps6594_fix_esm_for_v1-v1-1-2adfdcad31c2@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/tps6594-esm.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/misc/tps6594-esm.c b/drivers/misc/tps6594-esm.c index b488f704f104..05e2c151e632 100644 --- a/drivers/misc/tps6594-esm.c +++ b/drivers/misc/tps6594-esm.c @@ -13,6 +13,8 @@ #include +#define TPS6594_DEV_REV_1 0x08 + static irqreturn_t tps6594_esm_isr(int irq, void *dev_id) { struct platform_device *pdev = dev_id; @@ -32,11 +34,26 @@ static int tps6594_esm_probe(struct platform_device *pdev) { struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent); struct device *dev = &pdev->dev; + unsigned int rev; int irq; int ret; int i; - for (i = 0 ; i < pdev->num_resources ; i++) { + /* + * Due to a bug in revision 1 of the PMIC, the GPIO3 used for the + * SoC ESM function is used to power the load switch instead. + * As a consequence, ESM can not be used on those PMIC. + * Check the version and return an error in case of revision 1. + */ + ret = regmap_read(tps->regmap, TPS6594_REG_DEV_REV, &rev); + if (ret) + return dev_err_probe(dev, ret, + "Failed to read PMIC revision\n"); + if (rev == TPS6594_DEV_REV_1) + return dev_err_probe(dev, -ENODEV, + "ESM not supported for revision 1 PMIC\n"); + + for (i = 0; i < pdev->num_resources; i++) { irq = platform_get_irq_byname(pdev, pdev->resource[i].name); if (irq < 0) return dev_err_probe(dev, irq, "Failed to get %s irq\n", -- cgit v1.2.3 From d0b4f95a51038becce4bdab4789aa7ce59d4ea6e Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Wed, 26 Jul 2023 11:53:59 +0200 Subject: riscv/kexec: handle R_RISCV_CALL_PLT relocation type R_RISCV_CALL has been deprecated and replaced by R_RISCV_CALL_PLT. See Enum 18-19 in Table 3. Relocation types here: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc It was deprecated in ("Deprecated R_RISCV_CALL, prefer R_RISCV_CALL_PLT"): https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a0dced85018d7a0ec17023c9389cbd70b1dbc1b0 Recent tools (at least GNU binutils-2.40) already use R_RISCV_CALL_PLT. Kernels built with such binutils fail kexec_load_file(2) with: kexec_image: Unknown rela relocation: 19 kexec_image: Error loading purgatory ret=-8 The binary code at the call site remains the same, so tell arch_kexec_apply_relocations_add() to handle _PLT alike. Fixes: 838b3e28488f ("RISC-V: Load purgatory in kexec_file") Signed-off-by: Torsten Duwe Signed-off-by: Petr Tesarik Cc: Li Zhengyu Cc: stable@vger.kernel.org Reviewed-by: Conor Dooley Link: https://lore.kernel.org/all/b046b164af8efd33bbdb7d4003273bdf9196a5b0.1690365011.git.petr.tesarik.ext@huawei.com/ Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 5372b708fae2..38390d3bdcac 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, * sym, instead of searching the whole relsec. */ case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: case R_RISCV_CALL: *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | ENCODE_UJTYPE_IMM(val - addr); -- cgit v1.2.3 From 49af7a2cd5f678217b8b4f86a29411aebebf3e78 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Wed, 26 Jul 2023 11:54:01 +0200 Subject: riscv/kexec: load initrd high in available memory When initrd is loaded low, the secondary kernel fails like this: INITRD: 0xdc581000+0x00eef000 overlaps in-use memory region This initrd load address corresponds to the _end symbol, but the reservation is aligned on PMD_SIZE, as explained by a comment in setup_bootmem(). It is technically possible to align the initrd load address accordingly, leaving a hole between the end of kernel and the initrd, but it is much simpler to allocate the initrd top-down. Fixes: 838b3e28488f ("RISC-V: Load purgatory in kexec_file") Signed-off-by: Torsten Duwe Signed-off-by: Petr Tesarik Cc: stable@vger.kernel.org Reviewed-by: Conor Dooley Link: https://lore.kernel.org/all/67c8eb9eea25717c2c8208d9bfbfaa39e6e2a1c6.1690365011.git.petr.tesarik.ext@huawei.com/ Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 38390d3bdcac..c08bb5c3b385 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -281,7 +281,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, kbuf.buffer = initrd; kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = false; + kbuf.top_down = true; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) -- cgit v1.2.3 From c3bcc65d4d2e8292c435322cbc34c318d06b8b6c Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 4 Jul 2023 14:18:37 +0200 Subject: riscv: Start of DRAM should at least be aligned on PMD size for the direct mapping So that we do not end up mapping the whole linear mapping using 4K pages, which is slow at boot time, and also very likely at runtime. So make sure we align the start of DRAM on a PMD boundary. Signed-off-by: Alexandre Ghiti Reported-by: Song Shuai Fixes: 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") Tested-by: Song Shuai Link: https://lore.kernel.org/r/20230704121837.248976-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9ce504737d18..ad845c3aa9b2 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -214,8 +214,13 @@ static void __init setup_bootmem(void) memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); phys_ram_end = memblock_end_of_DRAM(); + + /* + * Make sure we align the start of the memory on a PMD boundary so that + * at worst, we map the linear mapping with PMD mappings. + */ if (!IS_ENABLED(CONFIG_XIP_KERNEL)) - phys_ram_base = memblock_start_of_DRAM(); + phys_ram_base = memblock_start_of_DRAM() & PMD_MASK; /* * In 64-bit, any use of __va/__pa before this point is wrong as we -- cgit v1.2.3 From 4b5d1e47b69426c0f7491d97d73ad0152d02d437 Mon Sep 17 00:00:00 2001 From: Andrew Yang Date: Fri, 21 Jul 2023 14:37:01 +0800 Subject: zsmalloc: fix races between modifications of fullness and isolated We encountered many kernel exceptions of VM_BUG_ON(zspage->isolated == 0) in dec_zspage_isolation() and BUG_ON(!pages[1]) in zs_unmap_object() lately. This issue only occurs when migration and reclamation occur at the same time. With our memory stress test, we can reproduce this issue several times a day. We have no idea why no one else encountered this issue. BTW, we switched to the new kernel version with this defect a few months ago. Since fullness and isolated share the same unsigned int, modifications of them should be protected by the same lock. [andrew.yang@mediatek.com: move comment] Link: https://lkml.kernel.org/r/20230727062910.6337-1-andrew.yang@mediatek.com Link: https://lkml.kernel.org/r/20230721063705.11455-1-andrew.yang@mediatek.com Fixes: c4549b871102 ("zsmalloc: remove zspage isolation for migration") Signed-off-by: Andrew Yang Reviewed-by: Sergey Senozhatsky Cc: AngeloGioacchino Del Regno Cc: Matthias Brugger Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 3f057970504e..32916d28d9d9 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage, static bool zs_page_isolate(struct page *page, isolate_mode_t mode) { + struct zs_pool *pool; struct zspage *zspage; /* @@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) VM_BUG_ON_PAGE(PageIsolated(page), page); zspage = get_zspage(page); - migrate_write_lock(zspage); + pool = zspage->pool; + spin_lock(&pool->lock); inc_zspage_isolation(zspage); - migrate_write_unlock(zspage); + spin_unlock(&pool->lock); return true; } @@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page, kunmap_atomic(s_addr); replace_sub_page(class, zspage, newpage, page); + dec_zspage_isolation(zspage); /* * Since we complete the data copy and set up new zspage structure, * it's okay to release the pool's lock. */ spin_unlock(&pool->lock); - dec_zspage_isolation(zspage); migrate_write_unlock(zspage); get_page(newpage); @@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page, static void zs_page_putback(struct page *page) { + struct zs_pool *pool; struct zspage *zspage; VM_BUG_ON_PAGE(!PageIsolated(page), page); zspage = get_zspage(page); - migrate_write_lock(zspage); + pool = zspage->pool; + spin_lock(&pool->lock); dec_zspage_isolation(zspage); - migrate_write_unlock(zspage); + spin_unlock(&pool->lock); } static const struct movable_operations zsmalloc_mops = { -- cgit v1.2.3 From f443fd5af5dbd531f880d3645d5dd36976cf087f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Jul 2023 11:57:56 +0100 Subject: crypto, cifs: fix error handling in extract_iter_to_sg() Fix error handling in extract_iter_to_sg(). Pages need to be unpinned, not put in extract_user_to_sg() when handling IOVEC/UBUF sources. The bug may result in a warning like the following: WARNING: CPU: 1 PID: 20384 at mm/gup.c:229 __lse_atomic_add arch/arm64/include/asm/atomic_lse.h:27 [inline] WARNING: CPU: 1 PID: 20384 at mm/gup.c:229 arch_atomic_add arch/arm64/include/asm/atomic.h:28 [inline] WARNING: CPU: 1 PID: 20384 at mm/gup.c:229 raw_atomic_add include/linux/atomic/atomic-arch-fallback.h:537 [inline] WARNING: CPU: 1 PID: 20384 at mm/gup.c:229 atomic_add include/linux/atomic/atomic-instrumented.h:105 [inline] WARNING: CPU: 1 PID: 20384 at mm/gup.c:229 try_grab_page+0x108/0x160 mm/gup.c:252 ... pc : try_grab_page+0x108/0x160 mm/gup.c:229 lr : follow_page_pte+0x174/0x3e4 mm/gup.c:651 ... Call trace: __lse_atomic_add arch/arm64/include/asm/atomic_lse.h:27 [inline] arch_atomic_add arch/arm64/include/asm/atomic.h:28 [inline] raw_atomic_add include/linux/atomic/atomic-arch-fallback.h:537 [inline] atomic_add include/linux/atomic/atomic-instrumented.h:105 [inline] try_grab_page+0x108/0x160 mm/gup.c:252 follow_pmd_mask mm/gup.c:734 [inline] follow_pud_mask mm/gup.c:765 [inline] follow_p4d_mask mm/gup.c:782 [inline] follow_page_mask+0x12c/0x2e4 mm/gup.c:839 __get_user_pages+0x174/0x30c mm/gup.c:1217 __get_user_pages_locked mm/gup.c:1448 [inline] __gup_longterm_locked+0x94/0x8f4 mm/gup.c:2142 internal_get_user_pages_fast+0x970/0xb60 mm/gup.c:3140 pin_user_pages_fast+0x4c/0x60 mm/gup.c:3246 iov_iter_extract_user_pages lib/iov_iter.c:1768 [inline] iov_iter_extract_pages+0xc8/0x54c lib/iov_iter.c:1831 extract_user_to_sg lib/scatterlist.c:1123 [inline] extract_iter_to_sg lib/scatterlist.c:1349 [inline] extract_iter_to_sg+0x26c/0x6fc lib/scatterlist.c:1339 hash_sendmsg+0xc0/0x43c crypto/algif_hash.c:117 sock_sendmsg_nosec net/socket.c:725 [inline] sock_sendmsg+0x54/0x60 net/socket.c:748 ____sys_sendmsg+0x270/0x2ac net/socket.c:2494 ___sys_sendmsg+0x80/0xdc net/socket.c:2548 __sys_sendmsg+0x68/0xc4 net/socket.c:2577 __do_sys_sendmsg net/socket.c:2586 [inline] __se_sys_sendmsg net/socket.c:2584 [inline] __arm64_sys_sendmsg+0x24/0x30 net/socket.c:2584 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x48/0x114 arch/arm64/kernel/syscall.c:52 el0_svc_common.constprop.0+0x44/0xe4 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x38/0xa4 arch/arm64/kernel/syscall.c:191 el0_svc+0x2c/0xb0 arch/arm64/kernel/entry-common.c:647 el0t_64_sync_handler+0xc0/0xc4 arch/arm64/kernel/entry-common.c:665 el0t_64_sync+0x19c/0x1a0 arch/arm64/kernel/entry.S:591 Link: https://lkml.kernel.org/r/20571.1690369076@warthog.procyon.org.uk Fixes: 018584697533 ("netfs: Add a function to extract an iterator into a scatterlist") Reported-by: syzbot+9b82859567f2e50c123e@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-mm/000000000000273d0105ff97bf56@google.com/ Signed-off-by: David Howells Reviewed-by: David Hildenbrand Acked-by: Steve French Cc: Sven Schnelle Cc: Herbert Xu Cc: Jeff Layton Cc: Shyam Prasad N Cc: Rohith Surabattula Cc: Jens Axboe Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index e86231a44c3d..c65566b4dc66 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter, failed: while (sgtable->nents > sgtable->orig_nents) - put_page(sg_page(&sgtable->sgl[--sgtable->nents])); + unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents])); return res; } -- cgit v1.2.3 From cac7ea57a06016e4914848b707477fb07ee4ae1c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Jul 2023 17:09:30 +0100 Subject: radix tree test suite: fix incorrect allocation size for pthreads Currently the pthread allocation for each array item is based on the size of a pthread_t pointer and should be the size of the pthread_t structure, so the allocation is under-allocating the correct size. Fix this by using the size of each element in the pthreads array. Static analysis cppcheck reported: tools/testing/radix-tree/regression1.c:180:2: warning: Size of pointer 'threads' used instead of size of its data. [pointerSize] Link: https://lkml.kernel.org/r/20230727160930.632674-1-colin.i.king@gmail.com Fixes: 1366c37ed84b ("radix tree test harness") Signed-off-by: Colin Ian King Cc: Konstantin Khlebnikov Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- tools/testing/radix-tree/regression1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c index a61c7bcbc72d..63f468bf8245 100644 --- a/tools/testing/radix-tree/regression1.c +++ b/tools/testing/radix-tree/regression1.c @@ -177,7 +177,7 @@ void regression1_test(void) nr_threads = 2; pthread_barrier_init(&worker_barrier, NULL, nr_threads); - threads = malloc(nr_threads * sizeof(pthread_t *)); + threads = malloc(nr_threads * sizeof(*threads)); for (i = 0; i < nr_threads; i++) { arg = i; -- cgit v1.2.3 From f985fc322063c73916a0d5b6b3fcc6db2ba5792c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 27 Jul 2023 19:56:40 +0800 Subject: mm/swapfile: fix wrong swap entry type for hwpoisoned swapcache page Patch series "A few fixup patches for mm", v2. This series contains a few fixup patches to fix potential unexpected return value, fix wrong swap entry type for hwpoisoned swapcache page and so on. More details can be found in the respective changelogs. This patch (of 3): Hwpoisoned dirty swap cache page is kept in the swap cache and there's simple interception code in do_swap_page() to catch it. But when trying to swapoff, unuse_pte() will wrongly install a general sense of "future accesses are invalid" swap entry for hwpoisoned swap cache page due to unaware of such type of page. The user will receive SIGBUS signal without expected BUS_MCEERR_AR payload. BTW, typo 'hwposioned' is fixed. Link: https://lkml.kernel.org/r/20230727115643.639741-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20230727115643.639741-2-linmiaohe@huawei.com Fixes: 6b970599e807 ("mm: hwpoison: support recovery from ksm_might_need_to_copy()") Signed-off-by: Miaohe Lin Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/ksm.c | 2 ++ mm/swapfile.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index ba266359da55..d20d7662419b 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2784,6 +2784,8 @@ struct page *ksm_might_need_to_copy(struct page *page, anon_vma->root == vma->anon_vma->root) { return page; /* still no need to copy it */ } + if (PageHWPoison(page)) + return ERR_PTR(-EHWPOISON); if (!PageUptodate(page)) return page; /* let do_swap_page report the error */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 8e6dde68b389..b15112b1f1a8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, struct page *swapcache; spinlock_t *ptl; pte_t *pte, new_pte, old_pte; - bool hwposioned = false; + bool hwpoisoned = PageHWPoison(page); int ret = 1; swapcache = page; @@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, if (unlikely(!page)) return -ENOMEM; else if (unlikely(PTR_ERR(page) == -EHWPOISON)) - hwposioned = true; + hwpoisoned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), @@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, old_pte = ptep_get(pte); - if (unlikely(hwposioned || !PageUptodate(page))) { + if (unlikely(hwpoisoned || !PageUptodate(page))) { swp_entry_t swp_entry; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); - if (hwposioned) { + if (hwpoisoned) { swp_entry = make_hwpoison_entry(swapcache); page = swapcache; } else { -- cgit v1.2.3 From f29623e4a599c295cc8f518c8e4bb7848581a14d Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 27 Jul 2023 19:56:41 +0800 Subject: mm: memory-failure: fix potential unexpected return value from unpoison_memory() If unpoison_memory() fails to clear page hwpoisoned flag, return value ret is expected to be -EBUSY. But when get_hwpoison_page() returns 1 and fails to clear page hwpoisoned flag due to races, return value will be unexpected 1 leading to users being confused. And there's a code smell that the variable "ret" is used not only to save the return value of unpoison_memory(), but also the return value from get_hwpoison_page(). Make a further cleanup by using another auto-variable solely to save the return value of get_hwpoison_page() as suggested by Naoya. Link: https://lkml.kernel.org/r/20230727115643.639741-3-linmiaohe@huawei.com Fixes: bf181c582588 ("mm/hwpoison: fix unpoison_memory()") Signed-off-by: Miaohe Lin Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ece5d481b5ff..b32d370b5d43 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2466,7 +2466,7 @@ int unpoison_memory(unsigned long pfn) { struct folio *folio; struct page *p; - int ret = -EBUSY; + int ret = -EBUSY, ghp; unsigned long count = 1; bool huge = false; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, @@ -2514,29 +2514,28 @@ int unpoison_memory(unsigned long pfn) if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio)) goto unlock_mutex; - ret = get_hwpoison_page(p, MF_UNPOISON); - if (!ret) { + ghp = get_hwpoison_page(p, MF_UNPOISON); + if (!ghp) { if (PageHuge(p)) { huge = true; count = folio_free_raw_hwp(folio, false); - if (count == 0) { - ret = -EBUSY; + if (count == 0) goto unlock_mutex; - } } ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY; - } else if (ret < 0) { - if (ret == -EHWPOISON) { + } else if (ghp < 0) { + if (ghp == -EHWPOISON) { ret = put_page_back_buddy(p) ? 0 : -EBUSY; - } else + } else { + ret = ghp; unpoison_pr_info("Unpoison: failed to grab page %#lx\n", pfn, &unpoison_rs); + } } else { if (PageHuge(p)) { huge = true; count = folio_free_raw_hwp(folio, false); if (count == 0) { - ret = -EBUSY; folio_put(folio); goto unlock_mutex; } -- cgit v1.2.3 From faeb2ff2c1c5cb60ce0da193580b256c941f99ca Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 27 Jul 2023 19:56:42 +0800 Subject: mm: memory-failure: avoid false hwpoison page mapped error info folio->_mapcount is overloaded in SLAB, so folio_mapped() has to be done after folio_test_slab() is checked. Otherwise slab folio might be treated as a mapped folio leading to false 'Someone maps the hwpoison page' error info. Link: https://lkml.kernel.org/r/20230727115643.639741-4-linmiaohe@huawei.com Fixes: 230ac719c500 ("mm/hwpoison: don't try to unpoison containment-failed pages") Signed-off-by: Miaohe Lin Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Naoya Horiguchi Cc: Kefeng Wang Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b32d370b5d43..9a285038d765 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2499,6 +2499,13 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } + if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio)) + goto unlock_mutex; + + /* + * Note that folio->_mapcount is overloaded in SLAB, so the simple test + * in folio_mapped() has to be done after folio_test_slab() is checked. + */ if (folio_mapped(folio)) { unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n", pfn, &unpoison_rs); @@ -2511,9 +2518,6 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } - if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio)) - goto unlock_mutex; - ghp = get_hwpoison_page(p, MF_UNPOISON); if (!ghp) { if (PageHuge(p)) { -- cgit v1.2.3 From 32c877191e022b55fe3a374f3d7e9fb5741c514d Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 11 Jul 2023 15:09:41 -0700 Subject: hugetlb: do not clear hugetlb dtor until allocating vmemmap Patch series "Fix hugetlb free path race with memory errors". In the discussion of Jiaqi Yan's series "Improve hugetlbfs read on HWPOISON hugepages" the race window was discovered. https://lore.kernel.org/linux-mm/20230616233447.GB7371@monkey/ Freeing a hugetlb page back to low level memory allocators is performed in two steps. 1) Under hugetlb lock, remove page from hugetlb lists and clear destructor 2) Outside lock, allocate vmemmap if necessary and call low level free Between these two steps, the hugetlb page will appear as a normal compound page. However, vmemmap for tail pages could be missing. If a memory error occurs at this time, we could try to update page flags non-existant page structs. A much more detailed description is in the first patch. The first patch addresses the race window. However, it adds a hugetlb_lock lock/unlock cycle to every vmemmap optimized hugetlb page free operation. This could lead to slowdowns if one is freeing a large number of hugetlb pages. The second path optimizes the update_and_free_pages_bulk routine to only take the lock once in bulk operations. The second patch is technically not a bug fix, but includes a Fixes tag and Cc stable to avoid a performance regression. It can be combined with the first, but was done separately make reviewing easier. This patch (of 2): Freeing a hugetlb page and releasing base pages back to the underlying allocator such as buddy or cma is performed in two steps: - remove_hugetlb_folio() is called to remove the folio from hugetlb lists, get a ref on the page and remove hugetlb destructor. This all must be done under the hugetlb lock. After this call, the page can be treated as a normal compound page or a collection of base size pages. - update_and_free_hugetlb_folio() is called to allocate vmemmap if needed and the free routine of the underlying allocator is called on the resulting page. We can not hold the hugetlb lock here. One issue with this scheme is that a memory error could occur between these two steps. In this case, the memory error handling code treats the old hugetlb page as a normal compound page or collection of base pages. It will then try to SetPageHWPoison(page) on the page with an error. If the page with error is a tail page without vmemmap, a write error will occur when trying to set the flag. Address this issue by modifying remove_hugetlb_folio() and update_and_free_hugetlb_folio() such that the hugetlb destructor is not cleared until after allocating vmemmap. Since clearing the destructor requires holding the hugetlb lock, the clearing is done in remove_hugetlb_folio() if the vmemmap is present. This saves a lock/unlock cycle. Otherwise, destructor is cleared in update_and_free_hugetlb_folio() after allocating vmemmap. Note that this will leave hugetlb pages in a state where they are marked free (by hugetlb specific page flag) and have a ref count. This is not a normal state. The only code that would notice is the memory error code, and it is set up to retry in such a case. A subsequent patch will create a routine to do bulk processing of vmemmap allocation. This will eliminate a lock/unlock cycle for each hugetlb page in the case where we are freeing a large number of pages. Link: https://lkml.kernel.org/r/20230711220942.43706-1-mike.kravetz@oracle.com Link: https://lkml.kernel.org/r/20230711220942.43706-2-mike.kravetz@oracle.com Fixes: ad2fa3717b74 ("mm: hugetlb: alloc the vmemmap pages associated with each HugeTLB page") Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Tested-by: Naoya Horiguchi Cc: Axel Rasmussen Cc: James Houghton Cc: Jiaqi Yan Cc: Miaohe Lin Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 75 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 64a3239b6407..6da626bfb52e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1579,9 +1579,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio, unsigned int order) { } #endif +static inline void __clear_hugetlb_destructor(struct hstate *h, + struct folio *folio) +{ + lockdep_assert_held(&hugetlb_lock); + + /* + * Very subtle + * + * For non-gigantic pages set the destructor to the normal compound + * page dtor. This is needed in case someone takes an additional + * temporary ref to the page, and freeing is delayed until they drop + * their reference. + * + * For gigantic pages set the destructor to the null dtor. This + * destructor will never be called. Before freeing the gigantic + * page destroy_compound_gigantic_folio will turn the folio into a + * simple group of pages. After this the destructor does not + * apply. + * + */ + if (hstate_is_gigantic(h)) + folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR); + else + folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR); +} + /* - * Remove hugetlb folio from lists, and update dtor so that the folio appears - * as just a compound page. + * Remove hugetlb folio from lists. + * If vmemmap exists for the folio, update dtor so that the folio appears + * as just a compound page. Otherwise, wait until after allocating vmemmap + * to update dtor. * * A reference is held on the folio, except in the case of demote. * @@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, } /* - * Very subtle - * - * For non-gigantic pages set the destructor to the normal compound - * page dtor. This is needed in case someone takes an additional - * temporary ref to the page, and freeing is delayed until they drop - * their reference. - * - * For gigantic pages set the destructor to the null dtor. This - * destructor will never be called. Before freeing the gigantic - * page destroy_compound_gigantic_folio will turn the folio into a - * simple group of pages. After this the destructor does not - * apply. - * - * This handles the case where more than one ref is held when and - * after update_and_free_hugetlb_folio is called. - * - * In the case of demote we do not ref count the page as it will soon - * be turned into a page of smaller size. + * We can only clear the hugetlb destructor after allocating vmemmap + * pages. Otherwise, someone (memory error handling) may try to write + * to tail struct pages. + */ + if (!folio_test_hugetlb_vmemmap_optimized(folio)) + __clear_hugetlb_destructor(h, folio); + + /* + * In the case of demote we do not ref count the page as it will soon + * be turned into a page of smaller size. */ if (!demote) folio_ref_unfreeze(folio, 1); - if (hstate_is_gigantic(h)) - folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR); - else - folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR); h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; @@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, { int i; struct page *subpage; + bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio); if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) return; @@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, if (unlikely(folio_test_hwpoison(folio))) folio_clear_hugetlb_hwpoison(folio); + /* + * If vmemmap pages were allocated above, then we need to clear the + * hugetlb destructor under the hugetlb lock. + */ + if (clear_dtor) { + spin_lock_irq(&hugetlb_lock); + __clear_hugetlb_destructor(h, folio); + spin_unlock_irq(&hugetlb_lock); + } + for (i = 0; i < pages_per_huge_page(h); i++) { subpage = folio_page(folio, i); subpage->flags &= ~(1 << PG_locked | 1 << PG_error | -- cgit v1.2.3 From 65294de30cb8bc7659e445f7be2846af9ed35499 Mon Sep 17 00:00:00 2001 From: Ayush Jain Date: Fri, 28 Jul 2023 22:09:51 +0530 Subject: selftests: mm: ksm: fix incorrect evaluation of parameter A missing break in kms_tests leads to kselftest hang when the parameter -s is used. In current code flow because of missing break in -s, -t parses args spilled from -s and as -t accepts only valid values as 0,1 so any arg in -s >1 or <0, gets in ksm_test failure This went undetected since, before the addition of option -t, the next case -M would immediately break out of the switch statement but that is no longer the case Add the missing break statement. ----Before---- ./ksm_tests -H -s 100 Invalid merge type ----After---- ./ksm_tests -H -s 100 Number of normal pages: 0 Number of huge pages: 50 Total size: 100 MiB Total time: 0.401732682 s Average speed: 248.922 MiB/s Link: https://lkml.kernel.org/r/20230728163952.4634-1-ayush.jain3@amd.com Fixes: 07115fcc15b4 ("selftests/mm: add new selftests for KSM") Signed-off-by: Ayush Jain Reviewed-by: David Hildenbrand Cc: Stefan Roesch Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/ksm_tests.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 435acebdc325..380b691d3eb9 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -831,6 +831,7 @@ int main(int argc, char *argv[]) printf("Size must be greater than 0\n"); return KSFT_FAIL; } + break; case 't': { int tmp = atoi(optarg); -- cgit v1.2.3 From 493614da0d4e8d8bb37c3c558e0c01de20344cff Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 31 Jul 2023 13:24:50 -0400 Subject: mm: compaction: fix endless looping over same migrate block During stress testing, the following situation was observed: 70 root 39 19 0 0 0 R 100.0 0.0 959:29.92 khugepaged 310936 root 20 0 84416 25620 512 R 99.7 1.5 642:37.22 hugealloc Tracing shows isolate_migratepages_block() endlessly looping over the first block in the DMA zone: hugealloc-310936 [001] ..... 237297.415718: mm_compaction_finished: node=0 zone=DMA order=9 ret=no_suitable_page hugealloc-310936 [001] ..... 237297.415718: mm_compaction_isolate_migratepages: range=(0x1 ~ 0x400) nr_scanned=513 nr_taken=0 hugealloc-310936 [001] ..... 237297.415718: mm_compaction_finished: node=0 zone=DMA order=9 ret=no_suitable_page hugealloc-310936 [001] ..... 237297.415718: mm_compaction_isolate_migratepages: range=(0x1 ~ 0x400) nr_scanned=513 nr_taken=0 hugealloc-310936 [001] ..... 237297.415718: mm_compaction_finished: node=0 zone=DMA order=9 ret=no_suitable_page hugealloc-310936 [001] ..... 237297.415718: mm_compaction_isolate_migratepages: range=(0x1 ~ 0x400) nr_scanned=513 nr_taken=0 hugealloc-310936 [001] ..... 237297.415718: mm_compaction_finished: node=0 zone=DMA order=9 ret=no_suitable_page hugealloc-310936 [001] ..... 237297.415718: mm_compaction_isolate_migratepages: range=(0x1 ~ 0x400) nr_scanned=513 nr_taken=0 The problem is that the functions tries to test and set the skip bit once on the block, to avoid skipping on its own skip-set, using pageblock_aligned() on the pfn as a test. But because this is the DMA zone which starts at pfn 1, this is never true for the first block, and the skip bit isn't set or tested at all. As a result, fast_find_migrateblock() returns the same pageblock over and over. If the pfn isn't pageblock-aligned, also check if it's the start of the zone to ensure test-and-set-exactly-once on unaligned ranges. Thanks to Vlastimil Babka for the help in debugging this. Link: https://lkml.kernel.org/r/20230731172450.1632195-1-hannes@cmpxchg.org Fixes: 90ed667c03fe ("Revert "Revert "mm/compaction: fix set skip in fast_find_migrateblock""") Signed-off-by: Johannes Weiner Reviewed-by: Vlastimil Babka Acked-by: Mel Gorman Reviewed-by: Baolin Wang Signed-off-by: Andrew Morton --- mm/compaction.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index dbc9f86b1934..eacca2794e47 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -912,11 +912,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* * Check if the pageblock has already been marked skipped. - * Only the aligned PFN is checked as the caller isolates + * Only the first PFN is checked as the caller isolates * COMPACT_CLUSTER_MAX at a time so the second call must * not falsely conclude that the block should be skipped. */ - if (!valid_page && pageblock_aligned(low_pfn)) { + if (!valid_page && (pageblock_aligned(low_pfn) || + low_pfn == cc->zone->zone_start_pfn)) { if (!isolation_suitable(cc, page)) { low_pfn = end_pfn; folio = NULL; @@ -2002,7 +2003,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc) * before making it "skip" so other compaction instances do * not scan the same block. */ - if (pageblock_aligned(low_pfn) && + if ((pageblock_aligned(low_pfn) || + low_pfn == cc->zone->zone_start_pfn) && !fast_find_block && !isolation_suitable(cc, page)) continue; -- cgit v1.2.3 From d1ef9dba07bf637995202d0efd29c2fea19e809c Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 31 Jul 2023 13:55:42 -0400 Subject: MAINTAINERS: add maple tree mailing list There is a mailing list for the maple tree development. Add the list to the maple tree entry of the MAINTAINERS file so patches will be sent to interested parties. Link: https://lkml.kernel.org/r/20230731175542.1653200-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 53b7ca804465..8355ec45452b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12481,6 +12481,7 @@ F: net/mctp/ MAPLE TREE M: Liam R. Howlett +L: maple-tree@lists.infradead.org L: linux-mm@kvack.org S: Supported F: Documentation/core-api/maple_tree.rst -- cgit v1.2.3 From 17457784004c84178798432a029ab20e14f728b1 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 31 Jul 2023 22:50:21 +0100 Subject: fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions Some architectures do not populate the entire range categorised by KCORE_TEXT, so we must ensure that the kernel address we read from is valid. Unfortunately there is no solution currently available to do so with a purely iterator solution so reinstate the bounce buffer in this instance so we can use copy_from_kernel_nofault() in order to avoid page faults when regions are unmapped. This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data"), reinstating the bounce buffer, but adapts the code to continue to use an iterator. [lstoakes@gmail.com: correct comment to be strictly correct about reasoning] Link: https://lkml.kernel.org/r/525a3f14-74fa-4c22-9fca-9dab4de8a0c3@lucifer.local Link: https://lkml.kernel.org/r/20230731215021.70911-1-lstoakes@gmail.com Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data") Signed-off-by: Lorenzo Stoakes Reported-by: Jiri Olsa Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava Tested-by: Jiri Olsa Tested-by: Will Deacon Cc: Alexander Viro Cc: Ard Biesheuvel Cc: Baoquan He Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jens Axboe Cc: Kefeng Wang Cc: Liu Shixin Cc: Matthew Wilcox Cc: Mike Galbraith Cc: Thorsten Leemhuis Cc: Uladzislau Rezki (Sony) Cc: Signed-off-by: Andrew Morton --- fs/proc/kcore.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 9cb32e1a78a0..23fc24d16b31 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name, static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) { + struct file *file = iocb->ki_filp; + char *buf = file->private_data; loff_t *fpos = &iocb->ki_pos; size_t phdrs_offset, notes_offset, data_offset; size_t page_offline_frozen = 1; @@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) case KCORE_VMEMMAP: case KCORE_TEXT: /* - * We use _copy_to_iter() to bypass usermode hardening - * which would otherwise prevent this operation. + * Sadly we must use a bounce buffer here to be able to + * make use of copy_from_kernel_nofault(), as these + * memory regions might not always be mapped on all + * architectures. */ - if (_copy_to_iter((char *)start, tsz, iter) != tsz) { + if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + /* + * We know the bounce buffer is safe to copy from, so + * use _copy_to_iter() directly. + */ + } else if (_copy_to_iter(buf, tsz, iter) != tsz) { ret = -EFAULT; goto out; } @@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp) if (ret) return ret; + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + static const struct proc_ops kcore_proc_ops = { .proc_read_iter = read_kcore_iter, .proc_open = open_kcore, + .proc_release = release_kcore, .proc_lseek = default_llseek, }; -- cgit v1.2.3 From fac2650276eced3c94bcdbc21d0e5be637c1e582 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 1 Aug 2023 09:56:32 -0400 Subject: selftests: cgroup: fix test_kmem_basic false positives This test fails routinely in our prod testing environment, and I can reproduce it locally as well. The test allocates dcache inside a cgroup, then drops the memory limit and checks that usage drops correspondingly. The reason it fails is because dentries are freed with an RCU delay - a debugging sleep shows that usage drops as expected shortly after. Insert a 1s sleep after dropping the limit. This should be good enough, assuming that machines running those tests are otherwise not very busy. Link: https://lkml.kernel.org/r/20230801135632.1768830-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Paul E. McKenney Cc: Michal Hocko Cc: Roman Gushchin Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_kmem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 258ddc565deb..1b2cec9d18a4 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -70,6 +70,10 @@ static int test_kmem_basic(const char *root) goto cleanup; cg_write(cg, "memory.high", "1M"); + + /* wait for RCU freeing */ + sleep(1); + slab1 = cg_read_key_long(cg, "memory.stat", "slab "); if (slab1 <= 0) goto cleanup; -- cgit v1.2.3 From f8654743a0e6909dc634cbfad6db6816f10f3399 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 29 Jul 2023 04:13:18 +0900 Subject: nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput During unmount process of nilfs2, nothing holds nilfs_root structure after nilfs2 detaches its writer in nilfs_detach_log_writer(). Previously, nilfs_evict_inode() could cause use-after-free read for nilfs_root if inodes are left in "garbage_list" and released by nilfs_dispose_list at the end of nilfs_detach_log_writer(), and this bug was fixed by commit 9b5a04ac3ad9 ("nilfs2: fix use-after-free bug of nilfs_root in nilfs_evict_inode()"). However, it turned out that there is another possibility of UAF in the call path where mark_inode_dirty_sync() is called from iput(): nilfs_detach_log_writer() nilfs_dispose_list() iput() mark_inode_dirty_sync() __mark_inode_dirty() nilfs_dirty_inode() __nilfs_mark_inode_dirty() nilfs_load_inode_block() --> causes UAF of nilfs_root struct This can happen after commit 0ae45f63d4ef ("vfs: add support for a lazytime mount option"), which changed iput() to call mark_inode_dirty_sync() on its final reference if i_state has I_DIRTY_TIME flag and i_nlink is non-zero. This issue appears after commit 28a65b49eb53 ("nilfs2: do not write dirty data after degenerating to read-only") when using the syzbot reproducer, but the issue has potentially existed before. Fix this issue by adding a "purging flag" to the nilfs structure, setting that flag while disposing the "garbage_list" and checking it in __nilfs_mark_inode_dirty(). Unlike commit 9b5a04ac3ad9 ("nilfs2: fix use-after-free bug of nilfs_root in nilfs_evict_inode()"), this patch does not rely on ns_writer to determine whether to skip operations, so as not to break recovery on mount. The nilfs_salvage_orphan_logs routine dirties the buffer of salvaged data before attaching the log writer, so changing __nilfs_mark_inode_dirty() to skip the operation when ns_writer is NULL will cause recovery write to fail. The purpose of using the cleanup-only flag is to allow for narrowing of such conditions. Link: https://lkml.kernel.org/r/20230728191318.33047-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+74db8b3087f293d3a13a@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/000000000000b4e906060113fd63@google.com Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") Tested-by: Ryusuke Konishi Cc: # 4.0+ Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 8 ++++++++ fs/nilfs2/segment.c | 2 ++ fs/nilfs2/the_nilfs.h | 2 ++ 3 files changed, 12 insertions(+) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index a8ce522ac747..35bc79305318 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) int __nilfs_mark_inode_dirty(struct inode *inode, int flags) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct buffer_head *ibh; int err; + /* + * Do not dirty inodes after the log writer has been detached + * and its nilfs_root struct has been freed. + */ + if (unlikely(nilfs_purging(nilfs))) + return 0; + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warn(inode->i_sb, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index c2553024bd25..581691e4be49 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2845,6 +2845,7 @@ void nilfs_detach_log_writer(struct super_block *sb) nilfs_segctor_destroy(nilfs->ns_writer); nilfs->ns_writer = NULL; } + set_nilfs_purging(nilfs); /* Force to free the list of dirty files */ spin_lock(&nilfs->ns_inode_lock); @@ -2857,4 +2858,5 @@ void nilfs_detach_log_writer(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); nilfs_dispose_list(nilfs, &garbage_list, 1); + clear_nilfs_purging(nilfs); } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 47c7dfbb7ea5..cd4ae1b8ae16 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -29,6 +29,7 @@ enum { THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ + THE_NILFS_PURGING, /* disposing dirty files for cleanup */ }; /** @@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +THE_NILFS_FNS(PURGING, purging) /* * Mount option operations -- cgit v1.2.3 From 5f1fc67f2cb8d3035d3acd273b48b97835af8afd Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 29 Jul 2023 20:37:32 +0000 Subject: mm/damon/core: initialize damo_filter->list from damos_new_filter() damos_new_filter() is not initializing the list field of newly allocated filter object. However, DAMON sysfs interface and DAMON_RECLAIM are not initializing it after calling damos_new_filter(). As a result, accessing uninitialized memory is possible. Actually, adding multiple DAMOS filters via DAMON sysfs interface caused NULL pointer dereferencing. Initialize the field just after the allocation from damos_new_filter(). Link: https://lkml.kernel.org/r/20230729203733.38949-2-sj@kernel.org Fixes: 98def236f63c ("mm/damon/core: implement damos filter") Signed-off-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 91cff7f2997e..eb9580942a5c 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type, return NULL; filter->type = type; filter->matching = matching; + INIT_LIST_HEAD(&filter->list); return filter; } -- cgit v1.2.3 From 3bfc37d92687b4b19056998cebc02f94fbc81427 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 4 Aug 2023 11:54:43 -0500 Subject: Revert "PCI: mvebu: Mark driver as BROKEN" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit b3574f579ece ("PCI: mvebu: Mark driver as BROKEN") made it impossible to enable the pci-mvebu driver. The driver does have known problems, but as Russell and Uwe reported, it does work in some configurations, so removing it broke some working setups. Revert b3574f579ece so pci-mvebu is available. Reported-by: Russell King (Oracle) Link: https://lore.kernel.org/r/ZMzicVQEyHyZzBOc@shell.armlinux.org.uk Reported-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230804134622.pmbymxtzxj2yfhri@pengutronix.de Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 8d49bad7f847..0859be86e718 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -179,7 +179,6 @@ config PCI_MVEBU depends on MVEBU_MBUS depends on ARM depends on OF - depends on BROKEN select PCI_BRIDGE_EMUL help Add support for Marvell EBU PCIe controller. This PCIe controller -- cgit v1.2.3 From 95848dcb9d676738411a8ff70a9704039f1b3982 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Aug 2023 07:55:37 +0200 Subject: zram: take device and not only bvec offset into account Commit af8b04c63708 ("zram: simplify bvec iteration in __zram_make_request") changed the bio iteration in zram to rely on the implicit capping to page boundaries in bio_for_each_segment. But it failed to care for the fact zram not only care about the page alignment of the bio payload, but also the page alignment into the device. For buffered I/O and swap those are the same, but for direct I/O or kernel internal I/O like XFS log buffer writes they can differ. Fix this by open coding bio_for_each_segment and limiting the bvec len so that it never crosses over a page alignment boundary in the device in addition to the payload boundary already taken care of by bio_iter_iovec. Cc: stable@vger.kernel.org Fixes: af8b04c63708 ("zram: simplify bvec iteration in __zram_make_request") Reported-by: Dusty Mabe Signed-off-by: Christoph Hellwig Acked-by: Sergey Senozhatsky Link: https://lore.kernel.org/r/20230805055537.147835-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 5676e6dd5b16..06673c6ca255 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1870,15 +1870,16 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio) static void zram_bio_read(struct zram *zram, struct bio *bio) { - struct bvec_iter iter; - struct bio_vec bv; - unsigned long start_time; + unsigned long start_time = bio_start_io_acct(bio); + struct bvec_iter iter = bio->bi_iter; - start_time = bio_start_io_acct(bio); - bio_for_each_segment(bv, bio, iter) { + do { u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + struct bio_vec bv = bio_iter_iovec(bio, iter); + + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { atomic64_inc(&zram->stats.failed_reads); @@ -1890,22 +1891,26 @@ static void zram_bio_read(struct zram *zram, struct bio *bio) zram_slot_lock(zram, index); zram_accessed(zram, index); zram_slot_unlock(zram, index); - } + + bio_advance_iter_single(bio, &iter, bv.bv_len); + } while (iter.bi_size); + bio_end_io_acct(bio, start_time); bio_endio(bio); } static void zram_bio_write(struct zram *zram, struct bio *bio) { - struct bvec_iter iter; - struct bio_vec bv; - unsigned long start_time; + unsigned long start_time = bio_start_io_acct(bio); + struct bvec_iter iter = bio->bi_iter; - start_time = bio_start_io_acct(bio); - bio_for_each_segment(bv, bio, iter) { + do { u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + struct bio_vec bv = bio_iter_iovec(bio, iter); + + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { atomic64_inc(&zram->stats.failed_writes); @@ -1916,7 +1921,10 @@ static void zram_bio_write(struct zram *zram, struct bio *bio) zram_slot_lock(zram, index); zram_accessed(zram, index); zram_slot_unlock(zram, index); - } + + bio_advance_iter_single(bio, &iter, bv.bv_len); + } while (iter.bi_size); + bio_end_io_acct(bio, start_time); bio_endio(bio); } -- cgit v1.2.3 From f38963b9cd0645a336cf30c5da2e89e34e34fec3 Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Fri, 4 Aug 2023 15:14:03 -0700 Subject: hwmon: (pmbus/bel-pfe) Enable PMBUS_SKIP_STATUS_CHECK for pfe1100 Skip status check for both pfe1100 and pfe3000 because the communication error is also observed on pfe1100 devices. Signed-off-by: Tao Ren Fixes: 626bb2f3fb3c hwmon: (pmbus) add driver for BEL PFE1100 and PFE3000 Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230804221403.28931-1-rentao.bupt@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/bel-pfe.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/pmbus/bel-pfe.c b/drivers/hwmon/pmbus/bel-pfe.c index fa5070ae26bc..7c5f4b10a7c1 100644 --- a/drivers/hwmon/pmbus/bel-pfe.c +++ b/drivers/hwmon/pmbus/bel-pfe.c @@ -17,12 +17,13 @@ enum chips {pfe1100, pfe3000}; /* - * Disable status check for pfe3000 devices, because some devices report - * communication error (invalid command) for VOUT_MODE command (0x20) - * although correct VOUT_MODE (0x16) is returned: it leads to incorrect - * exponent in linear mode. + * Disable status check because some devices report communication error + * (invalid command) for VOUT_MODE command (0x20) although the correct + * VOUT_MODE (0x16) is returned: it leads to incorrect exponent in linear + * mode. + * This affects both pfe3000 and pfe1100. */ -static struct pmbus_platform_data pfe3000_plat_data = { +static struct pmbus_platform_data pfe_plat_data = { .flags = PMBUS_SKIP_STATUS_CHECK, }; @@ -94,16 +95,15 @@ static int pfe_pmbus_probe(struct i2c_client *client) int model; model = (int)i2c_match_id(pfe_device_id, client)->driver_data; + client->dev.platform_data = &pfe_plat_data; /* * PFE3000-12-069RA devices may not stay in page 0 during device * probe which leads to probe failure (read status word failed). * So let's set the device to page 0 at the beginning. */ - if (model == pfe3000) { - client->dev.platform_data = &pfe3000_plat_data; + if (model == pfe3000) i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0); - } return pmbus_do_probe(client, &pfe_driver_info[model]); } -- cgit v1.2.3 From df2f7cde73cb58c0e6a60f97d1cd6037138a45cd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 7 Aug 2023 10:33:57 +0200 Subject: PM: hibernate: fix resume_store() return value when hibernation not available On a laptop with hibernation set up but not actively used, and with secure boot and lockdown enabled kernel, 6.5-rc1 gets stuck on boot with the following repeated messages: A start job is running for Resume from hibernation using device /dev/system/swap (24s / no limit) lockdown_is_locked_down: 25311154 callbacks suppressed Lockdown: systemd-hiberna: hibernation is restricted; see man kernel_lockdown.7 ... Checking the resume code leads to commit cc89c63e2fe3 ("PM: hibernate: move finding the resume device out of software_resume") which inadvertently changed the return value from resume_store() to 0 when !hibernation_available(). This apparently translates to userspace write() returning 0 as in number of bytes written, and userspace looping indefinitely in the attempt to write the intended value. Fix this by returning the full number of bytes that were to be written, as that's what was done before the commit. Fixes: cc89c63e2fe3 ("PM: hibernate: move finding the resume device out of software_resume") Signed-off-by: Vlastimil Babka Reviewed-by: Christoph Hellwig Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e1b4bfa938dd..2b4a946a6ff5 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1166,7 +1166,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, int error; if (!hibernation_available()) - return 0; + return n; if (len && buf[len-1] == '\n') len--; -- cgit v1.2.3 From 7cdf55462c5533a1c78ae13ab8563558e30e4130 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 1 Aug 2023 12:57:02 +0300 Subject: regulator: qcom-rpmh: Fix LDO 12 regulator for PM8550 The LDO 12 is NLDO 515 low voltage type, so fix accordingly. Fixes: e6e3776d682d ("regulator: qcom-rpmh: Add support for PM8550 regulators") Signed-off-by: Abel Vesa Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230801095702.2891127-1-abel.vesa@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index f3b280af0773..cd077b7c4aff 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1068,7 +1068,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = { RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"), RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo515, "vdd-l1-l4-l10"), RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo515, "vdd-l11"), - RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"), + RPMH_VREG("ldo12", "ldo%s12", &pmic5_nldo515, "vdd-l12"), RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo15", "ldo%s15", &pmic5_nldo515, "vdd-l15"), -- cgit v1.2.3 From bee6cf1a80b54548a039e224c651bb15b644a480 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sun, 16 Jul 2023 20:22:20 +0200 Subject: x86/sev: Do not try to parse for the CC blob on non-AMD hardware Tao Liu reported a boot hang on an Intel Atom machine due to an unmapped EFI config table. The reason being that the CC blob which contains the CPUID page for AMD SNP guests is parsed for before even checking whether the machine runs on AMD hardware. Usually that's not a problem on !AMD hw - it simply won't find the CC blob's GUID and return. However, if any parts of the config table pointers array is not mapped, the kernel will #PF very early in the decompressor stage without any opportunity to recover. Therefore, do a superficial CPUID check before poking for the CC blob. This will fix the current issue on real hardware. It would also work as a guest on a non-lying hypervisor. For the lying hypervisor, the check is done again, *after* parsing the CC blob as the real CPUID page will be present then. Clear the #VC handler in case SEV-{ES,SNP} hasn't been detected, as a precaution. Fixes: c01fce9cef84 ("x86/compressed: Add SEV-SNP feature detection/setup") Reported-by: Tao Liu Signed-off-by: Borislav Petkov (AMD) Acked-by: Tom Lendacky Tested-by: Tao Liu Cc: Link: https://lore.kernel.org/r/20230601072043.24439-1-ltao@redhat.com --- arch/x86/boot/compressed/idt_64.c | 9 ++++++++- arch/x86/boot/compressed/sev.c | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 6debb816e83d..3cdf94b41456 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -63,7 +63,14 @@ void load_stage2_idt(void) set_idt_entry(X86_TRAP_PF, boot_page_fault); #ifdef CONFIG_AMD_MEM_ENCRYPT - set_idt_entry(X86_TRAP_VC, boot_stage2_vc); + /* + * Clear the second stage #VC handler in case guest types + * needing #VC have not been detected. + */ + if (sev_status & BIT(1)) + set_idt_entry(X86_TRAP_VC, boot_stage2_vc); + else + set_idt_entry(X86_TRAP_VC, NULL); #endif load_boot_idt(&boot_idt_desc); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 09dc8c187b3c..c3e343bd4760 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -404,13 +404,46 @@ void sev_enable(struct boot_params *bp) if (bp) bp->cc_blob_address = 0; + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + /* Check for the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return; + + /* + * Check for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + /* Check whether SEV is supported */ + if (!(eax & BIT(1))) + return; + /* * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. */ snp = snp_init(bp); - /* Check for the SME/SEV support leaf */ + /* Now repeat the checks with the SNP CPUID table. */ + + /* Recheck the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); @@ -418,7 +451,7 @@ void sev_enable(struct boot_params *bp) return; /* - * Check for the SME/SEV feature: + * Recheck for the SME/SEV feature: * CPUID Fn8000_001F[EAX] * - Bit 0 - Secure Memory Encryption support * - Bit 1 - Secure Encrypted Virtualization support -- cgit v1.2.3 From 5e720f8c8c9d959283c3908bbf32a91a01a86547 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 7 Aug 2023 08:37:45 +0200 Subject: cpufreq: amd-pstate: fix global sysfs attribute type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 3666062b87ec ("cpufreq: amd-pstate: move to use bus_get_dev_root()") the "amd_pstate" attributes where moved from a dedicated kobject to the cpu root kobject. While the dedicated kobject expects to contain kobj_attributes the root kobject needs device_attributes. As the changed arguments are not used by the callbacks it works most of the time. However CFI will detect this issue: [ 4947.849350] CFI failure at dev_attr_show+0x24/0x60 (target: show_status+0x0/0x70; expected type: 0x8651b1de) ... [ 4947.849409] Call Trace: [ 4947.849410] [ 4947.849411] ? __warn+0xcf/0x1c0 [ 4947.849414] ? dev_attr_show+0x24/0x60 [ 4947.849415] ? report_cfi_failure+0x4e/0x60 [ 4947.849417] ? handle_cfi_failure+0x14c/0x1d0 [ 4947.849419] ? __cfi_show_status+0x10/0x10 [ 4947.849420] ? handle_bug+0x4f/0x90 [ 4947.849421] ? exc_invalid_op+0x1a/0x60 [ 4947.849422] ? asm_exc_invalid_op+0x1a/0x20 [ 4947.849424] ? __cfi_show_status+0x10/0x10 [ 4947.849425] ? dev_attr_show+0x24/0x60 [ 4947.849426] sysfs_kf_seq_show+0xa6/0x110 [ 4947.849433] seq_read_iter+0x16c/0x4b0 [ 4947.849436] vfs_read+0x272/0x2d0 [ 4947.849438] ksys_read+0x72/0xe0 [ 4947.849439] do_syscall_64+0x76/0xb0 [ 4947.849440] ? do_user_addr_fault+0x252/0x650 [ 4947.849442] ? exc_page_fault+0x7a/0x1b0 [ 4947.849443] entry_SYSCALL_64_after_hwframe+0x72/0xdc Fixes: 3666062b87ec ("cpufreq: amd-pstate: move to use bus_get_dev_root()") Reported-by: Jannik Glückert Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217765 Link: https://lore.kernel.org/lkml/c7f1bf9b-b183-bf6e-1cbb-d43f72494083@gmail.com/ Cc: All applicable Signed-off-by: Thomas Weißschuh Reviewed-by: Greg Kroah-Hartman Reviewed-by: Nathan Chancellor Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 81fba0dcbee9..9a1e194d5cf8 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1012,8 +1012,8 @@ static int amd_pstate_update_status(const char *buf, size_t size) return 0; } -static ssize_t show_status(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t ret; @@ -1024,7 +1024,7 @@ static ssize_t show_status(struct kobject *kobj, return ret; } -static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, +static ssize_t status_store(struct device *a, struct device_attribute *b, const char *buf, size_t count) { char *p = memchr(buf, '\n', count); @@ -1043,7 +1043,7 @@ cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); -define_one_global_rw(status); +static DEVICE_ATTR_RW(status); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, @@ -1062,7 +1062,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { }; static struct attribute *pstate_global_attributes[] = { - &status.attr, + &dev_attr_status.attr, NULL }; -- cgit v1.2.3 From 72dbde0f2afbe4af8e8595a89c650ae6b9d9c36f Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 7 Aug 2023 12:24:15 +1000 Subject: io_uring: correct check for O_TMPFILE O_TMPFILE is actually __O_TMPFILE|O_DIRECTORY. This means that the old check for whether RESOLVE_CACHED can be used would incorrectly think that O_DIRECTORY could not be used with RESOLVE_CACHED. Cc: stable@vger.kernel.org # v5.12+ Fixes: 3a81fd02045c ("io_uring: enable LOOKUP_CACHED path resolution for filename lookups") Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20230807-resolve_cached-o_tmpfile-v3-1-e49323e1ef6f@cyphar.com Signed-off-by: Jens Axboe --- io_uring/openclose.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/io_uring/openclose.c b/io_uring/openclose.c index 10ca57f5bd24..e3fae26e025d 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open) { /* * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, - * it'll always -EAGAIN + * it'll always -EAGAIN. Note that we test for __O_TMPFILE because + * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force + * async for. */ - return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE); + return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE); } static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- cgit v1.2.3 From 04b5b5cb0136ce970333a9c6cec7e46adba1ea3a Mon Sep 17 00:00:00 2001 From: Zhu Wang Date: Thu, 3 Aug 2023 10:02:30 +0800 Subject: scsi: core: Fix possible memory leak if device_add() fails If device_add() returns error, the name allocated by dev_set_name() needs be freed. As the comment of device_add() says, put_device() should be used to decrease the reference count in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanp(). Fixes: ee959b00c335 ("SCSI: convert struct class_device to struct device") Signed-off-by: Zhu Wang Link: https://lore.kernel.org/r/20230803020230.226903-1-wangzhu9@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/raid_class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c index 898a0bdf8df6..711252e52d8e 100644 --- a/drivers/scsi/raid_class.c +++ b/drivers/scsi/raid_class.c @@ -248,6 +248,7 @@ int raid_component_add(struct raid_template *r,struct device *raid_dev, return 0; err_out: + put_device(&rc->dev); list_del(&rc->node); rd->component_count--; put_device(component_dev); -- cgit v1.2.3 From 41320b18a0e0dfb236dba4edb9be12dba1878156 Mon Sep 17 00:00:00 2001 From: Zhu Wang Date: Tue, 1 Aug 2023 19:14:21 +0800 Subject: scsi: snic: Fix possible memory leak if device_add() fails If device_add() returns error, the name allocated by dev_set_name() needs be freed. As the comment of device_add() says, put_device() should be used to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanp(). Fixes: c8806b6c9e82 ("snic: driver for Cisco SCSI HBA") Signed-off-by: Zhu Wang Acked-by: Narsimhulu Musini Link: https://lore.kernel.org/r/20230801111421.63651-1-wangzhu9@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/snic/snic_disc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c index 3e2e5783924d..e429ad23c396 100644 --- a/drivers/scsi/snic/snic_disc.c +++ b/drivers/scsi/snic/snic_disc.c @@ -303,6 +303,7 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid) "Snic Tgt: device_add, with err = %d\n", ret); + put_device(&tgt->dev); put_device(&snic->shost->shost_gendev); spin_lock_irqsave(snic->shost->host_lock, flags); list_del(&tgt->list); -- cgit v1.2.3 From b6d128f89a85771433a004e8656090ccbe1fb969 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 3 Aug 2023 17:18:12 +0900 Subject: scsi: ufs: renesas: Fix private allocation Should use devm_kzalloc() for struct ufs_renesas_priv because the .initialized should be false as default. Fixes: d69520288efd ("scsi: ufs: ufs-renesas: Add support for Renesas R-Car UFS controller") Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20230803081812.1446282-1-yoshihiro.shimoda.uh@renesas.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-renesas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-renesas.c b/drivers/ufs/host/ufs-renesas.c index f8a5e79ed3b4..ab0652d8705a 100644 --- a/drivers/ufs/host/ufs-renesas.c +++ b/drivers/ufs/host/ufs-renesas.c @@ -359,7 +359,7 @@ static int ufs_renesas_init(struct ufs_hba *hba) { struct ufs_renesas_priv *priv; - priv = devm_kmalloc(hba->dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(hba->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; ufshcd_set_variant(hba, priv); -- cgit v1.2.3 From 8eebf0e84f0614cebc7347f7bbccba4056d77d42 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 3 Aug 2023 14:19:32 -0700 Subject: scsi: lpfc: Remove reftag check in DIF paths When preparing protection DIF I/O for DMA, the driver obtains reference tags from scsi_prot_ref_tag(). Previously, there was a wrong assumption that an all 0xffffffff value meant error and thus the driver failed the I/O. This patch removes the evaluation code and accepts whatever the upper layer returns. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230803211932.155745-1-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a62e091894f6..d26941b131fd 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -109,8 +109,6 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba, } } -#define LPFC_INVALID_REFTAG ((u32)-1) - /** * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread * @phba: The Hba for which this call is being executed. @@ -978,8 +976,6 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc, sgpe = scsi_prot_sglist(sc); lba = scsi_prot_ref_tag(sc); - if (lba == LPFC_INVALID_REFTAG) - return 0; /* First check if we need to match the LBA */ if (phba->lpfc_injerr_lba != LPFC_INJERR_LBA_OFF) { @@ -1560,8 +1556,6 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc, /* extract some info from the scsi command for pde*/ reftag = scsi_prot_ref_tag(sc); - if (reftag == LPFC_INVALID_REFTAG) - goto out; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1); @@ -1723,8 +1717,6 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, /* extract some info from the scsi command */ blksize = scsi_prot_interval(sc); reftag = scsi_prot_ref_tag(sc); - if (reftag == LPFC_INVALID_REFTAG) - goto out; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1); @@ -1953,8 +1945,6 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, /* extract some info from the scsi command for pde*/ reftag = scsi_prot_ref_tag(sc); - if (reftag == LPFC_INVALID_REFTAG) - goto out; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1); @@ -2154,8 +2144,6 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, /* extract some info from the scsi command */ blksize = scsi_prot_interval(sc); reftag = scsi_prot_ref_tag(sc); - if (reftag == LPFC_INVALID_REFTAG) - goto out; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1); @@ -2746,8 +2734,6 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) src = (struct scsi_dif_tuple *)sg_virt(sgpe); start_ref_tag = scsi_prot_ref_tag(cmd); - if (start_ref_tag == LPFC_INVALID_REFTAG) - goto out; start_app_tag = src->app_tag; len = sgpe->length; while (src && protsegcnt) { @@ -3493,11 +3479,11 @@ err: scsi_cmnd->sc_data_direction); lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "9084 Cannot setup S/G List for HBA" - "IO segs %d/%d SGL %d SCSI %d: %d %d\n", + "9084 Cannot setup S/G List for HBA " + "IO segs %d/%d SGL %d SCSI %d: %d %d %d\n", lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt, phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt, - prot_group_type, num_sge); + prot_group_type, num_sge, ret); lpfc_cmd->seg_cnt = 0; lpfc_cmd->prot_seg_cnt = 0; -- cgit v1.2.3 From dd64f80587190265ca8a0f4be6c64c2fda6d3ac2 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Wed, 26 Jul 2023 12:56:55 +0000 Subject: scsi: qedi: Fix potential deadlock on &qedi_percpu->p_work_lock As &qedi_percpu->p_work_lock is acquired by hard IRQ qedi_msix_handler(), other acquisitions of the same lock under process context should disable IRQ, otherwise deadlock could happen if the IRQ preempts the execution while the lock is held in process context on the same CPU. qedi_cpu_offline() is one such function which acquires the lock in process context. [Deadlock Scenario] qedi_cpu_offline() ->spin_lock(&p->p_work_lock) ->qedi_msix_handler() ->edi_process_completions() ->spin_lock_irqsave(&p->p_work_lock, flags); (deadlock here) This flaw was found by an experimental static analysis tool I am developing for IRQ-related deadlocks. The tentative patch fix the potential deadlock by spin_lock_irqsave() under process context. Signed-off-by: Chengfeng Ye Link: https://lore.kernel.org/r/20230726125655.4197-1-dg573847474@gmail.com Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 450522b204d6..77a56a136678 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1976,8 +1976,9 @@ static int qedi_cpu_offline(unsigned int cpu) struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu); struct qedi_work *work, *tmp; struct task_struct *thread; + unsigned long flags; - spin_lock_bh(&p->p_work_lock); + spin_lock_irqsave(&p->p_work_lock, flags); thread = p->iothread; p->iothread = NULL; @@ -1988,7 +1989,7 @@ static int qedi_cpu_offline(unsigned int cpu) kfree(work); } - spin_unlock_bh(&p->p_work_lock); + spin_unlock_irqrestore(&p->p_work_lock, flags); if (thread) kthread_stop(thread); return 0; -- cgit v1.2.3 From 1516ee035df32115197cd93ae3619dba7b020986 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Mon, 7 Aug 2023 15:07:25 +0530 Subject: scsi: qedi: Fix firmware halt over suspend and resume While performing certain power-off sequences, PCI drivers are called to suspend and resume their underlying devices through PCI PM (power management) interface. However the hardware does not support PCI PM suspend/resume operations so system wide suspend/resume leads to bad MFW (management firmware) state which causes various follow-up errors in driver when communicating with the device/firmware. To fix this driver implements PCI PM suspend handler to indicate unsupported operation to the PCI subsystem explicitly, thus avoiding system to go into suspended/standby mode. Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230807093725.46829-2-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 77a56a136678..cd0180b1f5b9 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -69,6 +69,7 @@ static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi); static void qedi_recovery_handler(struct work_struct *work); static void qedi_schedule_hw_err_handler(void *dev, enum qed_hw_err_type err_type); +static int qedi_suspend(struct pci_dev *pdev, pm_message_t state); static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle) { @@ -2511,6 +2512,22 @@ static void qedi_shutdown(struct pci_dev *pdev) __qedi_remove(pdev, QEDI_MODE_SHUTDOWN); } +static int qedi_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct qedi_ctx *qedi; + + if (!pdev) { + QEDI_ERR(NULL, "pdev is NULL.\n"); + return -ENODEV; + } + + qedi = pci_get_drvdata(pdev); + + QEDI_ERR(&qedi->dbg_ctx, "%s: Device does not support suspend operation\n", __func__); + + return -EPERM; +} + static int __qedi_probe(struct pci_dev *pdev, int mode) { struct qedi_ctx *qedi; @@ -2869,6 +2886,7 @@ static struct pci_driver qedi_pci_driver = { .remove = qedi_remove, .shutdown = qedi_shutdown, .err_handler = &qedi_err_handler, + .suspend = qedi_suspend, }; static int __init qedi_init(void) -- cgit v1.2.3 From ef222f551e7c4e2008fc442ffc9edcd1a7fd8f63 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Mon, 7 Aug 2023 15:07:24 +0530 Subject: scsi: qedf: Fix firmware halt over suspend and resume While performing certain power-off sequences, PCI drivers are called to suspend and resume their underlying devices through PCI PM (power management) interface. However the hardware does not support PCI PM suspend/resume operations so system wide suspend/resume leads to bad MFW (management firmware) state which causes various follow-up errors in driver when communicating with the device/firmware. To fix this driver implements PCI PM suspend handler to indicate unsupported operation to the PCI subsystem explicitly, thus avoiding system to go into suspended/standby mode. Fixes: 61d8658b4a43 ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.") Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230807093725.46829-1-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 2a31ddc99dde..7825765c936c 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -31,6 +31,7 @@ static void qedf_remove(struct pci_dev *pdev); static void qedf_shutdown(struct pci_dev *pdev); static void qedf_schedule_recovery_handler(void *dev); static void qedf_recovery_handler(struct work_struct *work); +static int qedf_suspend(struct pci_dev *pdev, pm_message_t state); /* * Driver module parameters. @@ -3271,6 +3272,7 @@ static struct pci_driver qedf_pci_driver = { .probe = qedf_probe, .remove = qedf_remove, .shutdown = qedf_shutdown, + .suspend = qedf_suspend, }; static int __qedf_probe(struct pci_dev *pdev, int mode) @@ -4000,6 +4002,22 @@ static void qedf_shutdown(struct pci_dev *pdev) __qedf_remove(pdev, QEDF_MODE_NORMAL); } +static int qedf_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct qedf_ctx *qedf; + + if (!pdev) { + QEDF_ERR(NULL, "pdev is NULL.\n"); + return -ENODEV; + } + + qedf = pci_get_drvdata(pdev); + + QEDF_ERR(&qedf->dbg_ctx, "%s: Device does not support suspend operation\n", __func__); + + return -EPERM; +} + /* * Recovery handler code */ -- cgit v1.2.3 From 39163d5479285a36522b6e8f9cc568cc4987db08 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 21 Mar 2023 23:17:58 -0700 Subject: x86/vdso: Choose the right GDT_ENTRY_CPUNODE for 32-bit getcpu() on 64-bit kernel The vDSO getcpu() reads CPU ID from the GDT_ENTRY_CPUNODE entry when the RDPID instruction is not available. And GDT_ENTRY_CPUNODE is defined as 28 on 32-bit Linux kernel and 15 on 64-bit. But the 32-bit getcpu() on 64-bit Linux kernel is compiled with 32-bit Linux kernel GDT_ENTRY_CPUNODE, i.e., 28, beyond the 64-bit Linux kernel GDT limit. Thus, it just fails _silently_. When BUILD_VDSO32_64 is defined, choose the 64-bit Linux kernel GDT definitions to compile the 32-bit getcpu(). Fixes: 877cff5296faa6e ("x86/vdso: Fake 32bit VDSO build on 64bit compile for vgetcpu") Reported-by: kernel test robot Reported-by: Shan Kang Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230322061758.10639-1-xin3.li@intel.com Link: https://lore.kernel.org/oe-lkp/202303020903.b01fd1de-yujie.liu@intel.com --- arch/x86/include/asm/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 794f69625780..9d6411c65920 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -56,7 +56,7 @@ #define GDT_ENTRY_INVALID_SEG 0 -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64) /* * The layout of the per-CPU GDT under Linux: * -- cgit v1.2.3 From d5712cd22b9cf109fded1b7f178f4c1888c8b84b Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 5 Aug 2023 12:18:13 +0200 Subject: drm/nouveau/disp: Revert a NULL check inside nouveau_connector_get_modes The original commit adding that check tried to protect the kenrel against a potential invalid NULL pointer access. However we call nouveau_connector_detect_depth once without a native_mode set on purpose for non LVDS connectors and this broke DP support in a few cases. Cc: Olaf Skibbe Cc: Lyude Paul Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/238 Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/245 Fixes: 20a2ce87fbaf8 ("drm/nouveau/dp: check for NULL nv_connector->native_mode") Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230805101813.2603989-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nouveau_connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index f75c6f09dd2a..a2e0033e8a26 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -967,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector) /* Determine display colour depth for everything except LVDS now, * DP requires this before mode_valid() is called. */ - if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode) + if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) nouveau_connector_detect_depth(connector); /* Find the native mode if this is a digital panel, if we didn't -- cgit v1.2.3 From 9a8fa00dad3c7b260071f2f220cfb00505372c40 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Mon, 3 Jul 2023 14:25:53 +0530 Subject: cpuidle: dt_idle_genpd: Add helper function to remove genpd topology Genpd parent and child domain topology created using dt_idle_pd_init_topology() needs to be removed during error cases. Add new helper function dt_idle_pd_remove_topology() for same. Cc: stable@vger.kernel.org Reviewed-by: Ulf Hanssson Signed-off-by: Maulik Shah Signed-off-by: Ulf Hansson --- drivers/cpuidle/dt_idle_genpd.c | 24 ++++++++++++++++++++++++ drivers/cpuidle/dt_idle_genpd.h | 7 +++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/cpuidle/dt_idle_genpd.c b/drivers/cpuidle/dt_idle_genpd.c index b37165514d4e..1af63c189039 100644 --- a/drivers/cpuidle/dt_idle_genpd.c +++ b/drivers/cpuidle/dt_idle_genpd.c @@ -152,6 +152,30 @@ int dt_idle_pd_init_topology(struct device_node *np) return 0; } +int dt_idle_pd_remove_topology(struct device_node *np) +{ + struct device_node *node; + struct of_phandle_args child, parent; + int ret; + + for_each_child_of_node(np, node) { + if (of_parse_phandle_with_args(node, "power-domains", + "#power-domain-cells", 0, &parent)) + continue; + + child.np = node; + child.args_count = 0; + ret = of_genpd_remove_subdomain(&parent, &child); + of_node_put(parent.np); + if (ret) { + of_node_put(node); + return ret; + } + } + + return 0; +} + struct device *dt_idle_attach_cpu(int cpu, const char *name) { struct device *dev; diff --git a/drivers/cpuidle/dt_idle_genpd.h b/drivers/cpuidle/dt_idle_genpd.h index a95483d08a02..3be1f70f55b5 100644 --- a/drivers/cpuidle/dt_idle_genpd.h +++ b/drivers/cpuidle/dt_idle_genpd.h @@ -14,6 +14,8 @@ struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np, int dt_idle_pd_init_topology(struct device_node *np); +int dt_idle_pd_remove_topology(struct device_node *np); + struct device *dt_idle_attach_cpu(int cpu, const char *name); void dt_idle_detach_cpu(struct device *dev); @@ -36,6 +38,11 @@ static inline int dt_idle_pd_init_topology(struct device_node *np) return 0; } +static inline int dt_idle_pd_remove_topology(struct device_node *np) +{ + return 0; +} + static inline struct device *dt_idle_attach_cpu(int cpu, const char *name) { return NULL; -- cgit v1.2.3 From 12acb348fa4528a4203edf1cce7a3be2c9af2279 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Mon, 3 Jul 2023 14:25:54 +0530 Subject: cpuidle: psci: Move enabling OSI mode after power domains creation A switch from OSI to PC mode is only possible if all CPUs other than the calling one are OFF, either through a call to CPU_OFF or not yet booted. Currently OSI mode is enabled before power domains are created. In cases where CPUidle states are not using hierarchical CPU topology the bail out path tries to switch back to PC mode which gets denied by firmware since other CPUs are online at this point and creates inconsistent state as firmware is in OSI mode and Linux in PC mode. This change moves enabling OSI mode after power domains are created, this would makes sure that hierarchical CPU topology is used before switching firmware to OSI mode. Cc: stable@vger.kernel.org Fixes: 70c179b49870 ("cpuidle: psci: Allow PM domain to be initialized even if no OSI mode") Signed-off-by: Maulik Shah Reviewed-by: Ulf Hansson Signed-off-by: Ulf Hansson --- drivers/cpuidle/cpuidle-psci-domain.c | 39 ++++++++++++----------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index c2d6d9c3c930..b88af1262f1a 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -120,20 +120,6 @@ static void psci_pd_remove(void) } } -static bool psci_pd_try_set_osi_mode(void) -{ - int ret; - - if (!psci_has_osi_support()) - return false; - - ret = psci_set_osi_mode(true); - if (ret) - return false; - - return true; -} - static void psci_cpuidle_domain_sync_state(struct device *dev) { /* @@ -152,15 +138,12 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct device_node *node; - bool use_osi; + bool use_osi = psci_has_osi_support(); int ret = 0, pd_count = 0; if (!np) return -ENODEV; - /* If OSI mode is supported, let's try to enable it. */ - use_osi = psci_pd_try_set_osi_mode(); - /* * Parse child nodes for the "#power-domain-cells" property and * initialize a genpd/genpd-of-provider pair when it's found. @@ -170,33 +153,37 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) continue; ret = psci_pd_init(node, use_osi); - if (ret) - goto put_node; + if (ret) { + of_node_put(node); + goto exit; + } pd_count++; } /* Bail out if not using the hierarchical CPU topology. */ if (!pd_count) - goto no_pd; + return 0; /* Link genpd masters/subdomains to model the CPU topology. */ ret = dt_idle_pd_init_topology(np); if (ret) goto remove_pd; + /* let's try to enable OSI. */ + ret = psci_set_osi_mode(use_osi); + if (ret) + goto remove_pd; + pr_info("Initialized CPU PM domain topology using %s mode\n", use_osi ? "OSI" : "PC"); return 0; -put_node: - of_node_put(node); remove_pd: + dt_idle_pd_remove_topology(np); psci_pd_remove(); +exit: pr_err("failed to create CPU PM domains ret=%d\n", ret); -no_pd: - if (use_osi) - psci_set_osi_mode(false); return ret; } -- cgit v1.2.3 From 1963546390ed8b649f529993a755eba0fdeb7aaa Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Wed, 2 Aug 2023 08:37:35 +0200 Subject: accel/ivpu: Add set_pages_array_wc/uc for internal buffers Buffers mapped with pgprot_writecombined() are not correctly flushed. This triggers issues on VPU access using random memory content such as MMU translation faults, invalid context descriptors being fetched and can lead to VPU FW crashes. Fixes: 647371a6609d ("accel/ivpu: Add GEM buffer object management") Cc: stable@vger.kernel.org # 6.3+ Signed-off-by: Karol Wachowski Reviewed-by: Stanislaw Gruszka Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230802063735.3005291-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 52b339aefadc..9967fcfa27ec 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -173,6 +173,9 @@ static void internal_free_pages_locked(struct ivpu_bo *bo) { unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) + set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT); + for (i = 0; i < npages; i++) put_page(bo->pages[i]); @@ -587,6 +590,11 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT); + if (bo->flags & DRM_IVPU_BO_WC) + set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT); + else if (bo->flags & DRM_IVPU_BO_UNCACHED) + set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT); + prot = ivpu_bo_pgprot(bo, PAGE_KERNEL); bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot); if (!bo->kvaddr) { -- cgit v1.2.3 From 8e7b295da1ed051baedd068b7f785f5d959ef95d Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Mon, 7 Aug 2023 14:42:28 +0800 Subject: MAINTAINERS: Remove maintainer of HiSilicon RoCE Haoyue no longer maintains the Hisilicon RoCE driver. So remove him from MAINTAINERS. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230807064228.4032536-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3be1bdfe8ecc..a65e2f53bffa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9364,7 +9364,6 @@ F: drivers/crypto/hisilicon/sgl.c F: include/linux/hisi_acc_qm.h HISILICON ROCE DRIVER -M: Haoyue Xu M: Junxian Huang L: linux-rdma@vger.kernel.org S: Maintained -- cgit v1.2.3 From 56675f8b9f9b15b024b8e3145fa289b004916ab7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 7 Aug 2023 20:04:09 +0200 Subject: io_uring/parisc: Adjust pgoff in io_uring mmap() for parisc The changes from commit 32832a407a71 ("io_uring: Fix io_uring mmap() by using architecture-provided get_unmapped_area()") to the parisc implementation of get_unmapped_area() broke glibc's locale-gen executable when running on parisc. This patch reverts those architecture-specific changes, and instead adjusts in io_uring_mmu_get_unmapped_area() the pgoff offset which is then given to parisc's get_unmapped_area() function. This is much cleaner than the previous approach, and we still will get a coherent addresss. This patch has no effect on other architectures (SHM_COLOUR is only defined on parisc), and the liburing testcase stil passes on parisc. Cc: stable@vger.kernel.org # 6.4 Signed-off-by: Helge Deller Reported-by: Christoph Biedl Fixes: 32832a407a71 ("io_uring: Fix io_uring mmap() by using architecture-provided get_unmapped_area()") Fixes: d808459b2e31 ("io_uring: Adjust mapping wrt architecture aliasing requirements") Link: https://lore.kernel.org/r/ZNEyGV0jyI8kOOfz@p100 Signed-off-by: Jens Axboe --- arch/parisc/kernel/sys_parisc.c | 15 +++++---------- io_uring/io_uring.c | 3 +++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ca2d537e25b1..9915062d5243 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -27,17 +27,12 @@ #include /* - * Construct an artificial page offset for the mapping based on the virtual + * Construct an artificial page offset for the mapping based on the physical * address of the kernel file mapping variable. - * If filp is zero the calculated pgoff value aliases the memory of the given - * address. This is useful for io_uring where the mapping shall alias a kernel - * address and a userspace adress where both the kernel and the userspace - * access the same memory region. */ -#define GET_FILP_PGOFF(filp, addr) \ - ((filp ? (((unsigned long) filp->f_mapping) >> 8) \ - & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL) \ - + (addr >> PAGE_SHIFT)) +#define GET_FILP_PGOFF(filp) \ + (filp ? (((unsigned long) filp->f_mapping) >> 8) \ + & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL) static unsigned long shared_align_offset(unsigned long filp_pgoff, unsigned long pgoff) @@ -117,7 +112,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; - filp_pgoff = GET_FILP_PGOFF(filp, addr); + filp_pgoff = GET_FILP_PGOFF(filp); if (flags & MAP_FIXED) { /* Even MAP_FIXED mappings must reside within TASK_SIZE */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f4591b912ea8..93db3e4e7b68 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3470,6 +3470,8 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp, * - use the kernel virtual address of the shared io_uring context * (instead of the userspace-provided address, which has to be 0UL * anyway). + * - use the same pgoff which the get_unmapped_area() uses to + * calculate the page colouring. * For architectures without such aliasing requirements, the * architecture will return any suitable mapping because addr is 0. */ @@ -3478,6 +3480,7 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp, pgoff = 0; /* has been translated to ptr above */ #ifdef SHM_COLOUR addr = (uintptr_t) ptr; + pgoff = addr >> PAGE_SHIFT; #else addr = 0UL; #endif -- cgit v1.2.3 From cc22522fd55e257c86d340ae9aedc122e705a435 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 26 Jul 2023 14:35:18 +0200 Subject: PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 40613da52b13 ("PCI: acpiphp: Reassign resources on bridge if necessary") changed acpiphp hotplug to use pci_assign_unassigned_bridge_resources() which depends on bridge being available, however enable_slot() can be called without bridge associated: 1. Legitimate case of hotplug on root bus (widely used in virt world) 2. A (misbehaving) firmware, that sends ACPI Bus Check notifications to non existing root ports (Dell Inspiron 7352/0W6WV0), which end up at enable_slot(..., bridge = 0) where bus has no bridge assigned to it. acpihp doesn't know that it's a bridge, and bus specific 'PCI subsystem' can't augment ACPI context with bridge information since the PCI device to get this data from is/was not available. Issue is easy to reproduce with QEMU's 'pc' machine, which supports PCI hotplug on hostbridge slots. To reproduce, boot kernel at commit 40613da52b13 in VM started with following CLI (assuming guest root fs is installed on sda1 partition): # qemu-system-x86_64 -M pc -m 1G -enable-kvm -cpu host \ -monitor stdio -serial file:serial.log \ -kernel arch/x86/boot/bzImage \ -append "root=/dev/sda1 console=ttyS0" \ guest_disk.img Once guest OS is fully booted at qemu prompt: (qemu) device_add e1000 (check serial.log) it will cause NULL pointer dereference at: void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) { struct pci_bus *parent = bridge->subordinate; BUG: kernel NULL pointer dereference, address: 0000000000000018 ? pci_assign_unassigned_bridge_resources+0x1f/0x260 enable_slot+0x21f/0x3e0 acpiphp_hotplug_notify+0x13d/0x260 acpi_device_hotplug+0xbc/0x540 acpi_hotplug_work_fn+0x15/0x20 process_one_work+0x1f7/0x370 worker_thread+0x45/0x3b0 The issue was discovered on Dell Inspiron 7352/0W6WV0 laptop with following sequence: 1. Suspend to RAM 2. Wake up with the same backtrace being observed: 3. 2nd suspend to RAM attempt makes laptop freeze Fix it by using __pci_bus_assign_resources() instead of pci_assign_unassigned_bridge_resources() as we used to do, but only in case when bus doesn't have a bridge associated (to cover for the case of ACPI event on hostbridge or non existing root port). That lets us keep hotplug on root bus working like it used to and at the same time keeps resource reassignment usable on root ports (and other 1st level bridges) that was fixed by 40613da52b13. Fixes: 40613da52b13 ("PCI: acpiphp: Reassign resources on bridge if necessary") Link: https://lore.kernel.org/r/20230726123518.2361181-2-imammedo@redhat.com Reported-by: Woody Suwalski Tested-by: Woody Suwalski Tested-by: Michal Koutný Link: https://lore.kernel.org/r/11fc981c-af49-ce64-6b43-3e282728bd1a@gmail.com Signed-off-by: Igor Mammedov Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Acked-by: Michael S. Tsirkin --- drivers/pci/hotplug/acpiphp_glue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 328d1e416014..601129772b2d 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -498,6 +498,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) acpiphp_native_scan_bridge(dev); } } else { + LIST_HEAD(add_list); int max, pass; acpiphp_rescan_slot(slot); @@ -511,10 +512,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) if (pass && dev->subordinate) { check_hotplug_bridge(slot, dev); pcibios_resource_survey_bus(dev->subordinate); + if (pci_is_root_bus(bus)) + __pci_bus_size_bridges(dev->subordinate, &add_list); } } } - pci_assign_unassigned_bridge_resources(bus->self); + if (pci_is_root_bus(bus)) + __pci_bus_assign_resources(bus, &add_list, NULL); + else + pci_assign_unassigned_bridge_resources(bus->self); } acpiphp_sanitize_bus(bus); -- cgit v1.2.3 From 7324f74d39531262b8e362f228b46512e6bee632 Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Tue, 8 Aug 2023 14:23:53 -0400 Subject: x86/linkage: Fix typo of BUILD_VDSO in asm/linkage.h The BUILD_VDSO macro was incorrectly spelled as BULID_VDSO in asm/linkage.h. This causes the !defined(BULID_VDSO) directive to always evaluate to true. Correct the spelling to BUILD_VDSO. Fixes: bea75b33895f ("x86/Kconfig: Introduce function padding") Signed-off-by: Jinghao Jia Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Acked-by: Randy Dunlap Cc: Link: https://lore.kernel.org/r/20230808182353.76218-1-jinghao@linux.ibm.com --- arch/x86/include/asm/linkage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 0953aa32a324..97a3de7892d3 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -21,7 +21,7 @@ #define FUNCTION_PADDING #endif -#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO) +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) # define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING #else # define __FUNC_ALIGN __ALIGN -- cgit v1.2.3 From d74f714896fd6268882789ba28e52c9145951403 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Aug 2023 11:03:28 -0600 Subject: block: get rid of unused plug->nowait flag This was introduced to add a plug based way of signaling nowait issues, but we have since moved on from that. Kill the old dead code, nobody is setting it anymore. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ------ include/linux/blkdev.h | 1 - 2 files changed, 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 90de50082146..9866468c72a2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -722,14 +722,9 @@ void submit_bio_noacct(struct bio *bio) struct block_device *bdev = bio->bi_bdev; struct request_queue *q = bdev_get_queue(bdev); blk_status_t status = BLK_STS_IOERR; - struct blk_plug *plug; might_sleep(); - plug = blk_mq_plug(bio); - if (plug && plug->nowait) - bio->bi_opf |= REQ_NOWAIT; - /* * For a REQ_NOWAIT based request, return -EOPNOTSUPP * if queue does not support NOWAIT. @@ -1059,7 +1054,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) plug->rq_count = 0; plug->multiple_queues = false; plug->has_elevator = false; - plug->nowait = false; INIT_LIST_HEAD(&plug->cb_list); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ed44a997f629..87d94be7825a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -969,7 +969,6 @@ struct blk_plug { bool multiple_queues; bool has_elevator; - bool nowait; struct list_head cb_list; /* md requires an unplug callback */ }; -- cgit v1.2.3 From 6514f81e1bd55cbe419a5001a4ce910acc276211 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 2 Aug 2023 18:26:06 -0700 Subject: riscv: Fix CPU feature detection with SMP disabled commit 914d6f44fc50 ("RISC-V: only iterate over possible CPUs in ISA string parser") changed riscv_fill_hwcap() from iterating over CPU DT nodes to iterating over logical CPU IDs. Since this function runs long before cpu_dev_init() creates CPU devices, it hits the fallback path in of_cpu_device_node_get(), which itself iterates over the DT nodes, searching for a node with the requested CPU ID. (Incidentally, this makes riscv_fill_hwcap() now take quadratic time.) riscv_fill_hwcap() passes a logical CPU ID to of_cpu_device_node_get(), which uses the arch_match_cpu_phys_id() hook to translate the logical ID to a physical ID as found in the DT. arch_match_cpu_phys_id() has a generic weak definition, and RISC-V provides a strong definition using cpuid_to_hartid_map(). However, the RISC-V specific implementation is located in arch/riscv/kernel/smp.c, and that file is only compiled when SMP is enabled. As a result, when SMP is disabled, the generic definition is used, and riscv_isa gets initialized based on the ISA string of hart 0, not the boot hart. On FU740, this means has_fpu() returns false, and userspace crashes when trying to use floating-point instructions. Fix this by moving arch_match_cpu_phys_id() to a file which is always compiled. Fixes: 70114560b285 ("RISC-V: Add RISC-V specific arch_match_cpu_phys_id") Fixes: 914d6f44fc50 ("RISC-V: only iterate over possible CPUs in ISA string parser") Reported-by: Palmer Dabbelt Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230803012608.3540081-1-samuel.holland@sifive.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu.c | 5 +++++ arch/riscv/kernel/smp.c | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index a2fc952318e9..35b854cf078e 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -17,6 +17,11 @@ #include #include +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_hartid_map(cpu); +} + /* * Returns the hart ID of the given device tree node, or -ENODEV if the node * isn't an enabled and valid RISC-V hart node. diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 85bbce0f758c..40420afbb1a0 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid) return -ENOENT; } -bool arch_match_cpu_phys_id(int cpu, u64 phys_id) -{ - return phys_id == cpuid_to_hartid_map(cpu); -} - static void ipi_stop(void) { set_cpu_online(smp_processor_id(), false); -- cgit v1.2.3 From 4eb2eb1b4c0eb07793c240744843498564a67b83 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 3 Aug 2023 06:27:38 +0200 Subject: riscv,mmio: Fix readX()-to-delay() ordering Section 2.1 of the Platform Specification [1] states: Unless otherwise specified by a given I/O device, I/O devices are on ordering channel 0 (i.e., they are point-to-point strongly ordered). which is not sufficient to guarantee that a readX() by a hart completes before a subsequent delay() on the same hart (cf. memory-barriers.txt, "Kernel I/O barrier effects"). Set the I(nput) bit in __io_ar() to restore the ordering, align inline comments. [1] https://github.com/riscv/riscv-platform-specs Signed-off-by: Andrea Parri Link: https://lore.kernel.org/r/20230803042738.5937-1-parri.andrea@gmail.com Fixes: fab957c11efe ("RISC-V: Atomic and Locking Code") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmio.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index aff6c33ab0c0..4c58ee7f95ec 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * Relaxed I/O memory access primitives. These follow the Device memory * ordering rules but do not guarantee any ordering relative to Normal memory * accesses. These are defined to order the indicated access (either a read or - * write) with all other I/O memory accesses. Since the platform specification - * defines that all I/O regions are strongly ordered on channel 2, no explicit - * fences are required to enforce this ordering. + * write) with all other I/O memory accesses to the same peripheral. Since the + * platform specification defines that all I/O regions are strongly ordered on + * channel 0, no explicit fences are required to enforce this ordering. */ /* FIXME: These are now the same as asm-generic */ #define __io_rbr() do {} while (0) @@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #endif /* - * I/O memory access primitives. Reads are ordered relative to any - * following Normal memory access. Writes are ordered relative to any prior - * Normal memory access. The memory barriers here are necessary as RISC-V + * I/O memory access primitives. Reads are ordered relative to any following + * Normal memory read and delay() loop. Writes are ordered relative to any + * prior Normal memory write. The memory barriers here are necessary as RISC-V * doesn't define any ordering between the memory space and the I/O space. */ #define __io_br() do {} while (0) -#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory") -#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory") +#define __io_ar(v) ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); }) +#define __io_bw() ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); }) #define __io_aw() mmiowb_set_pending() #define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; }) -- cgit v1.2.3 From d2402048bc8a206a56fde4bc41dd01336c7b5a21 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 8 Aug 2023 09:35:00 -0700 Subject: riscv: mm: fix 2 instances of -Wmissing-variable-declarations I'm looking to enable -Wmissing-variable-declarations behind W=1. 0day bot spotted the following instance in ARCH=riscv builds: arch/riscv/mm/init.c:276:7: warning: no previous extern declaration for non-static variable 'trampoline_pg_dir' [-Wmissing-variable-declarations] 276 | pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; | ^ arch/riscv/mm/init.c:276:1: note: declare 'static' if the variable is not intended to be used outside of this translation unit 276 | pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; | ^ arch/riscv/mm/init.c:279:7: warning: no previous extern declaration for non-static variable 'early_pg_dir' [-Wmissing-variable-declarations] 279 | pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); | ^ arch/riscv/mm/init.c:279:1: note: declare 'static' if the variable is not intended to be used outside of this translation unit 279 | pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); | ^ These symbols are referenced by more than one translation unit, so make sure they're both declared and include the correct header for their declarations. Finally, sort the list of includes to help keep them tidy. Reported-by: kernel test robot Closes: https://lore.kernel.org/llvm/202308081000.tTL1ElTr-lkp@intel.com/ Signed-off-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230808-riscv_static-v2-1-2a1e2d2c7a4f@google.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 2 ++ arch/riscv/mm/init.c | 9 +++++---- arch/riscv/mm/kasan_init.c | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..b5680c940c1e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata; #define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP) extern pgd_t swapper_pg_dir[]; +extern pgd_t trampoline_pg_dir[]; +extern pgd_t early_pg_dir[]; #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_present(pmd_t pmd) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ad845c3aa9b2..e4c35ac2357f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -26,12 +26,13 @@ #include #include -#include -#include -#include #include -#include #include +#include +#include +#include +#include +#include #include "../kernel/head.h" diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 8fc0efcf905c..a01bc15dce24 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -22,7 +22,6 @@ * region is not and then we have to go down to the PUD level. */ -extern pgd_t early_pg_dir[PTRS_PER_PGD]; pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss; p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss; pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss; -- cgit v1.2.3 From 43dae319b50fac075ad864f84501c703ef20eb2b Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Tue, 8 Aug 2023 11:44:05 +0100 Subject: drm/rockchip: Don't spam logs in atomic check Userspace should not be able to trigger DRM_ERROR messages to spam the logs; especially not through atomic commit parameters which are completely legitimate for userspace to attempt. Signed-off-by: Daniel Stone Fixes: 7707f7227f09 ("drm/rockchip: Add support for afbc") Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20230808104405.522493-1-daniels@collabora.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index a530ecc4d207..bf34498c1b6d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -833,12 +833,12 @@ static int vop_plane_atomic_check(struct drm_plane *plane, * need align with 2 pixel. */ if (fb->format->is_yuv && ((new_plane_state->src.x1 >> 16) % 2)) { - DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n"); + DRM_DEBUG_KMS("Invalid Source: Yuv format not support odd xpos\n"); return -EINVAL; } if (fb->format->is_yuv && new_plane_state->rotation & DRM_MODE_REFLECT_Y) { - DRM_ERROR("Invalid Source: Yuv format does not support this rotation\n"); + DRM_DEBUG_KMS("Invalid Source: Yuv format does not support this rotation\n"); return -EINVAL; } @@ -846,7 +846,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane, struct vop *vop = to_vop(crtc); if (!vop->data->afbc) { - DRM_ERROR("vop does not support AFBC\n"); + DRM_DEBUG_KMS("vop does not support AFBC\n"); return -EINVAL; } @@ -855,15 +855,16 @@ static int vop_plane_atomic_check(struct drm_plane *plane, return ret; if (new_plane_state->src.x1 || new_plane_state->src.y1) { - DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n", - new_plane_state->src.x1, - new_plane_state->src.y1, fb->offsets[0]); + DRM_DEBUG_KMS("AFBC does not support offset display, " \ + "xpos=%d, ypos=%d, offset=%d\n", + new_plane_state->src.x1, new_plane_state->src.y1, + fb->offsets[0]); return -EINVAL; } if (new_plane_state->rotation && new_plane_state->rotation != DRM_MODE_ROTATE_0) { - DRM_ERROR("No rotation support in AFBC, rotation=%d\n", - new_plane_state->rotation); + DRM_DEBUG_KMS("No rotation support in AFBC, rotation=%d\n", + new_plane_state->rotation); return -EINVAL; } } -- cgit v1.2.3 From 08fffa74d9772d9538338be3f304006c94dde6f0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 27 Jul 2023 10:22:20 -0500 Subject: drm/amd: Disable S/G for APUs when 64GB or more host memory Users report a white flickering screen on multiple systems that is tied to having 64GB or more memory. When S/G is enabled pages will get pinned to both VRAM carve out and system RAM leading to this. Until it can be fixed properly, disable S/G when 64GB of memory or more is detected. This will force pages to be pinned into VRAM. This should fix white screen flickers but if VRAM pressure is encountered may lead to black screens. It's a trade-off for now. Fixes: 81d0bcf99009 ("drm/amdgpu: make display pinning more flexible (v2)") Cc: Hamza Mahfooz Cc: Roman Li Cc: # 6.1.y: bf0207e172703 ("drm/amdgpu: add S/G display parameter") Cc: # 6.4.y Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2735 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2354 Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +++++++++++++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++--- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a3b86b86dc47..6dc950c1b689 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1296,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); +bool amdgpu_sg_display_supported(struct amdgpu_device *adev); bool amdgpu_device_pcie_dynamic_switching_supported(void); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); bool amdgpu_device_aspm_support_quirk(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a2cdde0ca0a7..45e9d737e5b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1458,6 +1458,32 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; } +/* + * On APUs with >= 64GB white flickering has been observed w/ SG enabled. + * Disable S/G on such systems until we have a proper fix. + * https://gitlab.freedesktop.org/drm/amd/-/issues/2354 + * https://gitlab.freedesktop.org/drm/amd/-/issues/2735 + */ +bool amdgpu_sg_display_supported(struct amdgpu_device *adev) +{ + switch (amdgpu_sg_display) { + case -1: + break; + case 0: + return false; + case 1: + return true; + default: + return false; + } + if ((totalram_pages() << (PAGE_SHIFT - 10)) + + (adev->gmc.real_vram_size / 1024) >= 64000000) { + DRM_WARN("Disabling S/G due to >=64GB RAM\n"); + return false; + } + return true; +} + /* * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic * speed switching. Until we have confirmation from Intel that a specific host diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0fa739fd6a9c..e5554a36e8c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1638,9 +1638,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } break; } - if (init_data.flags.gpu_vm_support && - (amdgpu_sg_display == 0)) - init_data.flags.gpu_vm_support = false; + if (init_data.flags.gpu_vm_support) + init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev); if (init_data.flags.gpu_vm_support) adev->mode_info.gpu_vm_support = true; -- cgit v1.2.3 From 730d44e1fa306a20746ad4a85da550662aed9daa Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 27 Jul 2023 09:59:45 +0800 Subject: drm/amd/pm: skip the RLC stop when S0i3 suspend for SMU v13.0.4/11 For SMU v13.0.4/11, driver does not need to stop RLC for S0i3, the firmwares will handle that properly. Signed-off-by: Tim Huang Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ce41a8309582..222af2fae745 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1581,9 +1581,9 @@ static int smu_disable_dpms(struct smu_context *smu) /* * For SMU 13.0.4/11, PMFW will handle the features disablement properly - * for gpu reset case. Driver involvement is unnecessary. + * for gpu reset and S0i3 cases. Driver involvement is unnecessary. */ - if (amdgpu_in_reset(adev)) { + if (amdgpu_in_reset(adev) || adev->in_s0ix) { switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): -- cgit v1.2.3 From d3de41ee5febe5c2d9989fe9810bce2bb54a3a8e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 25 Jul 2023 19:11:54 +0530 Subject: drm/amdgpu: Match against exact bootloader status On PSP v13.x ASICs, boot loader will set only the MSB to 1 and clear the least significant bits for any command submission. Hence match against the exact register value, otherwise a register value of all 0xFFs also could falsely indicate that boot loader is ready. Also, from PSP v13.0.6 and newer, bits[7:0] will be used to indicate command error status. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index e1a392bcea70..af5685f4cb34 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -137,14 +137,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) int ret; int retry_loop; + /* Wait for bootloader to signify that it is ready having bit 31 of + * C2PMSG_35 set to 1. All other bits are expected to be cleared. + * If there is an error in processing command, bits[7:0] will be set. + * This is applicable for PSP v13.0.6 and newer. + */ for (retry_loop = 0; retry_loop < 10; retry_loop++) { - /* Wait for bootloader to signify that is - ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0xffffffff, false); if (ret == 0) return 0; -- cgit v1.2.3 From 7ad1dfc144cbf62702fd07838da8fd8a77921083 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 31 Jul 2023 09:22:05 -0500 Subject: drm/amd/display: Don't show stack trace for missing eDP Some systems are only connected by HDMI or DP, so warning related to missing eDP is unnecessary. Downgrade to debug instead. Cc: Hamza Mahfooz Fixes: 6d9b6dceaa51 ("drm/amd/display: only warn once in dce110_edp_wait_for_hpd_ready()") Reported-by: Mastan.Katragadda@amd.com Signed-off-by: Mario Limonciello Reviewed-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 20d4d08a6a2f..6966420dfbac 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -777,7 +777,8 @@ void dce110_edp_wait_for_hpd_ready( dal_gpio_destroy_irq(&hpd); /* ensure that the panel is detected */ - ASSERT(edp_hpd_high); + if (!edp_hpd_high) + DC_LOG_DC("%s: wait timed out!\n", __func__); } void dce110_edp_power_control( -- cgit v1.2.3 From bd60e2eafd8fb053948b6e23e8167baf7a159750 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 27 Jul 2023 19:37:31 +0800 Subject: drm/amd/pm: correct the pcie width for smu 13.0.0 correct the pcie width value in pp_dpm_pcie for smu 13.0.0 Signed-off-by: Kenneth Feng Reviewed-by: Harish Kasiviswanathan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3d188616ba24..e191bbe9e994 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1140,7 +1140,6 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; struct smu_13_0_dpm_table *single_dpm_table; struct smu_13_0_pcie_table *pcie_table; - const int link_width[] = {0, 1, 2, 4, 8, 12, 16}; uint32_t gen_speed, lane_width; int i, curr_freq, size = 0; int32_t min_value, max_value; @@ -1256,7 +1255,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && - (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ? + (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ? "*" : ""); break; -- cgit v1.2.3 From 61319b8e3b58a7167cf146313fd4523fe72586bc Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 21 Jul 2023 19:18:00 +0800 Subject: drm/amd/pm: disable the SMU13 OD feature support temporarily The existing OD interface cannot support the growing demand for more OD features. We are in the transition to a new OD mechanism. So, disable the SMU13 OD feature support temporarily. And this should be reverted when the new OD mechanism online. Signed-off-by: Evan Quan Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 17 ++++++++++++++--- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 12 +++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e191bbe9e994..fddcd834bcec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -331,11 +331,13 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) struct smu_13_0_0_powerplay_table *powerplay_table = table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; +#if 0 PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; const OverDriveLimits_t * const overdrive_lowerlimits = &pptable->SkuTable.OverDriveLimitsMin; +#endif if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; @@ -347,18 +349,27 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) smu_baco->maco_support = true; + /* + * We are in the transition to a new OD mechanism. + * Disable the OD feature support for SMU13 temporarily. + * TODO: get this reverted when new OD mechanism online + */ +#if 0 if (!overdrive_lowerlimits->FeatureCtrlMask || !overdrive_upperlimits->FeatureCtrlMask) smu->od_enabled = false; - table_context->thermal_controller_type = - powerplay_table->thermal_controller_type; - /* * Instead of having its own buffer space and get overdrive_table copied, * smu->od_settings just points to the actual overdrive_table */ smu->od_settings = &powerplay_table->overdrive_table; +#else + smu->od_enabled = false; +#endif + + table_context->thermal_controller_type = + powerplay_table->thermal_controller_type; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index b1f0937ccade..62f2886ab4df 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -323,10 +323,12 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) struct smu_baco_context *smu_baco = &smu->smu_baco; PPTable_t *smc_pptable = table_context->driver_pptable; BoardTable_t *BoardTable = &smc_pptable->BoardTable; +#if 0 const OverDriveLimits_t * const overdrive_upperlimits = &smc_pptable->SkuTable.OverDriveLimitsBasicMax; const OverDriveLimits_t * const overdrive_lowerlimits = &smc_pptable->SkuTable.OverDriveLimitsMin; +#endif if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; @@ -338,18 +340,22 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) smu_baco->maco_support = true; +#if 0 if (!overdrive_lowerlimits->FeatureCtrlMask || !overdrive_upperlimits->FeatureCtrlMask) smu->od_enabled = false; - table_context->thermal_controller_type = - powerplay_table->thermal_controller_type; - /* * Instead of having its own buffer space and get overdrive_table copied, * smu->od_settings just points to the actual overdrive_table */ smu->od_settings = &powerplay_table->overdrive_table; +#else + smu->od_enabled = false; +#endif + + table_context->thermal_controller_type = + powerplay_table->thermal_controller_type; return 0; } -- cgit v1.2.3 From 96b020e2163fb2197266b2f71b1007495206e6bb Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Mon, 31 Jul 2023 07:35:05 -0100 Subject: drm/amd/display: check attr flag before set cursor degamma on DCN3+ Don't set predefined degamma curve to cursor plane if the cursor attribute flag is not set. Applying a degamma curve to the cursor by default breaks userspace expectation. Checking the flag before performing any color transformation prevents too dark cursor gamma in DCN3+ on many Linux desktop environment (KDE Plasma, GNOME, wlroots-based, etc.) as reported at: - https://gitlab.freedesktop.org/drm/amd/-/issues/1513 This is the same approach followed by DCN2 drivers where the issue is not present. Fixes: 03f54d7d3448 ("drm/amd/display: Add DCN3 DPP") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1513 Signed-off-by: Melissa Wen Reviewed-by: Harry Wentland Tested-by: Alex Hung Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index e5b7ef7422b8..50dc83404644 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -357,8 +357,11 @@ void dpp3_set_cursor_attributes( int cur_rom_en = 0; if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || - color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) - cur_rom_en = 1; + color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } + } REG_UPDATE_3(CURSOR0_CONTROL, CUR0_MODE, color_format, -- cgit v1.2.3 From a73ea79a0c94bacfab4df23a1043644d14f56591 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 2 Aug 2023 10:24:36 +0530 Subject: drm/amd/pm: Fix SMU v13.0.6 energy reporting Energy counter should be reported in units of 15.259 uJ. Don't apply any conversion. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 1ac552142763..fe4ee2daa5d8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1993,9 +1993,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->average_socket_power = SMUQ10_TO_UINT(metrics->SocketPower); - /* Energy is reported in 15.625mJ units */ - gpu_metrics->energy_accumulator = - SMUQ10_TO_UINT(metrics->SocketEnergyAcc); + /* Energy counter reported in 15.259uJ (2^-16) units */ + gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc; gpu_metrics->current_gfxclk = SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]); -- cgit v1.2.3 From 90e065677e0362a777b9db97ea21d43a39211399 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Jul 2023 11:14:05 -0400 Subject: drm/amdgpu: fix possible UAF in amdgpu_cs_pass1() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the gang_size check is outside of chunk parsing loop, we need to reset i before we free the chunk data. Suggested by Ye Zhang (@VAR10CK) of Baidu Security. Reviewed-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 040f4cb6ab2d..fb78a8f47587 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -295,7 +295,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (!p->gang_size) { ret = -EINVAL; - goto free_partial_kdata; + goto free_all_kdata; } for (i = 0; i < p->gang_size; ++i) { -- cgit v1.2.3 From 3bb575572bf498a9d39e9d1ca5c06cc3152928a1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Jul 2023 17:04:01 -0500 Subject: drm/amd/display: Fix a regression on Polaris cards DCE products don't define a `remove_stream_from_ctx` like DCN ones do. This means that when compute_mst_dsc_configs_for_state() is called it always returns -EINVAL which causes MST to fail to setup. Cc: stable@vger.kernel.org # 6.4.y Cc: Harry Wentland Reported-by: Klaus.Kusche@computerix.info Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2671 Fixes: efa4c4df864e ("drm/amd/display: call remove_stream_from_ctx from res_pool funcs") Signed-off-by: Mario Limonciello Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9bc86deac9e8..b885c39bd16b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1320,7 +1320,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (computed_streams[i]) continue; - if (!res_pool->funcs->remove_stream_from_ctx || + if (res_pool->funcs->remove_stream_from_ctx && res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) return -EINVAL; -- cgit v1.2.3 From 2e91e731f24817bc55f9c9acc95a8939c4077b05 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Jul 2023 16:53:49 -0400 Subject: drm/amdgpu/gfx11: only enable CP GFX shadowing on SR-IOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only required for SR-IOV world switches, but it adds additional latency leading to reduced performance in some benchmarks. Disable for now on bare metal. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3a7af59e83ca..0451533ddde4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): if ((adev->gfx.me_fw_version >= 1505) && (adev->gfx.pfp_fw_version >= 1600) && - (adev->gfx.mec_fw_version >= 512)) - adev->gfx.cp_gfx_shadow = true; + (adev->gfx.mec_fw_version >= 512)) { + if (amdgpu_sriov_vf(adev)) + adev->gfx.cp_gfx_shadow = true; + else + adev->gfx.cp_gfx_shadow = false; + } break; default: adev->gfx.cp_gfx_shadow = false; -- cgit v1.2.3 From a6dea2d64ff92851e68cd4e20a35f6534286e016 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Jul 2023 11:45:53 -0400 Subject: drm/amdkfd: ignore crat by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are dropping the IOMMUv2 path, so no need to enable this. It's often buggy on consumer platforms anyway. Reviewed-by: Felix Kuehling Acked-by: Christian König Tested-by: Mike Lothian Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 49f40d9f16e8..f5a6f562e2a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void) if (ignore_crat) return true; -#ifndef KFD_SUPPORT_IOMMU_V2 ret = true; -#else - ret = false; -#endif return ret; } -- cgit v1.2.3 From 616f92d188ee7142a95a52068efdbea82645f859 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Jul 2023 11:34:59 -0400 Subject: drm/amdkfd: disable IOMMUv2 support for KV/CZ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the dGPU path instead. There were a lot of platform issues with IOMMU in general on these chips due to windows not enabling IOMMU at the time. The dGPU path has been used for a long time with newer APUs and works fine. This also paves the way to simplify the driver significantly. v2: use the dGPU queue manager functions Reviewed-by: Felix Kuehling Acked-by: Christian König Tested-by: Mike Lothian Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 ------ drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0b3dc754e06b..7ae44d68ddc9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -233,10 +233,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd, asic_type != CHIP_TONGA) kfd->device_info.supports_cwsr = true; - if (asic_type == CHIP_KAVERI || - asic_type == CHIP_CARRIZO) - kfd->device_info.needs_iommu_device = true; - if (asic_type != CHIP_HAWAII && !vf) kfd->device_info.needs_pci_atomics = true; } @@ -249,7 +245,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) uint32_t gfx_target_version = 0; switch (adev->asic_type) { -#ifdef KFD_SUPPORT_IOMMU_V2 #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: gfx_target_version = 70000; @@ -262,7 +257,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) if (!vf) f2g = &gfx_v8_kfd2kgd; break; -#endif #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_HAWAII: gfx_target_version = 70001; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2df153828ff4..01192f5abe46 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2538,18 +2538,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) } switch (dev->adev->asic_type) { - case CHIP_CARRIZO: - device_queue_manager_init_vi(&dqm->asic_ops); - break; - case CHIP_KAVERI: - device_queue_manager_init_cik(&dqm->asic_ops); - break; - case CHIP_HAWAII: device_queue_manager_init_cik_hawaii(&dqm->asic_ops); break; + case CHIP_CARRIZO: case CHIP_TONGA: case CHIP_FIJI: case CHIP_POLARIS10: -- cgit v1.2.3 From 091ae5473f96ced844af6ba39b94757359b12348 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Jul 2023 11:38:02 -0400 Subject: drm/amdkfd: disable IOMMUv2 support for Raven MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the dGPU path instead. There were a lot of platform issues with IOMMU in general on these chips due to windows not enabling IOMMU at the time. The dGPU path has been used for a long time with newer APUs and works fine. This also paves the way to simplify the driver significantly. Reviewed-by: Felix Kuehling Acked-by: Christian König Tested-by: Mike Lothian Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7ae44d68ddc9..a53e0757fe64 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -194,11 +194,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd, kfd_device_info_set_event_interrupt_class(kfd); - /* Raven */ - if (gc_version == IP_VERSION(9, 1, 0) || - gc_version == IP_VERSION(9, 2, 2)) - kfd->device_info.needs_iommu_device = true; - if (gc_version < IP_VERSION(11, 0, 0)) { /* Navi2x+, Navi1x+ */ if (gc_version == IP_VERSION(10, 3, 6)) @@ -292,7 +287,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) gfx_target_version = 90000; f2g = &gfx_v9_kfd2kgd; break; -#ifdef KFD_SUPPORT_IOMMU_V2 /* Raven */ case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): @@ -300,7 +294,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) if (!vf) f2g = &gfx_v9_kfd2kgd; break; -#endif /* Vega12 */ case IP_VERSION(9, 2, 1): gfx_target_version = 90004; -- cgit v1.2.3 From 6ccbd7fd474674654019a20177c943359469103a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 29 Jul 2023 16:42:23 +0900 Subject: alpha: remove __init annotation from exported page_is_ram() EXPORT_SYMBOL and __init is a bad combination because the .init.text section is freed up after the initialization. Commit c5a130325f13 ("ACPI/APEI: Add parameter check before error injection") exported page_is_ram(), hence the __init annotation should be removed. This fixes the modpost warning in ARCH=alpha builds: WARNING: modpost: vmlinux: page_is_ram: EXPORT_SYMBOL used for init symbol. Remove __init or EXPORT_SYMBOL. Fixes: c5a130325f13 ("ACPI/APEI: Add parameter check before error injection") Signed-off-by: Masahiro Yamada Reviewed-by: Randy Dunlap --- arch/alpha/kernel/setup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index b650ff1cb022..3d7473531ab1 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -385,8 +385,7 @@ setup_memory(void *kernel_end) #endif /* CONFIG_BLK_DEV_INITRD */ } -int __init -page_is_ram(unsigned long pfn) +int page_is_ram(unsigned long pfn) { struct memclust_struct * cluster; struct memdesc_struct * memdesc; -- cgit v1.2.3 From 2d331a6ac4815e2e2fe5f2d80d908566e57797cc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 9 Aug 2023 10:55:23 +0200 Subject: ACPI: resource: revert "Remove "Zen" specific match and quirks" Commit a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks") is causing keyboard problems for quite a log of AMD based laptop users, leading to many bug reports. Revert this change for now, until we can come up with a better fix for the PS/2 IRQ trigger-type/polarity problems on some x86 laptops. Fixes: a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2228891 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2229165 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2229317 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217718 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217726 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217731 Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 1dd8d5aebf67..0800a9d77558 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -470,6 +470,52 @@ static const struct dmi_system_id asus_laptop[] = { { } }; +static const struct dmi_system_id lenovo_laptop[] = { + { + .ident = "LENOVO IdeaPad Flex 5 14ALC7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82R9"), + }, + }, + { + .ident = "LENOVO IdeaPad Flex 5 16ALC7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82RA"), + }, + }, + { } +}; + +static const struct dmi_system_id tongfang_gm_rg[] = { + { + .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"), + }, + }, + { } +}; + +static const struct dmi_system_id maingear_laptop[] = { + { + .ident = "MAINGEAR Vector Pro 2 15", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"), + DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"), + } + }, + { + .ident = "MAINGEAR Vector Pro 2 17", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"), + DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"), + }, + }, + { } +}; + static const struct dmi_system_id lg_laptop[] = { { .ident = "LG Electronics 17U70P", @@ -493,6 +539,10 @@ struct irq_override_cmp { static const struct irq_override_cmp override_table[] = { { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, + { lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, + { lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, + { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, + { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, }; @@ -512,6 +562,16 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, return entry->override; } +#ifdef CONFIG_X86 + /* + * IRQ override isn't needed on modern AMD Zen systems and + * this override breaks active low IRQs on AMD Ryzen 6000 and + * newer systems. Skip it. + */ + if (boot_cpu_has(X86_FEATURE_ZEN)) + return false; +#endif + return true; } -- cgit v1.2.3 From 9728ac221160c5ea111879125a7694bb81364720 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 9 Aug 2023 10:55:24 +0200 Subject: ACPI: resource: Always use MADT override IRQ settings for all legacy non i8042 IRQs All the cases, were the DSDT IRQ settings should be used instead of the MADT override, are for IRQ 1 or 12, the PS/2 kbd resp. mouse IRQs. Simplify things by always honering the override for other legacy IRQs (for non DMI quirked cases). This allows removing the DMI quirks to honor the override for some non i8042 IRQs on some AMD ZEN based Lenovo models. Fixes: a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks") Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 0800a9d77558..380cda1e86f4 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -470,24 +470,6 @@ static const struct dmi_system_id asus_laptop[] = { { } }; -static const struct dmi_system_id lenovo_laptop[] = { - { - .ident = "LENOVO IdeaPad Flex 5 14ALC7", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82R9"), - }, - }, - { - .ident = "LENOVO IdeaPad Flex 5 16ALC7", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82RA"), - }, - }, - { } -}; - static const struct dmi_system_id tongfang_gm_rg[] = { { .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD", @@ -539,8 +521,6 @@ struct irq_override_cmp { static const struct irq_override_cmp override_table[] = { { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, - { lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, - { lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, @@ -563,6 +543,14 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, } #ifdef CONFIG_X86 + /* + * Always use the MADT override info, except for the i8042 PS/2 ctrl + * IRQs (1 and 12). For these the DSDT IRQ settings should sometimes + * be used otherwise PS/2 keyboards / mice will not work. + */ + if (gsi != 1 && gsi != 12) + return true; + /* * IRQ override isn't needed on modern AMD Zen systems and * this override breaks active low IRQs on AMD Ryzen 6000 and -- cgit v1.2.3 From c6a1fd910d1bf8a0e3db7aebb229e3c81bc305c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 9 Aug 2023 10:55:25 +0200 Subject: ACPI: resource: Honor MADT INT_SRC_OVR settings for IRQ1 on AMD Zen On AMD Zen acpi_dev_irq_override() by default prefers the DSDT IRQ 1 settings over the MADT settings. This causes the keyboard to malfunction on some laptop models (see Links), all models from the Links have an INT_SRC_OVR MADT entry for IRQ 1. Fixes: a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217336 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217394 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217406 Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/acpi.h | 2 ++ arch/x86/kernel/acpi/boot.c | 4 ++++ drivers/acpi/resource.c | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8eb74cf386db..2888c0ee4df0 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI_APEI # include @@ -31,6 +32,7 @@ extern int acpi_skip_timer_override; extern int acpi_use_timer_override; extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; +extern bool acpi_int_src_ovr[NR_IRQS_LEGACY]; extern u8 acpi_sci_flags; extern u32 acpi_sci_override_gsi; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 21b542a6866c..53369c57751e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -52,6 +52,7 @@ int acpi_lapic; int acpi_ioapic; int acpi_strict; int acpi_disable_cmcff; +bool acpi_int_src_ovr[NR_IRQS_LEGACY]; /* ACPI SCI override configuration */ u8 acpi_sci_flags __initdata; @@ -588,6 +589,9 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header, acpi_table_print_madt_entry(&header->common); + if (intsrc->source_irq < NR_IRQS_LEGACY) + acpi_int_src_ovr[intsrc->source_irq] = true; + if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { acpi_sci_ioapic_setup(intsrc->source_irq, intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 380cda1e86f4..8e32dd5776f5 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -551,6 +551,10 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, if (gsi != 1 && gsi != 12) return true; + /* If the override comes from an INT_SRC_OVR MADT entry, honor it. */ + if (acpi_int_src_ovr[gsi]) + return true; + /* * IRQ override isn't needed on modern AMD Zen systems and * this override breaks active low IRQs on AMD Ryzen 6000 and -- cgit v1.2.3 From 5a66d59b5ff537ddae84a1f175c3f8eb1140a562 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 5 Aug 2023 10:10:10 +0200 Subject: platform/x86: msi-ec: Fix the build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The msi-ec driver fails to build for me (gcc 7.5): CC [M] drivers/platform/x86/msi-ec.o drivers/platform/x86/msi-ec.c:72:6: error: initializer element is not constant { SM_ECO_NAME, 0xc2 }, ^~~~~~~~~~~ drivers/platform/x86/msi-ec.c:72:6: note: (near initialization for ‘CONF0.shift_mode.modes[0].name’) drivers/platform/x86/msi-ec.c:73:6: error: initializer element is not constant { SM_COMFORT_NAME, 0xc1 }, ^~~~~~~~~~~~~~~ drivers/platform/x86/msi-ec.c:73:6: note: (near initialization for ‘CONF0.shift_mode.modes[1].name’) drivers/platform/x86/msi-ec.c:74:6: error: initializer element is not constant { SM_SPORT_NAME, 0xc0 }, ^~~~~~~~~~~~~ drivers/platform/x86/msi-ec.c:74:6: note: (near initialization for ‘CONF0.shift_mode.modes[2].name’) (...) Don't try to be smart, just use defines for the constant strings. The compiler will recognize it's the same string and will store it only once in the data section anyway. Signed-off-by: Jean Delvare Fixes: 392cacf2aa10 ("platform/x86: Add new msi-ec driver") Cc: stable@vger.kernel.org Cc: Nikita Kravets Cc: Hans de Goede Cc: Mark Gross Link: https://lore.kernel.org/r/20230805101010.54d49e91@endymion.delvare Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/msi-ec.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/msi-ec.c b/drivers/platform/x86/msi-ec.c index ff93986e3d35..f26a3121092f 100644 --- a/drivers/platform/x86/msi-ec.c +++ b/drivers/platform/x86/msi-ec.c @@ -27,15 +27,15 @@ #include #include -static const char *const SM_ECO_NAME = "eco"; -static const char *const SM_COMFORT_NAME = "comfort"; -static const char *const SM_SPORT_NAME = "sport"; -static const char *const SM_TURBO_NAME = "turbo"; - -static const char *const FM_AUTO_NAME = "auto"; -static const char *const FM_SILENT_NAME = "silent"; -static const char *const FM_BASIC_NAME = "basic"; -static const char *const FM_ADVANCED_NAME = "advanced"; +#define SM_ECO_NAME "eco" +#define SM_COMFORT_NAME "comfort" +#define SM_SPORT_NAME "sport" +#define SM_TURBO_NAME "turbo" + +#define FM_AUTO_NAME "auto" +#define FM_SILENT_NAME "silent" +#define FM_BASIC_NAME "basic" +#define FM_ADVANCED_NAME "advanced" static const char * const ALLOWED_FW_0[] __initconst = { "14C1EMS1.012", -- cgit v1.2.3 From af8a6d281bfb68023fb60f616ec87fe8a875875e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 8 Aug 2023 10:43:59 -0700 Subject: platform/x86: ISST: Reduce noise for missing numa information in logs On platforms with no numa support and with several CPUs, logs have lots of noise for message "Fail to get numa node for CPU:.." Change pr_info() to pr_info_once() as one print is enough to show the issue. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20230808174359.50602-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index 1f59ac55c5f7..a95004e3d80b 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -335,8 +335,8 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn node = dev_to_node(&_pci_dev->dev); if (node == NUMA_NO_NODE) { - pr_info("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n", - cpu, bus_no, dev, fn); + pr_info_once("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n", + cpu, bus_no, dev, fn); continue; } -- cgit v1.2.3 From 1b8b1aa90c9c0e825b181b98b8d9e249dc395470 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 3 Aug 2023 18:16:09 +0300 Subject: x86/mm: Fix VDSO and VVAR placement on 5-level paging machines Yingcong has noticed that on the 5-level paging machine, VDSO and VVAR VMAs are placed above the 47-bit border: 8000001a9000-8000001ad000 r--p 00000000 00:00 0 [vvar] 8000001ad000-8000001af000 r-xp 00000000 00:00 0 [vdso] This might confuse users who are not aware of 5-level paging and expect all userspace addresses to be under the 47-bit border. So far problem has only been triggered with ASLR disabled, although it may also occur with ASLR enabled if the layout is randomized in a just right way. The problem happens due to custom placement for the VMAs in the VDSO code: vdso_addr() tries to place them above the stack and checks the result against TASK_SIZE_MAX, which is wrong. TASK_SIZE_MAX is set to the 56-bit border on 5-level paging machines. Use DEFAULT_MAP_WINDOW instead. Fixes: b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace") Reported-by: Yingcong Wu Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230803151609.22141-1-kirill.shutemov%40linux.intel.com --- arch/x86/entry/vdso/vma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 11a5c68d1218..7645730dc228 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -299,8 +299,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) /* Round the lowest possible end address up to a PMD boundary. */ end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE_MAX) - end = TASK_SIZE_MAX; + if (end >= DEFAULT_MAP_WINDOW) + end = DEFAULT_MAP_WINDOW; end -= len; if (end > start) { -- cgit v1.2.3 From 2bc057692599a5b3dc93d75a3dff34f72576355d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Aug 2023 11:06:17 -0600 Subject: block: don't make REQ_POLLED imply REQ_NOWAIT Normally these two flags do go together, as the issuer of polled IO generally cannot wait for resources that will get freed as part of IO completion. This is because that very task is the one that will complete the request and free those resources, hence that would introduce a deadlock. But it is possible to have someone else issue the polled IO, eg via io_uring if the request is punted to io-wq. For that case, it's fine to have the task block on IO submission, as it is not the same task that will be completing the IO. It's completely up to the caller to ask for both polled and nowait IO separately! If we don't allow polled IO where IOCB_NOWAIT isn't set in the kiocb, then we can run into repeated -EAGAIN submissions and not make any progress. Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/fops.c | 7 ++++--- include/linux/bio.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/block/fops.c b/block/fops.c index a286bf3325c5..838ffada5341 100644 --- a/block/fops.c +++ b/block/fops.c @@ -358,13 +358,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, task_io_account_write(bio->bi_iter.bi_size); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; + if (iocb->ki_flags & IOCB_HIPRI) { - bio->bi_opf |= REQ_POLLED | REQ_NOWAIT; + bio->bi_opf |= REQ_POLLED; submit_bio(bio); WRITE_ONCE(iocb->private, bio); } else { - if (iocb->ki_flags & IOCB_NOWAIT) - bio->bi_opf |= REQ_NOWAIT; submit_bio(bio); } return -EIOCBQUEUED; diff --git a/include/linux/bio.h b/include/linux/bio.h index c4f5b5228105..11984ed29cb8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { bio->bi_opf |= REQ_POLLED; - if (!is_sync_kiocb(kiocb)) + if (kiocb->ki_flags & IOCB_NOWAIT) bio->bi_opf |= REQ_NOWAIT; } -- cgit v1.2.3 From f099a108cabf72a1184b1e14e4a09f4ca3375750 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Fri, 4 Aug 2023 15:06:09 +0800 Subject: blk-iocost: fix queue stats accounting The q->stats->accounting is not only used by iocost, but iocost only increase this counter, never decrease it. So queue stats accounting will always enabled after using iocost once. Signed-off-by: Chengming Zhou Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230804070609.31623-1-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- block/blk-iocost.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index dd64e2066f01..089fcb9cfce3 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3301,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, if (qos[QOS_MIN] > qos[QOS_MAX]) goto einval; - if (enable) { + if (enable && !ioc->enabled) { blk_stat_enable_accounting(disk->queue); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); ioc->enabled = true; - } else { + } else if (!enable && ioc->enabled) { + blk_stat_disable_accounting(disk->queue); blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); ioc->enabled = false; } -- cgit v1.2.3 From fe9da61ffccad80ae79fadad836971acf0d465bd Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 7 Aug 2023 13:11:48 +0900 Subject: zonefs: fix synchronous direct writes to sequential files Commit 16d7fd3cfa72 ("zonefs: use iomap for synchronous direct writes") changes zonefs code from a self-built zone append BIO to using iomap for synchronous direct writes. This change relies on iomap submit BIO callback to change the write BIO built by iomap to a zone append BIO. However, this change overlooked the fact that a write BIO may be very large as it is split when issued. The change from a regular write to a zone append operation for the built BIO can result in a block layer warning as zone append BIO are not allowed to be split. WARNING: CPU: 18 PID: 202210 at block/bio.c:1644 bio_split+0x288/0x350 Call Trace: ? __warn+0xc9/0x2b0 ? bio_split+0x288/0x350 ? report_bug+0x2e6/0x390 ? handle_bug+0x41/0x80 ? exc_invalid_op+0x13/0x40 ? asm_exc_invalid_op+0x16/0x20 ? bio_split+0x288/0x350 bio_split_rw+0x4bc/0x810 ? __pfx_bio_split_rw+0x10/0x10 ? lockdep_unlock+0xf2/0x250 __bio_split_to_limits+0x1d8/0x900 blk_mq_submit_bio+0x1cf/0x18a0 ? __pfx_iov_iter_extract_pages+0x10/0x10 ? __pfx_blk_mq_submit_bio+0x10/0x10 ? find_held_lock+0x2d/0x110 ? lock_release+0x362/0x620 ? mark_held_locks+0x9e/0xe0 __submit_bio+0x1ea/0x290 ? __pfx___submit_bio+0x10/0x10 ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 submit_bio_noacct_nocheck+0x675/0xa20 ? __pfx_bio_iov_iter_get_pages+0x10/0x10 ? __pfx_submit_bio_noacct_nocheck+0x10/0x10 iomap_dio_bio_iter+0x624/0x1280 __iomap_dio_rw+0xa22/0x18a0 ? lock_is_held_type+0xe3/0x140 ? __pfx___iomap_dio_rw+0x10/0x10 ? lock_release+0x362/0x620 ? zonefs_file_write_iter+0x74c/0xc80 [zonefs] ? down_write+0x13d/0x1e0 iomap_dio_rw+0xe/0x40 zonefs_file_write_iter+0x5ea/0xc80 [zonefs] do_iter_readv_writev+0x18b/0x2c0 ? __pfx_do_iter_readv_writev+0x10/0x10 ? inode_security+0x54/0xf0 do_iter_write+0x13b/0x7c0 ? lock_is_held_type+0xe3/0x140 vfs_writev+0x185/0x550 ? __pfx_vfs_writev+0x10/0x10 ? __handle_mm_fault+0x9bd/0x1c90 ? find_held_lock+0x2d/0x110 ? lock_release+0x362/0x620 ? find_held_lock+0x2d/0x110 ? lock_release+0x362/0x620 ? __up_read+0x1ea/0x720 ? do_pwritev+0x136/0x1f0 do_pwritev+0x136/0x1f0 ? __pfx_do_pwritev+0x10/0x10 ? syscall_enter_from_user_mode+0x22/0x90 ? lockdep_hardirqs_on+0x7d/0x100 do_syscall_64+0x58/0x80 This error depends on the hardware used, specifically on the max zone append bytes and max_[hw_]sectors limits. Tests using AMD Epyc machines that have low limits did not reveal this issue while runs on Intel Xeon machines with larger limits trigger it. Manually splitting the zone append BIO using bio_split_rw() can solve this issue but also requires issuing the fragment BIOs synchronously with submit_bio_wait(), to avoid potential reordering of the zone append BIO fragments, which would lead to data corruption. That is, this solution is not better than using regular write BIOs which are subject to serialization using zone write locking at the IO scheduler level. Given this, fix the issue by removing zone append support and using regular write BIOs for synchronous direct writes. This allows preseving the use of iomap and having identical synchronous and asynchronous sequential file write path. Zone append support will be reintroduced later through io_uring commands to ensure that the needed special handling is done correctly. Reported-by: Shin'ichiro Kawasaki Fixes: 16d7fd3cfa72 ("zonefs: use iomap for synchronous direct writes") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Christoph Hellwig --- fs/zonefs/file.c | 111 ++--------------------------------------------------- fs/zonefs/super.c | 9 +---- fs/zonefs/zonefs.h | 2 - 3 files changed, 4 insertions(+), 118 deletions(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 92c9aaae3663..789cfb74c146 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -341,77 +341,6 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) return generic_file_llseek_size(file, offset, whence, isize, isize); } -struct zonefs_zone_append_bio { - /* The target inode of the BIO */ - struct inode *inode; - - /* For sync writes, the target append write offset */ - u64 append_offset; - - /* - * This member must come last, bio_alloc_bioset will allocate enough - * bytes for entire zonefs_bio but relies on bio being last. - */ - struct bio bio; -}; - -static inline struct zonefs_zone_append_bio * -zonefs_zone_append_bio(struct bio *bio) -{ - return container_of(bio, struct zonefs_zone_append_bio, bio); -} - -static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio) -{ - struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio); - struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode); - sector_t za_sector; - - if (bio->bi_status != BLK_STS_OK) - goto bio_end; - - /* - * If the file zone was written underneath the file system, the zone - * append operation can still succedd (if the zone is not full) but - * the write append location will not be where we expect it to be. - * Check that we wrote where we intended to, that is, at z->z_wpoffset. - */ - za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT); - if (bio->bi_iter.bi_sector != za_sector) { - zonefs_warn(za_bio->inode->i_sb, - "Invalid write sector %llu for zone at %llu\n", - bio->bi_iter.bi_sector, z->z_sector); - bio->bi_status = BLK_STS_IOERR; - } - -bio_end: - iomap_dio_bio_end_io(bio); -} - -static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter, - struct bio *bio, - loff_t file_offset) -{ - struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio); - struct inode *inode = iter->inode; - struct zonefs_zone *z = zonefs_inode_zone(inode); - - /* - * Issue a zone append BIO to process sync dio writes. The append - * file offset is saved to check the zone append write location - * on completion of the BIO. - */ - za_bio->inode = inode; - za_bio->append_offset = file_offset; - - bio->bi_opf &= ~REQ_OP_WRITE; - bio->bi_opf |= REQ_OP_ZONE_APPEND; - bio->bi_iter.bi_sector = z->z_sector; - bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io; - - submit_bio(bio); -} - static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, int error, unsigned int flags) { @@ -442,14 +371,6 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, return 0; } -static struct bio_set zonefs_zone_append_bio_set; - -static const struct iomap_dio_ops zonefs_zone_append_dio_ops = { - .submit_io = zonefs_file_zone_append_dio_submit_io, - .end_io = zonefs_file_write_dio_end_io, - .bio_set = &zonefs_zone_append_bio_set, -}; - static const struct iomap_dio_ops zonefs_write_dio_ops = { .end_io = zonefs_file_write_dio_end_io, }; @@ -533,9 +454,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) struct zonefs_inode_info *zi = ZONEFS_I(inode); struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - const struct iomap_dio_ops *dio_ops; - bool sync = is_sync_kiocb(iocb); - bool append = false; ssize_t ret, count; /* @@ -543,7 +461,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) * as this can cause write reordering (e.g. the first aio gets EAGAIN * on the inode lock but the second goes through but is now unaligned). */ - if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) + if (zonefs_zone_is_seq(z) && !is_sync_kiocb(iocb) && + (iocb->ki_flags & IOCB_NOWAIT)) return -EOPNOTSUPP; if (iocb->ki_flags & IOCB_NOWAIT) { @@ -573,18 +492,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) goto inode_unlock; } mutex_unlock(&zi->i_truncate_mutex); - append = sync; - } - - if (append) { - unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev); - - max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize); - iov_iter_truncate(from, max); - - dio_ops = &zonefs_zone_append_dio_ops; - } else { - dio_ops = &zonefs_write_dio_ops; } /* @@ -593,7 +500,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) * the user can make sense of the error. */ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, - dio_ops, 0, NULL, 0); + &zonefs_write_dio_ops, 0, NULL, 0); if (ret == -ENOTBLK) ret = -EBUSY; @@ -938,15 +845,3 @@ const struct file_operations zonefs_file_operations = { .splice_write = iter_file_splice_write, .iopoll = iocb_bio_iopoll, }; - -int zonefs_file_bioset_init(void) -{ - return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE, - offsetof(struct zonefs_zone_append_bio, bio), - BIOSET_NEED_BVECS); -} - -void zonefs_file_bioset_exit(void) -{ - bioset_exit(&zonefs_zone_append_bio_set); -} diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index bbe44a26a8e5..9350221abfc5 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1412,13 +1412,9 @@ static int __init zonefs_init(void) BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); - ret = zonefs_file_bioset_init(); - if (ret) - return ret; - ret = zonefs_init_inodecache(); if (ret) - goto destroy_bioset; + return ret; ret = zonefs_sysfs_init(); if (ret) @@ -1434,8 +1430,6 @@ sysfs_exit: zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); -destroy_bioset: - zonefs_file_bioset_exit(); return ret; } @@ -1445,7 +1439,6 @@ static void __exit zonefs_exit(void) unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - zonefs_file_bioset_exit(); } MODULE_AUTHOR("Damien Le Moal"); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index f663b8ebc2cb..8175652241b5 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -279,8 +279,6 @@ extern const struct file_operations zonefs_dir_operations; extern const struct address_space_operations zonefs_file_aops; extern const struct file_operations zonefs_file_operations; int zonefs_file_truncate(struct inode *inode, loff_t isize); -int zonefs_file_bioset_init(void); -void zonefs_file_bioset_exit(void); /* In sysfs.c */ int zonefs_sysfs_register(struct super_block *sb); -- cgit v1.2.3 From 56b930dcd88c2adc261410501c402c790980bdb5 Mon Sep 17 00:00:00 2001 From: Aleksa Savic Date: Mon, 7 Aug 2023 19:20:03 +0200 Subject: hwmon: (aquacomputer_d5next) Add selective 200ms delay after sending ctrl report Add a 200ms delay after sending a ctrl report to Quadro, Octo, D5 Next and Aquaero to give them enough time to process the request and save the data to memory. Otherwise, under heavier userspace loads where multiple sysfs entries are usually set in quick succession, a new ctrl report could be requested from the device while it's still processing the previous one and fail with -EPIPE. The delay is only applied if two ctrl report operations are near each other in time. Reported by a user on Github [1] and tested by both of us. [1] https://github.com/aleksamagicka/aquacomputer_d5next-hwmon/issues/82 Fixes: 752b927951ea ("hwmon: (aquacomputer_d5next) Add support for Aquacomputer Octo") Signed-off-by: Aleksa Savic Link: https://lore.kernel.org/r/20230807172004.456968-1-savicaleksa83@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/aquacomputer_d5next.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c index a997dbcb563f..023807859be7 100644 --- a/drivers/hwmon/aquacomputer_d5next.c +++ b/drivers/hwmon/aquacomputer_d5next.c @@ -13,9 +13,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -63,6 +65,8 @@ static const char *const aqc_device_names[] = { #define CTRL_REPORT_ID 0x03 #define AQUAERO_CTRL_REPORT_ID 0x0b +#define CTRL_REPORT_DELAY 200 /* ms */ + /* The HID report that the official software always sends * after writing values, currently same for all devices */ @@ -527,6 +531,9 @@ struct aqc_data { int secondary_ctrl_report_size; u8 *secondary_ctrl_report; + ktime_t last_ctrl_report_op; + int ctrl_report_delay; /* Delay between two ctrl report operations, in ms */ + int buffer_size; u8 *buffer; int checksum_start; @@ -611,17 +618,35 @@ static int aqc_aquastreamxt_convert_fan_rpm(u16 val) return 0; } +static void aqc_delay_ctrl_report(struct aqc_data *priv) +{ + /* + * If previous read or write is too close to this one, delay the current operation + * to give the device enough time to process the previous one. + */ + if (priv->ctrl_report_delay) { + s64 delta = ktime_ms_delta(ktime_get(), priv->last_ctrl_report_op); + + if (delta < priv->ctrl_report_delay) + msleep(priv->ctrl_report_delay - delta); + } +} + /* Expects the mutex to be locked */ static int aqc_get_ctrl_data(struct aqc_data *priv) { int ret; + aqc_delay_ctrl_report(priv); + memset(priv->buffer, 0x00, priv->buffer_size); ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) ret = -ENODATA; + priv->last_ctrl_report_op = ktime_get(); + return ret; } @@ -631,6 +656,8 @@ static int aqc_send_ctrl_data(struct aqc_data *priv) int ret; u16 checksum; + aqc_delay_ctrl_report(priv); + /* Checksum is not needed for Aquaero */ if (priv->kind != aquaero) { /* Init and xorout value for CRC-16/USB is 0xffff */ @@ -646,12 +673,16 @@ static int aqc_send_ctrl_data(struct aqc_data *priv) ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) - return ret; + goto record_access_and_ret; /* The official software sends this report after every change, so do it here as well */ ret = hid_hw_raw_request(priv->hdev, priv->secondary_ctrl_report_id, priv->secondary_ctrl_report, priv->secondary_ctrl_report_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + +record_access_and_ret: + priv->last_ctrl_report_op = ktime_get(); + return ret; } @@ -1524,6 +1555,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->buffer_size = AQUAERO_CTRL_REPORT_SIZE; priv->temp_ctrl_offset = AQUAERO_TEMP_CTRL_OFFSET; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->temp_label = label_temp_sensors; priv->virtual_temp_label = label_virtual_temp_sensors; @@ -1547,6 +1579,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->temp_ctrl_offset = D5NEXT_TEMP_CTRL_OFFSET; priv->buffer_size = D5NEXT_CTRL_REPORT_SIZE; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->power_cycle_count_offset = D5NEXT_POWER_CYCLES; @@ -1597,6 +1630,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->temp_ctrl_offset = OCTO_TEMP_CTRL_OFFSET; priv->buffer_size = OCTO_CTRL_REPORT_SIZE; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->power_cycle_count_offset = OCTO_POWER_CYCLES; @@ -1624,6 +1658,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->temp_ctrl_offset = QUADRO_TEMP_CTRL_OFFSET; priv->buffer_size = QUADRO_CTRL_REPORT_SIZE; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->flow_pulses_ctrl_offset = QUADRO_FLOW_PULSES_CTRL_OFFSET; priv->power_cycle_count_offset = QUADRO_POWER_CYCLES; -- cgit v1.2.3 From 07dd476f6116966cb2006e25fdcf48f0715115ff Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 24 Jul 2023 13:26:10 +0200 Subject: drm/shmem-helper: Reset vma->vm_ops before calling dma_buf_mmap() The dma-buf backend is supposed to provide its own vm_ops, but some implementation just have nothing special to do and leave vm_ops untouched, probably expecting this field to be zero initialized (this is the case with the system_heap implementation for instance). Let's reset vma->vm_ops to NULL to keep things working with these implementations. Fixes: 26d3ac3cb04d ("drm/shmem-helpers: Redirect mmap for imported dma-buf") Cc: Cc: Daniel Vetter Reported-by: Roman Stratiienko Signed-off-by: Boris Brezillon Tested-by: Roman Stratiienko Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230724112610.60974-1-boris.brezillon@collabora.com --- drivers/gpu/drm/drm_gem_shmem_helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 4ea6507a77e5..baaf0e0feb06 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -623,7 +623,13 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct int ret; if (obj->import_attach) { + /* Reset both vm_ops and vm_private_data, so we don't end up with + * vm_ops pointing to our implementation if the dma-buf backend + * doesn't set those fields. + */ vma->vm_private_data = NULL; + vma->vm_ops = NULL; + ret = dma_buf_mmap(obj->dma_buf, vma, 0); /* Drop the reference drm_gem_mmap_obj() acquired.*/ -- cgit v1.2.3 From fc1f91b9231a28fba333f931a031bf776bc6ef0e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 21 Jul 2023 16:09:43 -0400 Subject: btrfs: wait for actual caching progress during allocation Recently we've been having mysterious hangs while running generic/475 on the CI system. This turned out to be something like this: Task 1 dmsetup suspend --nolockfs -> __dm_suspend -> dm_wait_for_completion -> dm_wait_for_bios_completion -> Unable to complete because of IO's on a plug in Task 2 Task 2 wb_workfn -> wb_writeback -> blk_start_plug -> writeback_sb_inodes -> Infinite loop unable to make an allocation Task 3 cache_block_group ->read_extent_buffer_pages ->Waiting for IO to complete that can't be submitted because Task 1 suspended the DM device The problem here is that we need Task 2 to be scheduled completely for the blk plug to flush. Normally this would happen, we normally wait for the block group caching to finish (Task 3), and this schedule would result in the block plug flushing. However if there's enough free space available from the current caching to satisfy the allocation we won't actually wait for the caching to complete. This check however just checks that we have enough space, not that we can make the allocation. In this particular case we were trying to allocate 9MiB, and we had 10MiB of free space, but we didn't have 9MiB of contiguous space to allocate, and thus the allocation failed and we looped. We specifically don't cycle through the FFE loop until we stop finding cached block groups because we don't want to allocate new block groups just because we're caching, so we short circuit the normal loop once we hit LOOP_CACHING_WAIT and we found a caching block group. This is normally fine, except in this particular case where the caching thread can't make progress because the DM device has been suspended. Fix this by not only waiting for free space to >= the amount of space we want to allocate, but also that we make some progress in caching from the time we start waiting. This will keep us from busy looping when the caching is taking a while but still theoretically has enough space for us to allocate from, and fixes this particular case by forcing us to actually sleep and wait for forward progress, which will flush the plug. With this fix we're no longer hanging with generic/475. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Boris Burkov Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 17 +++++++++++++++-- fs/btrfs/block-group.h | 2 ++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 63c3b7172ba5..1e4b70f5280d 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes) { struct btrfs_caching_control *caching_ctl; + int progress; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return; + /* + * We've already failed to allocate from this block group, so even if + * there's enough space in the block group it isn't contiguous enough to + * allow for an allocation, so wait for at least the next wakeup tick, + * or for the thing to be done. + */ + progress = atomic_read(&caching_ctl->progress); + wait_event(caching_ctl->wait, btrfs_block_group_done(cache) || - (cache->free_space_ctl->free_space >= num_bytes)); + (progress != atomic_read(&caching_ctl->progress) && + (cache->free_space_ctl->free_space >= num_bytes))); btrfs_put_caching_control(caching_ctl); } @@ -802,8 +812,10 @@ next: if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; - if (wakeup) + if (wakeup) { + atomic_inc(&caching_ctl->progress); wake_up(&caching_ctl->wait); + } } } path->slots[0]++; @@ -910,6 +922,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) init_waitqueue_head(&caching_ctl->wait); caching_ctl->block_group = cache; refcount_set(&caching_ctl->count, 2); + atomic_set(&caching_ctl->progress, 0); btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); spin_lock(&cache->lock); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index aba5dff66c19..74b61e663028 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -90,6 +90,8 @@ struct btrfs_caching_control { wait_queue_head_t wait; struct btrfs_work work; struct btrfs_block_group *block_group; + /* Track progress of caching during allocation. */ + atomic_t progress; refcount_t count; }; -- cgit v1.2.3 From 56fec0051a69ace182ca3fba47be9c13038b4e3f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Aug 2023 11:00:11 +0200 Subject: ACPI: resource: Add IRQ override quirk for PCSpecialist Elimina Pro 16 M The PCSpecialist Elimina Pro 16 M laptop model is a Zen laptop which needs to use the MADT IRQ settings override and which does not have an INT_SRC_OVR entry for IRQ 1 in its MADT. So this model needs a DMI quirk to enable the MADT IRQ settings override to fix its keyboard not working. Fixes: a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217394#c18 Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8e32dd5776f5..a4d9f149b48d 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -498,6 +498,17 @@ static const struct dmi_system_id maingear_laptop[] = { { } }; +static const struct dmi_system_id pcspecialist_laptop[] = { + { + .ident = "PCSpecialist Elimina Pro 16 M", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), + DMI_MATCH(DMI_PRODUCT_NAME, "Elimina Pro 16 M"), + }, + }, + { } +}; + static const struct dmi_system_id lg_laptop[] = { { .ident = "LG Electronics 17U70P", @@ -523,6 +534,7 @@ static const struct irq_override_cmp override_table[] = { { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, + { pcspecialist_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, }; -- cgit v1.2.3 From effa24f689ce0948f68c754991a445a8d697d3a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jul 2023 06:26:53 -0700 Subject: btrfs: don't stop integrity writeback too early extent_write_cache_pages stops writing pages as soon as nr_to_write hits zero. That is the right thing for opportunistic writeback, but incorrect for data integrity writeback, which needs to ensure that no dirty pages are left in the range. Thus only stop the writeback for WB_SYNC_NONE if nr_to_write hits 0. This is a port of write_cache_pages changes in commit 05fe478dd04e ("mm: write_cache_pages integrity fix"). Note that I've only trigger the problem with other changes to the btrfs writeback code, but this condition seems worthwhile fixing anyway. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba [ updated comment ] Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a91d5ad27984..c36eb4956f81 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2164,11 +2164,12 @@ retry: } /* - * the filesystem may choose to bump up nr_to_write. + * The filesystem may choose to bump up nr_to_write. * We have to make sure to honor the new nr_to_write - * at any time + * at any time. */ - nr_to_write_done = wbc->nr_to_write <= 0; + nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE && + wbc->nr_to_write <= 0); } folio_batch_release(&fbatch); cond_resched(); -- cgit v1.2.3 From 5c25699871112853f231e52d51c576d5c759a020 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jul 2023 06:26:54 -0700 Subject: btrfs: don't wait for writeback on clean pages in extent_write_cache_pages __extent_writepage could have started on more pages than the one it was called for. This happens regularly for zoned file systems, and in theory could happen for compressed I/O if the worker thread was executed very quickly. For such pages extent_write_cache_pages waits for writeback to complete before moving on to the next page, which is highly inefficient as it blocks the flusher thread. Port over the PageDirty check that was added to write_cache_pages in commit 515f4a037fb ("mm: write_cache_pages optimise page cleaning") to fix this. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c36eb4956f81..ca765d62324f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2145,6 +2145,12 @@ retry: continue; } + if (!folio_test_dirty(folio)) { + /* Someone wrote it for us. */ + folio_unlock(folio); + continue; + } + if (wbc->sync_mode != WB_SYNC_NONE) { if (folio_test_writeback(folio)) submit_write_bio(bio_ctrl, 0); -- cgit v1.2.3 From 12b2d64e591652a2d97dd3afa2b062ca7a4ba352 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jul 2023 06:26:55 -0700 Subject: btrfs: properly clear end of the unreserved range in cow_file_range When the call to btrfs_reloc_clone_csums in cow_file_range returns an error, we jump to the out_unlock label with the extent_reserved variable set to false. The cleanup at the label will then call extent_clear_unlock_delalloc on the range from start to end. But we've already added cur_alloc_size to start before the jump, so there might no range be left from the newly incremented start to end. Move the check for 'start < end' so that it is reached by also for the !extent_reserved case. CC: stable@vger.kernel.org # 6.1+ Fixes: a315e68f6e8b ("Btrfs: fix invalid attempt to free reserved space on failure to cow range") Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 49cef61f6a39..9055e19b01ef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1654,8 +1654,6 @@ out_unlock: clear_bits, page_ops); start += cur_alloc_size; - if (start >= end) - return ret; } /* @@ -1664,9 +1662,11 @@ out_unlock: * space_info's bytes_may_use counter, reserved in * btrfs_check_data_free_space(). */ - extent_clear_unlock_delalloc(inode, start, end, locked_page, - clear_bits | EXTENT_CLEAR_DATA_RESV, - page_ops); + if (start < end) { + clear_bits |= EXTENT_CLEAR_DATA_RESV; + extent_clear_unlock_delalloc(inode, start, end, locked_page, + clear_bits, page_ops); + } return ret; } -- cgit v1.2.3 From 773e722a98e25caf96f91aced7070c1858250ba2 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 3 Aug 2023 17:20:41 +0800 Subject: btrfs: avoid race between qgroup tree creation and relocation [BUG] Syzbot reported a weird ASSERT() triggered inside prepare_to_merge(). assertion failed: root->reloc_root == reloc_root, in fs/btrfs/relocation.c:1919 ------------[ cut here ]------------ kernel BUG at fs/btrfs/relocation.c:1919! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 9904 Comm: syz-executor.3 Not tainted 6.4.0-syzkaller-08881-g533925cb7604 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 RIP: 0010:prepare_to_merge+0xbb2/0xc40 fs/btrfs/relocation.c:1919 Code: fe e9 f5 (...) RSP: 0018:ffffc9000325f760 EFLAGS: 00010246 RAX: 000000000000004f RBX: ffff888075644030 RCX: 1481ccc522da5800 RDX: ffffc90005c09000 RSI: 00000000000364ca RDI: 00000000000364cb RBP: ffffc9000325f870 R08: ffffffff816f33ac R09: 1ffff9200064bea0 R10: dffffc0000000000 R11: fffff5200064bea1 R12: ffff888075644000 R13: ffff88803b166000 R14: ffff88803b166560 R15: ffff88803b166558 FS: 00007f4e305fd700(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056080679c000 CR3: 00000000193ad000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: relocate_block_group+0xa5d/0xcd0 fs/btrfs/relocation.c:3749 btrfs_relocate_block_group+0x7ab/0xd70 fs/btrfs/relocation.c:4087 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3283 __btrfs_balance+0x1b06/0x2690 fs/btrfs/volumes.c:4018 btrfs_balance+0xbdb/0x1120 fs/btrfs/volumes.c:4402 btrfs_ioctl_balance+0x496/0x7c0 fs/btrfs/ioctl.c:3604 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f4e2f88c389 [CAUSE] With extra debugging, the offending reloc_root is for quota tree (rootid 8). Normally we should not use the reloc tree for quota root at all, as reloc trees are only for subvolume trees. But there is a race between quota enabling and relocation, this happens after commit 85724171b302 ("btrfs: fix the btrfs_get_global_root return value"). Before that commit, for quota and free space tree, we exit immediately if we cannot grab it from fs_info. But now we would try to read it from disk, just as if they are fs trees, this sets ROOT_SHAREABLE flags in such race: Thread A | Thread B ---------------------------------+------------------------------ btrfs_quota_enable() | | | btrfs_get_root_ref() | | |- btrfs_get_global_root() | | | Returned NULL | | |- btrfs_lookup_fs_root() | | | Returned NULL |- btrfs_create_tree() | | | Now quota root item is | | | inserted | |- btrfs_read_tree_root() | | | Got the newly inserted quota root | | |- btrfs_init_fs_root() | | | Set ROOT_SHAREABLE flag [FIX] Get back to the old behavior by returning PTR_ERR(-ENOENT) if the target objectid is not a subvolume tree or data reloc tree. Reported-and-tested-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com Fixes: 85724171b302 ("btrfs: fix the btrfs_get_global_root return value") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b9914e5f03d..11b1ac716f3b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1300,6 +1300,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, root = btrfs_get_global_root(fs_info, objectid); if (root) return root; + + /* + * If we're called for non-subvolume trees, and above function didn't + * find one, do not try to read it from disk. + * + * This is namely for free-space-tree and quota tree, which can change + * at runtime and should only be grabbed from fs_info. + */ + if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) + return ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { -- cgit v1.2.3 From 05d7ce504545f7874529701664c90814ca645c5d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 3 Aug 2023 17:20:42 +0800 Subject: btrfs: exit gracefully if reloc roots don't match [BUG] Syzbot reported a crash that an ASSERT() got triggered inside prepare_to_merge(). [CAUSE] The root cause of the triggered ASSERT() is we can have a race between quota tree creation and relocation. This leads us to create a duplicated quota tree in the btrfs_read_fs_root() path, and since it's treated as fs tree, it would have ROOT_SHAREABLE flag, causing us to create a reloc tree for it. The bug itself is fixed by a dedicated patch for it, but this already taught us the ASSERT() is not something straightforward for developers. [ENHANCEMENT] Instead of using an ASSERT(), let's handle it gracefully and output extra info about the mismatch reloc roots to help debug. Also with the above ASSERT() removed, we can trigger ASSERT(0)s inside merge_reloc_roots() later. Also replace those ASSERT(0)s with WARN_ON()s. CC: stable@vger.kernel.org # 5.15+ Reported-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 25a3361caedc..46c3c1d57266 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1916,7 +1916,39 @@ again: err = PTR_ERR(root); break; } - ASSERT(root->reloc_root == reloc_root); + + if (unlikely(root->reloc_root != reloc_root)) { + if (root->reloc_root) { + btrfs_err(fs_info, +"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu", + root->root_key.objectid, + root->reloc_root->root_key.objectid, + root->reloc_root->root_key.type, + root->reloc_root->root_key.offset, + btrfs_root_generation( + &root->reloc_root->root_item), + reloc_root->root_key.objectid, + reloc_root->root_key.type, + reloc_root->root_key.offset, + btrfs_root_generation( + &reloc_root->root_item)); + } else { + btrfs_err(fs_info, +"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu", + root->root_key.objectid, + reloc_root->root_key.objectid, + reloc_root->root_key.type, + reloc_root->root_key.offset, + btrfs_root_generation( + &reloc_root->root_item)); + } + list_add(&reloc_root->root_list, &reloc_roots); + btrfs_put_root(root); + btrfs_abort_transaction(trans, -EUCLEAN); + if (!err) + err = -EUCLEAN; + break; + } /* * set reference count to 1, so btrfs_recover_relocation @@ -1989,7 +2021,7 @@ again: root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false); if (btrfs_root_refs(&reloc_root->root_item) > 0) { - if (IS_ERR(root)) { + if (WARN_ON(IS_ERR(root))) { /* * For recovery we read the fs roots on mount, * and if we didn't find the root then we marked @@ -1998,17 +2030,14 @@ again: * memory. However there's no reason we can't * handle the error properly here just in case. */ - ASSERT(0); ret = PTR_ERR(root); goto out; } - if (root->reloc_root != reloc_root) { + if (WARN_ON(root->reloc_root != reloc_root)) { /* - * This is actually impossible without something - * going really wrong (like weird race condition - * or cosmic rays). + * This can happen if on-disk metadata has some + * corruption, e.g. bad reloc tree key offset. */ - ASSERT(0); ret = -EINVAL; goto out; } -- cgit v1.2.3 From 6ebcd021c92b8e4b904552e4d87283032100796d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 3 Aug 2023 17:20:43 +0800 Subject: btrfs: reject invalid reloc tree root keys with stack dump [BUG] Syzbot reported a crash that an ASSERT() got triggered inside prepare_to_merge(). That ASSERT() makes sure the reloc tree is properly pointed back by its subvolume tree. [CAUSE] After more debugging output, it turns out we had an invalid reloc tree: BTRFS error (device loop1): reloc tree mismatch, root 8 has no reloc root, expect reloc root key (-8, 132, 8) gen 17 Note the above root key is (TREE_RELOC_OBJECTID, ROOT_ITEM, QUOTA_TREE_OBJECTID), meaning it's a reloc tree for quota tree. But reloc trees can only exist for subvolumes, as for non-subvolume trees, we just COW the involved tree block, no need to create a reloc tree since those tree blocks won't be shared with other trees. Only subvolumes tree can share tree blocks with other trees (thus they have BTRFS_ROOT_SHAREABLE flag). Thus this new debug output proves my previous assumption that corrupted on-disk data can trigger that ASSERT(). [FIX] Besides the dedicated fix and the graceful exit, also let tree-checker to check such root keys, to make sure reloc trees can only exist for subvolumes. CC: stable@vger.kernel.org # 5.15+ Reported-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 ++- fs/btrfs/tree-checker.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11b1ac716f3b..a9a2c5446c18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1103,7 +1103,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) btrfs_drew_lock_init(&root->snapshot_lock); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && - !btrfs_is_data_reloc_root(root)) { + !btrfs_is_data_reloc_root(root) && + is_fstree(root->root_key.objectid)) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 351ba9e90675..11d81e39ef4e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -446,6 +446,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, btrfs_item_key_to_cpu(leaf, &item_key, slot); is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY); + /* + * Bad rootid for reloc trees. + * + * Reloc trees are only for subvolume trees, other trees only need + * to be COWed to be relocated. + */ + if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID && + !is_fstree(key->offset))) { + generic_err(leaf, slot, + "invalid reloc tree for root %lld, root id is not a subvolume tree", + key->offset); + return -EUCLEAN; + } + /* No such tree id */ if (unlikely(key->objectid == 0)) { if (is_root_item) -- cgit v1.2.3 From 92fb94b69c6accf1e49fff699640fa0ce03dc910 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 2 Aug 2023 09:20:24 -0400 Subject: btrfs: set cache_block_group_error if we find an error We set cache_block_group_error if btrfs_cache_block_group() returns an error, this is because we could end up not finding space to allocate and mistakenly return -ENOSPC, and which could then abort the transaction with the incorrect errno, and in the case of ENOSPC result in a WARN_ON() that will trip up tests like generic/475. However there's the case where multiple threads can be racing, one thread gets the proper error, and the other thread doesn't actually call btrfs_cache_block_group(), it instead sees ->cached == BTRFS_CACHE_ERROR. Again the result is the same, we fail to allocate our space and return -ENOSPC. Instead we need to set cache_block_group_error to -EIO in this case to make sure that if we do not make our allocation we get the appropriate error returned back to the caller. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 911908ea5f6f..f396a9afa403 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4310,8 +4310,11 @@ have_block_group: ret = 0; } - if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) { + if (!cache_block_group_error) + cache_block_group_error = -EIO; goto loop; + } if (!find_free_extent_check_size_class(ffe_ctl, block_group)) goto loop; -- cgit v1.2.3 From a0f4b7879f2e14986200747d1b545e5daac8c624 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 9 Aug 2023 09:21:58 +0200 Subject: parisc: Fix lightweight spinlock checks to not break futexes The lightweight spinlock checks verify that a spinlock has either value 0 (spinlock locked) and that not any other bits than in __ARCH_SPIN_LOCK_UNLOCKED_VAL is set. This breaks the current LWS code, which writes the address of the lock into the lock word to unlock it, which was an optimization to save one assembler instruction. Fix it by making spinlock_types.h accessible for asm code, change the LWS spinlock-unlocking code to write __ARCH_SPIN_LOCK_UNLOCKED_VAL into the lock word, and add some missing lightweight spinlock checks to the LWS path. Finally, make the spinlock checks dependend on DEBUG_KERNEL. Noticed-by: John David Anglin Signed-off-by: Helge Deller Tested-by: John David Anglin Cc: stable@vger.kernel.org # v6.4+ Fixes: 15e64ef6520e ("parisc: Add lightweight spinlock checks") --- arch/parisc/Kconfig.debug | 2 +- arch/parisc/include/asm/spinlock.h | 2 -- arch/parisc/include/asm/spinlock_types.h | 6 ++++++ arch/parisc/kernel/syscall.S | 23 ++++++++++++++++++++--- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 1401e4c5fe5f..bf2b21b96f0b 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -2,7 +2,7 @@ # config LIGHTWEIGHT_SPINLOCK_CHECK bool "Enable lightweight spinlock checks" - depends on SMP && !DEBUG_SPINLOCK + depends on DEBUG_KERNEL && SMP && !DEBUG_SPINLOCK default y help Add checks with low performance impact to the spinlock functions diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index edfcb9858bcb..0b326e52255e 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -7,8 +7,6 @@ #include #include -#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */ - static inline void arch_spin_val_check(int lock_val) { if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK)) diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index d65934079ebd..efd06a897c6a 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -4,6 +4,10 @@ #define __ARCH_SPIN_LOCK_UNLOCKED_VAL 0x1a46 +#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */ + +#ifndef __ASSEMBLY__ + typedef struct { #ifdef CONFIG_PA20 volatile unsigned int slock; @@ -27,6 +31,8 @@ typedef struct { volatile unsigned int counter; } arch_rwlock_t; +#endif /* __ASSEMBLY__ */ + #define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000 #define __ARCH_RW_LOCK_UNLOCKED { .lock_mutex = __ARCH_SPIN_LOCK_UNLOCKED, \ .counter = __ARCH_RW_LOCK_UNLOCKED__ } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 1373e5129868..1f51aa9c8230 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -39,6 +39,7 @@ registers). #include #include #include +#include #include @@ -66,6 +67,16 @@ registers). stw \reg1, 0(%sr2,\reg2) .endm + /* raise exception if spinlock content is not zero or + * __ARCH_SPIN_LOCK_UNLOCKED_VAL */ + .macro spinlock_check spin_val,tmpreg +#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmpreg + andcm,= \spin_val, \tmpreg, %r0 + .word SPINLOCK_BREAK_INSN +#endif + .endm + .text .import syscall_exit,code @@ -508,7 +519,8 @@ lws_start: lws_exit_noerror: lws_pagefault_enable %r1,%r21 - stw,ma %r20, 0(%sr2,%r20) + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21 + stw,ma %r21, 0(%sr2,%r20) ssm PSW_SM_I, %r0 b lws_exit copy %r0, %r21 @@ -521,7 +533,8 @@ lws_wouldblock: lws_pagefault: lws_pagefault_enable %r1,%r21 - stw,ma %r20, 0(%sr2,%r20) + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21 + stw,ma %r21, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 3(%r0),%r28 b lws_exit @@ -619,6 +632,7 @@ lws_compare_and_swap: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -772,6 +786,7 @@ cas2_lock_start: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -1001,6 +1016,7 @@ atomic_xchg_start: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -1199,6 +1215,7 @@ atomic_store_start: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -1330,7 +1347,7 @@ ENTRY(lws_lock_start) /* lws locks */ .rept 256 /* Keep locks aligned at 16-bytes */ - .word 1 + .word __ARCH_SPIN_LOCK_UNLOCKED_VAL .word 0 .word 0 .word 0 -- cgit v1.2.3 From 56cf894effc2946f273f7bfc9a28f3741978156c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Aug 2023 17:19:53 +0800 Subject: parisc: pdt: Use PTR_ERR_OR_ZERO() to simplify code Return PTR_ERR_OR_ZERO() instead of return 0 or PTR_ERR() to simplify code. Signed-off-by: Yang Yingliang Signed-off-by: Helge Deller --- arch/parisc/kernel/pdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 0d24735bd918..0f9b3b5914cf 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -354,10 +354,8 @@ static int __init pdt_initcall(void) return -ENODEV; kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd"); - if (IS_ERR(kpdtd_task)) - return PTR_ERR(kpdtd_task); - return 0; + return PTR_ERR_OR_ZERO(kpdtd_task); } late_initcall(pdt_initcall); -- cgit v1.2.3 From aa1bb8b6351a81b28b9e10ab3414c21ded7cf11d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 03:31:18 +0200 Subject: parisc: fault: Use C99 arrary initializers Sparse wants C99 array initializers. Signed-off-by: Helge Deller --- arch/parisc/mm/fault.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index a4c7c7630f48..2fe5b44986e0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -192,31 +192,31 @@ int fixup_exception(struct pt_regs *regs) * For implementation see handle_interruption() in traps.c */ static const char * const trap_description[] = { - [1] "High-priority machine check (HPMC)", - [2] "Power failure interrupt", - [3] "Recovery counter trap", - [5] "Low-priority machine check", - [6] "Instruction TLB miss fault", - [7] "Instruction access rights / protection trap", - [8] "Illegal instruction trap", - [9] "Break instruction trap", - [10] "Privileged operation trap", - [11] "Privileged register trap", - [12] "Overflow trap", - [13] "Conditional trap", - [14] "FP Assist Exception trap", - [15] "Data TLB miss fault", - [16] "Non-access ITLB miss fault", - [17] "Non-access DTLB miss fault", - [18] "Data memory protection/unaligned access trap", - [19] "Data memory break trap", - [20] "TLB dirty bit trap", - [21] "Page reference trap", - [22] "Assist emulation trap", - [25] "Taken branch trap", - [26] "Data memory access rights trap", - [27] "Data memory protection ID trap", - [28] "Unaligned data reference trap", + [1] = "High-priority machine check (HPMC)", + [2] = "Power failure interrupt", + [3] = "Recovery counter trap", + [5] = "Low-priority machine check", + [6] = "Instruction TLB miss fault", + [7] = "Instruction access rights / protection trap", + [8] = "Illegal instruction trap", + [9] = "Break instruction trap", + [10] = "Privileged operation trap", + [11] = "Privileged register trap", + [12] = "Overflow trap", + [13] = "Conditional trap", + [14] = "FP Assist Exception trap", + [15] = "Data TLB miss fault", + [16] = "Non-access ITLB miss fault", + [17] = "Non-access DTLB miss fault", + [18] = "Data memory protection/unaligned access trap", + [19] = "Data memory break trap", + [20] = "TLB dirty bit trap", + [21] = "Page reference trap", + [22] = "Assist emulation trap", + [25] = "Taken branch trap", + [26] = "Data memory access rights trap", + [27] = "Data memory protection ID trap", + [28] = "Unaligned data reference trap", }; const char *trap_name(unsigned long code) -- cgit v1.2.3 From b873bde58578db2201b2f30ca708dcb0e35ff4b1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 03:55:47 +0200 Subject: parisc: ioremap: Fix sparse warnings Fix sparse warning: incorrect type in assignment (different base types) expected unsigned long [usertype] addr got void *addr Signed-off-by: Helge Deller --- arch/parisc/mm/ioremap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 345ff0b66499..d7ee1f43d997 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -27,7 +27,7 @@ */ void __iomem *ioremap(unsigned long phys_addr, unsigned long size) { - void __iomem *addr; + uintptr_t addr; struct vm_struct *area; unsigned long offset, last_addr; pgprot_t pgprot; @@ -79,10 +79,9 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size) if (!area) return NULL; - addr = (void __iomem *) area->addr; - if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, - phys_addr, pgprot)) { - vunmap(addr); + addr = (uintptr_t) area->addr; + if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) { + vunmap(area->addr); return NULL; } -- cgit v1.2.3 From dc54a52a8cd43cff4fbe8d761c98edeb857d3ad7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 04:07:01 +0200 Subject: parisc: signal: Fix sparse incorrect type in assignment warning Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index f886ff0c75df..e8d27def6c52 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -423,7 +423,7 @@ static void check_syscallno_in_delay_branch(struct pt_regs *regs) regs->gr[31] -= 8; /* delayed branching */ /* Get assembler opcode of code in delay branch */ - uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4); + uaddr = (u32 __user *) ((regs->gr[31] & ~3) + 4); err = get_user(opcode, uaddr); if (err) return; -- cgit v1.2.3 From a07c03e8064026d55d1699d161c536cc437d58d6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 04:29:29 +0200 Subject: parisc: firmware: Fix sparse context imbalance warnings Tell sparse about correct context for pdc_cpu_rendezvous_*lock() functions. Signed-off-by: Helge Deller --- arch/parisc/kernel/firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 6d1c781eb1db..9cd8af5e79d7 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -334,7 +334,7 @@ int __pdc_cpu_rendezvous(void) /** * pdc_cpu_rendezvous_lock - Lock PDC while transitioning to rendezvous state */ -void pdc_cpu_rendezvous_lock(void) +void pdc_cpu_rendezvous_lock(void) __acquires(&pdc_lock) { spin_lock(&pdc_lock); } @@ -342,7 +342,7 @@ void pdc_cpu_rendezvous_lock(void) /** * pdc_cpu_rendezvous_unlock - Unlock PDC after reaching rendezvous state */ -void pdc_cpu_rendezvous_unlock(void) +void pdc_cpu_rendezvous_unlock(void) __releases(&pdc_lock) { spin_unlock(&pdc_lock); } -- cgit v1.2.3 From eed869aaf1305444434ad5a9a56abc45aacc0c40 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 04:34:58 +0200 Subject: parisc: firmware: Mark pdc_result buffers local This fixes a sparse warning which suggest to make those static. Signed-off-by: Helge Deller --- arch/parisc/kernel/firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 9cd8af5e79d7..8f37e75f2fb9 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -74,8 +74,8 @@ static DEFINE_SPINLOCK(pdc_lock); #endif -unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8); -unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8); +static unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8); +static unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8); #ifdef CONFIG_64BIT #define WIDE_FIRMWARE 0x1 -- cgit v1.2.3 From d566bea4a638ff0ae824df804bc08818bace41a5 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 8 Aug 2023 15:07:09 +0200 Subject: riscv: Do not allow vmap pud mappings for 3-level page table The vmalloc_fault() path was removed and to avoid syncing the vmalloc PGD mappings, they are now preallocated. But if the kernel can use a PUD mapping (which in sv39 is actually a PGD mapping) for large vmalloc allocation, it will free the current unused preallocated PGD mapping and install a new leaf one. Since there is no sync anymore, some page tables lack this new mapping and that triggers a panic. So only allow PUD mappings for sv48 and sv57. Fixes: 7d3332be011e ("riscv: mm: Pre-allocate PGD entries for vmalloc/modules area") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230808130709.1502614-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vmalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index 58d3e447f191..924d01b56c9a 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -3,12 +3,14 @@ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +extern bool pgtable_l4_enabled, pgtable_l5_enabled; + #define IOREMAP_MAX_ORDER (PUD_SHIFT) #define arch_vmap_pud_supported arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { - return true; + return pgtable_l4_enabled || pgtable_l5_enabled; } #define arch_vmap_pmd_supported arch_vmap_pmd_supported -- cgit v1.2.3 From 7e3811521dc3934e2ecae8458676fc4a1f62bf9f Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 25 Jul 2023 15:22:46 +0200 Subject: riscv: Implement flush_cache_vmap() The RISC-V kernel needs a sfence.vma after a page table modification: we used to rely on the vmalloc fault handling to emit an sfence.vma, but commit 7d3332be011e ("riscv: mm: Pre-allocate PGD entries for vmalloc/modules area") got rid of this path for 64-bit kernels, so now we need to explicitly emit a sfence.vma in flush_cache_vmap(). Note that we don't need to implement flush_cache_vunmap() as the generic code should emit a flush tlb after unmapping a vmalloc region. Fixes: 7d3332be011e ("riscv: mm: Pre-allocate PGD entries for vmalloc/modules area") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230725132246.817726-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8091b8bf4883..b93ffddf8a61 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -37,6 +37,10 @@ static inline void flush_dcache_page(struct page *page) #define flush_icache_user_page(vma, pg, addr, len) \ flush_icache_mm(vma->vm_mm, 0) +#ifdef CONFIG_64BIT +#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) +#endif + #ifndef CONFIG_SMP #define flush_icache_all() local_flush_icache_all() -- cgit v1.2.3 From 438e9230d60ed8771db5770c3c795f3272ef7aae Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 18:31:47 +0200 Subject: parisc: ucmpdi2: Fix no previous prototype for '__ucmpdi2' warning Signed-off-by: Helge Deller --- arch/parisc/lib/ucmpdi2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/lib/ucmpdi2.c b/arch/parisc/lib/ucmpdi2.c index 8e6014a142ef..9d8b4dbae273 100644 --- a/arch/parisc/lib/ucmpdi2.c +++ b/arch/parisc/lib/ucmpdi2.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include union ull_union { unsigned long long ull; @@ -9,7 +10,7 @@ union ull_union { } ui; }; -int __ucmpdi2(unsigned long long a, unsigned long long b) +word_type __ucmpdi2(unsigned long long a, unsigned long long b) { union ull_union au = {.ull = a}; union ull_union bu = {.ull = b}; -- cgit v1.2.3 From 388d5bdba3fd791f734cc6687309fe59fb3343bb Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 18:32:24 +0200 Subject: parisc: parisc_ksyms: Include libgcc.h for libgcc prototypes Signed-off-by: Helge Deller --- arch/parisc/kernel/parisc_ksyms.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 00297e8e1c88..6f0c92e8149d 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -14,6 +14,7 @@ #include #include #include +#include #include EXPORT_SYMBOL(memset); @@ -92,12 +93,6 @@ EXPORT_SYMBOL($$divI_12); EXPORT_SYMBOL($$divI_14); EXPORT_SYMBOL($$divI_15); -extern void __ashrdi3(void); -extern void __ashldi3(void); -extern void __lshrdi3(void); -extern void __muldi3(void); -extern void __ucmpdi2(void); - EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__lshrdi3); -- cgit v1.2.3 From a4c59c9adc5f6b2a6b0115e3c4dc1e5127c2a01b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 19:12:16 +0200 Subject: parisc: dma: Add prototype for pcxl_dma_start Signed-off-by: Helge Deller --- arch/parisc/include/asm/dma.h | 2 ++ arch/parisc/kernel/pci-dma.c | 2 +- arch/parisc/mm/init.c | 4 ---- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/dma.h b/arch/parisc/include/asm/dma.h index 9e8c101de902..582fb5d1a5d5 100644 --- a/arch/parisc/include/asm/dma.h +++ b/arch/parisc/include/asm/dma.h @@ -14,6 +14,8 @@ #define dma_outb outb #define dma_inb inb +extern unsigned long pcxl_dma_start; + /* ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up ** (or rather not merge) DMAs into manageable chunks. diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 3f6b507970eb..131d596e018f 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -39,7 +39,7 @@ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; static unsigned long pcxl_used_bytes __read_mostly; static unsigned long pcxl_used_pages __read_mostly; -extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */ +unsigned long pcxl_dma_start __ro_after_init; /* pcxl dma mapping area start */ static DEFINE_SPINLOCK(pcxl_res_lock); static char *pcxl_res_map; static int pcxl_res_hint; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 389941c7f209..a088c243edea 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -523,10 +523,6 @@ void mark_rodata_ro(void) void *parisc_vmalloc_start __ro_after_init; EXPORT_SYMBOL(parisc_vmalloc_start); -#ifdef CONFIG_PA11 -unsigned long pcxl_dma_start __ro_after_init; -#endif - void __init mem_init(void) { /* Do sanity checks on IPC (compat) structures */ -- cgit v1.2.3 From 55c91fedd03d7b9cf0c5199b2eb12b9b8e95281a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 29 Jun 2023 14:05:26 +0200 Subject: virtio-mmio: don't break lifecycle of vm_dev vm_dev has a separate lifecycle because it has a 'struct device' embedded. Thus, having a release callback for it is correct. Allocating the vm_dev struct with devres totally breaks this protection, though. Instead of waiting for the vm_dev release callback, the memory is freed when the platform_device is removed. Resulting in a use-after-free when finally the callback is to be called. To easily see the problem, compile the kernel with CONFIG_DEBUG_KOBJECT_RELEASE and unbind with sysfs. The fix is easy, don't use devres in this case. Found during my research about object lifetime problems. Fixes: 7eb781b1bbb7 ("virtio_mmio: add cleanup for virtio_mmio_probe") Signed-off-by: Wolfram Sang Message-Id: <20230629120526.7184-1-wsa+renesas@sang-engineering.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index a46a4a29e929..97760f611295 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -607,9 +607,8 @@ static void virtio_mmio_release_dev(struct device *_d) struct virtio_device *vdev = container_of(_d, struct virtio_device, dev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - struct platform_device *pdev = vm_dev->pdev; - devm_kfree(&pdev->dev, vm_dev); + kfree(vm_dev); } /* Platform device */ @@ -620,7 +619,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) unsigned long magic; int rc; - vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); + vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL); if (!vm_dev) return -ENOMEM; -- cgit v1.2.3 From 9ad1a29cb0991e3145996cdce691525e8ac65db7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 6 Jul 2023 16:17:18 -0700 Subject: pds_vdpa: protect Makefile from unconfigured debugfs debugfs.h protects itself from an undefined DEBUG_FS, so it is not necessary to check it in the driver code or the Makefile. The driver code had been updated for this, but the Makefile had missed the update. Link: https://lore.kernel.org/linux-next/fec68c3c-8249-7af4-5390-0495386a76f9@infradead.org/ Fixes: a16291b5bcbb ("pds_vdpa: Add new vDPA driver for AMD/Pensando DSC") Signed-off-by: Shannon Nelson Message-Id: <20230706231718.54198-1-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Acked-by: Jason Wang --- drivers/vdpa/pds/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile index 2e22418e3ab3..c2d314d4614d 100644 --- a/drivers/vdpa/pds/Makefile +++ b/drivers/vdpa/pds/Makefile @@ -5,6 +5,5 @@ obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o pds_vdpa-y := aux_drv.o \ cmds.o \ + debugfs.o \ vdpa_dev.o - -pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o -- cgit v1.2.3 From 5ced58bfa132c8ba0f9c893eb621595a84cfee12 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 9 Jul 2023 15:28:58 -0500 Subject: vhost-scsi: Fix alignment handling with windows The linux block layer requires bios/requests to have lengths with a 512 byte alignment. Some drivers/layers like dm-crypt and the directi IO code will test for it and just fail. Other drivers like SCSI just assume the requirement is met and will end up in infinte retry loops. The problem for drivers like SCSI is that it uses functions like blk_rq_cur_sectors and blk_rq_sectors which divide the request's length by 512. If there's lefovers then it just gets dropped. But other code in the block/scsi layer may use blk_rq_bytes/blk_rq_cur_bytes and end up thinking there is still data left and try to retry the cmd. We can then end up getting stuck in retry loops where part of the block/scsi thinks there is data left, but other parts think we want to do IOs of zero length. Linux will always check for alignment, but windows will not. When vhost-scsi then translates the iovec it gets from a windows guest to a scatterlist, we can end up with sg items where the sg->length is not divisible by 512 due to the misaligned offset: sg[0].offset = 255; sg[0].length = 3841; sg... sg[N].offset = 0; sg[N].length = 255; When the lio backends then convert the SG to bios or other iovecs, we end up sending them with the same misaligned values and can hit the issues above. This just has us drop down to allocating a temp page and copying the data when we detect a misaligned buffer and the IO is large enough that it will get split into multiple bad IOs. Signed-off-by: Mike Christie Message-Id: <20230709202859.138387-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 186 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 25 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c83f7f043470..324e4b3846fa 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -75,6 +77,9 @@ struct vhost_scsi_cmd { u32 tvc_prot_sgl_count; /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */ u32 tvc_lun; + u32 copied_iov:1; + const void *saved_iter_addr; + struct iov_iter saved_iter; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; struct scatterlist *tvc_prot_sgl; @@ -328,8 +333,13 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) int i; if (tv_cmd->tvc_sgl_count) { - for (i = 0; i < tv_cmd->tvc_sgl_count; i++) - put_page(sg_page(&tv_cmd->tvc_sgl[i])); + for (i = 0; i < tv_cmd->tvc_sgl_count; i++) { + if (tv_cmd->copied_iov) + __free_page(sg_page(&tv_cmd->tvc_sgl[i])); + else + put_page(sg_page(&tv_cmd->tvc_sgl[i])); + } + kfree(tv_cmd->saved_iter_addr); } if (tv_cmd->tvc_prot_sgl_count) { for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++) @@ -504,6 +514,28 @@ static void vhost_scsi_evt_work(struct vhost_work *work) mutex_unlock(&vq->mutex); } +static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd) +{ + struct iov_iter *iter = &cmd->saved_iter; + struct scatterlist *sg = cmd->tvc_sgl; + struct page *page; + size_t len; + int i; + + for (i = 0; i < cmd->tvc_sgl_count; i++) { + page = sg_page(&sg[i]); + len = sg[i].length; + + if (copy_page_to_iter(page, 0, len, iter) != len) { + pr_err("Could not copy data while handling misaligned cmd. Error %zu\n", + len); + return -1; + } + } + + return 0; +} + /* Fill in status and signal that we are done processing this command * * This is scheduled in the vhost work queue so we are called with the owner @@ -527,15 +559,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__, cmd, se_cmd->residual_count, se_cmd->scsi_status); - memset(&v_rsp, 0, sizeof(v_rsp)); - v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count); - /* TODO is status_qualifier field needed? */ - v_rsp.status = se_cmd->scsi_status; - v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, - se_cmd->scsi_sense_length); - memcpy(v_rsp.sense, cmd->tvc_sense_buf, - se_cmd->scsi_sense_length); + + if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) { + v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET; + } else { + v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, + se_cmd->residual_count); + /* TODO is status_qualifier field needed? */ + v_rsp.status = se_cmd->scsi_status; + v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, + se_cmd->scsi_sense_length); + memcpy(v_rsp.sense, cmd->tvc_sense_buf, + se_cmd->scsi_sense_length); + } iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov, cmd->tvc_in_iovs, sizeof(v_rsp)); @@ -613,12 +650,12 @@ static int vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, struct scatterlist *sgl, - bool write) + bool is_prot) { struct page **pages = cmd->tvc_upages; struct scatterlist *sg = sgl; - ssize_t bytes; - size_t offset; + ssize_t bytes, mapped_bytes; + size_t offset, mapped_offset; unsigned int npages = 0; bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, @@ -627,13 +664,53 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, if (bytes <= 0) return bytes < 0 ? bytes : -EFAULT; + mapped_bytes = bytes; + mapped_offset = offset; + while (bytes) { unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); + /* + * The block layer requires bios/requests to be a multiple of + * 512 bytes, but Windows can send us vecs that are misaligned. + * This can result in bios and later requests with misaligned + * sizes if we have to break up a cmd/scatterlist into multiple + * bios. + * + * We currently only break up a command into multiple bios if + * we hit the vec/seg limit, so check if our sgl_count is + * greater than the max and if a vec in the cmd has a + * misaligned offset/size. + */ + if (!is_prot && + (offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) && + cmd->tvc_sgl_count > BIO_MAX_VECS) { + WARN_ONCE(true, + "vhost-scsi detected misaligned IO. Performance may be degraded."); + goto revert_iter_get_pages; + } + sg_set_page(sg++, pages[npages++], n, offset); bytes -= n; offset = 0; } + return npages; + +revert_iter_get_pages: + iov_iter_revert(iter, mapped_bytes); + + npages = 0; + while (mapped_bytes) { + unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset, + mapped_bytes); + + put_page(pages[npages++]); + + mapped_bytes -= n; + mapped_offset = 0; + } + + return -EINVAL; } static int @@ -657,25 +734,80 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls) } static int -vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, - struct iov_iter *iter, - struct scatterlist *sg, int sg_count) +vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, + struct scatterlist *sg, int sg_count) +{ + size_t len = iov_iter_count(iter); + unsigned int nbytes = 0; + struct page *page; + int i; + + if (cmd->tvc_data_direction == DMA_FROM_DEVICE) { + cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter, + GFP_KERNEL); + if (!cmd->saved_iter_addr) + return -ENOMEM; + } + + for (i = 0; i < sg_count; i++) { + page = alloc_page(GFP_KERNEL); + if (!page) { + i--; + goto err; + } + + nbytes = min_t(unsigned int, PAGE_SIZE, len); + sg_set_page(&sg[i], page, nbytes, 0); + + if (cmd->tvc_data_direction == DMA_TO_DEVICE && + copy_page_from_iter(page, 0, nbytes, iter) != nbytes) + goto err; + + len -= nbytes; + } + + cmd->copied_iov = 1; + return 0; + +err: + pr_err("Could not read %u bytes while handling misaligned cmd\n", + nbytes); + + for (; i >= 0; i--) + __free_page(sg_page(&sg[i])); + kfree(cmd->saved_iter_addr); + return -ENOMEM; +} + +static int +vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, + struct scatterlist *sg, int sg_count, bool is_prot) { struct scatterlist *p = sg; + size_t revert_bytes; int ret; while (iov_iter_count(iter)) { - ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write); + ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot); if (ret < 0) { + revert_bytes = 0; + while (p < sg) { - struct page *page = sg_page(p++); - if (page) + struct page *page = sg_page(p); + + if (page) { put_page(page); + revert_bytes += p->length; + } + p++; } + + iov_iter_revert(iter, revert_bytes); return ret; } sg += ret; } + return 0; } @@ -685,7 +817,6 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, size_t data_bytes, struct iov_iter *data_iter) { int sgl_count, ret; - bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE); if (prot_bytes) { sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes, @@ -698,9 +829,8 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter, - cmd->tvc_prot_sgl, - cmd->tvc_prot_sgl_count); + ret = vhost_scsi_iov_to_sgl(cmd, prot_iter, cmd->tvc_prot_sgl, + cmd->tvc_prot_sgl_count, true); if (ret < 0) { cmd->tvc_prot_sgl_count = 0; return ret; @@ -716,8 +846,14 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s data_sg %p data_sgl_count %u\n", __func__, cmd->tvc_sgl, cmd->tvc_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter, - cmd->tvc_sgl, cmd->tvc_sgl_count); + ret = vhost_scsi_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + cmd->tvc_sgl_count, false); + if (ret == -EINVAL) { + sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count); + ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + cmd->tvc_sgl_count); + } + if (ret < 0) { cmd->tvc_sgl_count = 0; return ret; -- cgit v1.2.3 From c5ace19efb0ac884a9a417e2a1499ce9849bdaa5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 9 Jul 2023 15:28:59 -0500 Subject: vhost-scsi: Rename vhost_scsi_iov_to_sgl Rename vhost_scsi_iov_to_sgl to vhost_scsi_map_iov_to_sgl so it matches matches the naming style used for vhost_scsi_copy_iov_to_sgl. Signed-off-by: Mike Christie Message-Id: <20230709202859.138387-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 324e4b3846fa..abef0619c790 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -780,8 +780,8 @@ err: } static int -vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, - struct scatterlist *sg, int sg_count, bool is_prot) +vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, + struct scatterlist *sg, int sg_count, bool is_prot) { struct scatterlist *p = sg; size_t revert_bytes; @@ -829,8 +829,9 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, prot_iter, cmd->tvc_prot_sgl, - cmd->tvc_prot_sgl_count, true); + ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter, + cmd->tvc_prot_sgl, + cmd->tvc_prot_sgl_count, true); if (ret < 0) { cmd->tvc_prot_sgl_count = 0; return ret; @@ -846,8 +847,8 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s data_sg %p data_sgl_count %u\n", __func__, cmd->tvc_sgl, cmd->tvc_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, - cmd->tvc_sgl_count, false); + ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + cmd->tvc_sgl_count, false); if (ret == -EINVAL) { sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count); ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, -- cgit v1.2.3 From 8d4bdf11f096e5b343ee0f9aaa8c262dc16d2e1e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 15 Jul 2023 09:20:27 -0500 Subject: MAINTAINERS: add vhost-scsi entry and myself as a co-maintainer I've been doing a lot of the development on vhost-scsi the last couple of years, so per Michael T's suggestion this adds me as co-maintainer. Signed-off-by: Mike Christie Message-Id: <20230715142027.5572-1-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Stefano Garzarella Acked-by: Stefan Hajnoczi --- MAINTAINERS | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0f966f05fb0d..63a31c3bf7d4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22476,7 +22476,6 @@ L: virtualization@lists.linux-foundation.org S: Maintained F: drivers/block/virtio_blk.c F: drivers/scsi/virtio_scsi.c -F: drivers/vhost/scsi.c F: include/uapi/linux/virtio_blk.h F: include/uapi/linux/virtio_scsi.h @@ -22575,6 +22574,16 @@ F: include/linux/vhost_iotlb.h F: include/uapi/linux/vhost.h F: kernel/vhost_task.c +VIRTIO HOST (VHOST-SCSI) +M: "Michael S. Tsirkin" +M: Jason Wang +M: Mike Christie +R: Paolo Bonzini +R: Stefan Hajnoczi +L: virtualization@lists.linux-foundation.org +S: Maintained +F: drivers/vhost/scsi.c + VIRTIO I2C DRIVER M: Conghui Chen M: Viresh Kumar -- cgit v1.2.3 From 13f3efaca024e16ccfab0e8b2cf29d66489d8d54 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Wed, 19 Jul 2023 11:45:50 -0400 Subject: virtio-pci: Fix legacy device flag setting error in probe The 'is_legacy' flag is used to differentiate between legacy vs modern device. Currently, it is based on the value of vp_dev->ldev.ioaddr. However, due to the shared memory of the union between struct virtio_pci_legacy_device and struct virtio_pci_modern_device, when virtio_pci_modern_probe modifies the content of struct virtio_pci_modern_device, it affects the content of struct virtio_pci_legacy_device, and ldev.ioaddr is no longer zero, causing the 'is_legacy' flag to be set as true. To resolve issue, when legacy device is probed, mark 'is_legacy' as true, when modern device is probed, keep 'is_legacy' as false. Fixes: 4f0fc22534e3 ("virtio_pci: Optimize virtio_pci_device structure size") Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Message-Id: <20230719154550.79536-1-feliu@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Acked-by: Jason Wang --- drivers/virtio/virtio_pci_common.c | 2 -- drivers/virtio/virtio_pci_legacy.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index a6c86f916dbd..c2524a7207cf 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -557,8 +557,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, pci_set_master(pci_dev); - vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false; - rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2257f1b3d8ae..d9cbb02b35a1 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -223,6 +223,7 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; + vp_dev->is_legacy = true; return 0; } -- cgit v1.2.3 From 79c8651587504ba263d2fd67fd4406240fb21f69 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 27 Jul 2023 20:57:48 +0300 Subject: vdpa: Add features attr to vdpa_nl_policy for nlattr length check The vdpa_nl_policy structure is used to validate the nlattr when parsing the incoming nlmsg. It will ensure the attribute being described produces a valid nlattr pointer in info->attrs before entering into each handler in vdpa_nl_ops. That is to say, the missing part in vdpa_nl_policy may lead to illegal nlattr after parsing, which could lead to OOB read just like CVE-2023-3773. This patch adds the missing nla_policy for vdpa features attr to avoid such bugs. Fixes: 90fea5a800c3 ("vdpa: device feature provisioning") Signed-off-by: Lin Ma Cc: stable@vger.kernel.org Message-Id: <20230727175757.73988-3-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 965e32529eb8..3ad355a2208a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1249,6 +1249,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), + [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, }; static const struct genl_ops vdpa_nl_ops[] = { -- cgit v1.2.3 From b3003e1b54e057f5f3124e437b80c3bef26ed3fe Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 27 Jul 2023 20:57:50 +0300 Subject: vdpa: Add queue index attr to vdpa_nl_policy for nlattr length check The vdpa_nl_policy structure is used to validate the nlattr when parsing the incoming nlmsg. It will ensure the attribute being described produces a valid nlattr pointer in info->attrs before entering into each handler in vdpa_nl_ops. That is to say, the missing part in vdpa_nl_policy may lead to illegal nlattr after parsing, which could lead to OOB read just like CVE-2023-3773. This patch adds the missing nla_policy for vdpa queue index attr to avoid such bugs. Fixes: 13b00b135665 ("vdpa: Add support for querying vendor statistics") Signed-off-by: Lin Ma Cc: stable@vger.kernelorg Message-Id: <20230727175757.73988-5-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 3ad355a2208a..75f1df2b9d2a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1249,6 +1249,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), + [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, }; -- cgit v1.2.3 From 5d6ba607d6cb5c58a4ddf33381e18c83dbb4098f Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 27 Jul 2023 20:57:52 +0300 Subject: vdpa: Add max vqp attr to vdpa_nl_policy for nlattr length check The vdpa_nl_policy structure is used to validate the nlattr when parsing the incoming nlmsg. It will ensure the attribute being described produces a valid nlattr pointer in info->attrs before entering into each handler in vdpa_nl_ops. That is to say, the missing part in vdpa_nl_policy may lead to illegal nlattr after parsing, which could lead to OOB read just like CVE-2023-3773. This patch adds the missing nla_policy for vdpa max vqp attr to avoid such bugs. Fixes: ad69dd0bf26b ("vdpa: Introduce query of device config layout") Signed-off-by: Lin Ma Cc: stable@vger.kernel.org Message-Id: <20230727175757.73988-7-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 75f1df2b9d2a..f2f654fd84e5 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1247,6 +1247,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, + [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 }, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, -- cgit v1.2.3 From f46c1e1620c6bbc9aad5693082efd1b80822e97c Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 27 Jul 2023 20:57:54 +0300 Subject: vdpa: Enable strict validation for netlinks ops The previous patches added the missing nla policies that were required for validation to work. Now strict validation on netlink ops can be enabled. This patch does it. Signed-off-by: Dragos Tatulea Cc: stable@vger.kernel.org Message-Id: <20230727175757.73988-9-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index f2f654fd84e5..a7612e0783b3 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1257,37 +1257,31 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { static const struct genl_ops vdpa_nl_ops[] = { { .cmd = VDPA_CMD_MGMTDEV_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_mgmtdev_get_doit, .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_NEW, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_add_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_DEL, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_del_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_get_doit, .dumpit = vdpa_nl_cmd_dev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_CONFIG_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_config_get_doit, .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, }, { .cmd = VDPA_CMD_DEV_VSTATS_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_stats_get_doit, .flags = GENL_ADMIN_PERM, }, -- cgit v1.2.3 From 7ca26efb09a1543fddb29308ea3b63b66cb5d3ee Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Wed, 5 Jul 2023 13:45:05 +0200 Subject: vduse: Use proper spinlock for IRQ injection The IRQ injection work used spin_lock_irq() to protect the scheduling of the softirq, but spin_lock_bh() should be used. With spin_lock_irq(), we noticed delay of more than 6 seconds between the time a NAPI polling work is scheduled and the time it is executed. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Cc: xieyongji@bytedance.com Suggested-by: Jason Wang Signed-off-by: Maxime Coquelin Message-Id: <20230705114505.63274-1-maxime.coquelin@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xie Yongji --- drivers/vdpa/vdpa_user/vduse_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index dc38ed21319d..df7869537ef1 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -935,10 +935,10 @@ static void vduse_dev_irq_inject(struct work_struct *work) { struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); - spin_lock_irq(&dev->irq_lock); + spin_lock_bh(&dev->irq_lock); if (dev->config_cb.callback) dev->config_cb.callback(dev->config_cb.private); - spin_unlock_irq(&dev->irq_lock); + spin_unlock_bh(&dev->irq_lock); } static void vduse_vq_irq_inject(struct work_struct *work) @@ -946,10 +946,10 @@ static void vduse_vq_irq_inject(struct work_struct *work) struct vduse_virtqueue *vq = container_of(work, struct vduse_virtqueue, inject); - spin_lock_irq(&vq->irq_lock); + spin_lock_bh(&vq->irq_lock); if (vq->ready && vq->cb.callback) vq->cb.callback(vq->cb.private); - spin_unlock_irq(&vq->irq_lock); + spin_unlock_bh(&vq->irq_lock); } static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq) -- cgit v1.2.3 From df9557046440b0a62250fee3169a8f6a139f55a6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 26 Jul 2023 22:10:07 +0300 Subject: virtio-vdpa: Fix cpumask memory leak in virtio_vdpa_find_vqs() Free the cpumask allocated by create_affinity_masks() before returning from the function. Fixes: 3dad56823b53 ("virtio-vdpa: Support interrupt affinity spreading mechanism") Signed-off-by: Gal Pressman Reviewed-by: Dragos Tatulea Message-Id: <20230726191036.14324-1-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xie Yongji --- drivers/virtio/virtio_vdpa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 989e2d7184ce..961161da5900 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -393,11 +393,13 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, cb.callback = virtio_vdpa_config_cb; cb.private = vd_dev; ops->set_config_cb(vdpa, &cb); + kfree(masks); return 0; err_setup_vq: virtio_vdpa_del_vqs(vdev); + kfree(masks); return err; } -- cgit v1.2.3 From 3fe024193340b225d1fd410d78c495434a9d68e0 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 27 Jul 2023 20:23:46 +0300 Subject: vdpa/mlx5: Correct default number of queues when MQ is on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The standard specifies that the initial number of queues is the default, which is 1 (1 tx, 1 rx). Signed-off-by: Dragos Tatulea Reviewed-by: Eugenio Pérez Message-Id: <20230727172354.68243-2-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Tested-by: Lei Yang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 9138ef2fb2c8..6b6eb69a8a90 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2517,7 +2517,15 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) else ndev->rqt_size = 1; - ndev->cur_num_vqs = 2 * ndev->rqt_size; + /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section + * 5.1.6.5.5 "Device operation in multiqueue mode": + * + * Multiqueue is disabled by default. + * The driver enables multiqueue by sending a command using class + * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue + * operation, as follows: ... + */ + ndev->cur_num_vqs = 2; update_cvq_info(mvdev); return err; -- cgit v1.2.3 From 9ee811009ad8f87982b69e61d07447d12233ad01 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 2 Aug 2023 20:12:18 +0300 Subject: vdpa/mlx5: Fix mr->initialized semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mr->initialized flag is shared between the control vq and data vq part of the mr init/uninit. But if the control vq and data vq get placed in different ASIDs, it can happen that initializing the control vq will prevent the data vq mr from being initialized. This patch consolidates the control and data vq init parts into their own init functions. The mr->initialized will now be used for the data vq only. The control vq currently doesn't need a flag. The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got split into data and control vq functions which are now also ASID aware. Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data") Signed-off-by: Dragos Tatulea Reviewed-by: Eugenio Pérez Reviewed-by: Gal Pressman Message-Id: <20230802171231.11001-3-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 97 +++++++++++++++++++++++++++----------- 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 25fc4120b618..a0420be5059f 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr { struct list_head head; unsigned long num_directs; unsigned long num_klms; + /* state of dvq mr */ bool initialized; /* serialize mkey creation and destruction */ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 03e543229791..4ae14a248a4b 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr } } -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + return; + + prune_iotlb(mvdev); +} + +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; - mutex_lock(&mr->mkey_mtx); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) + return; + if (!mr->initialized) - goto out; + return; - prune_iotlb(mvdev); if (mr->user_mr) destroy_user_mr(mvdev, mr); else destroy_dma_mr(mvdev, mr); mr->initialized = false; -out: +} + +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + + mutex_lock(&mr->mkey_mtx); + + _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); + _mlx5_vdpa_destroy_cvq_mr(mvdev, asid); + mutex_unlock(&mr->mkey_mtx); } -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, unsigned int asid) +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +{ + mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]); + mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]); +} + +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid) +{ + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + return 0; + + return dup_iotlb(mvdev, iotlb); +} + +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; - if (mr->initialized) + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) return 0; - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - if (iotlb) - err = create_user_mr(mvdev, iotlb); - else - err = create_dma_mr(mvdev, mr); + if (mr->initialized) + return 0; - if (err) - return err; - } + if (iotlb) + err = create_user_mr(mvdev, iotlb); + else + err = create_dma_mr(mvdev, mr); - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { - err = dup_iotlb(mvdev, iotlb); - if (err) - goto out_err; - } + if (err) + return err; mr->initialized = true; + + return 0; +} + +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, unsigned int asid) +{ + int err; + + err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); + if (err) + return err; + + err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid); + if (err) + goto out_err; + return 0; out_err: - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - if (iotlb) - destroy_user_mr(mvdev, mr); - else - destroy_dma_mr(mvdev, mr); - } + _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); return err; } -- cgit v1.2.3 From ad03a0f44cdb97b46e5c84ed353dac9b8ae2c276 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 2 Aug 2023 20:12:20 +0300 Subject: vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mlx5_vdpa_destroy_mr can be called from .set_map with data ASID after the control virtqueue ASID iotlb has been populated. The control vq iotlb must not be cleared, since it will not be populated again. So call the ASID aware destroy function which makes sure that the right vq resource is destroyed. Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data") Signed-off-by: Eugenio Pérez Reviewed-by: Gal Pressman Message-Id: <20230802171231.11001-5-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index a0420be5059f..b53420e874ac 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -122,6 +122,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, unsigned int asid); void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid); #define mlx5_vdpa_warn(__dev, format, ...) \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4ae14a248a4b..5a1971fcd87b 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -515,7 +515,7 @@ static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int mr->initialized = false; } -static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 6b6eb69a8a90..dbbc82eec7de 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2644,7 +2644,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, goto err_mr; teardown_driver(ndev); - mlx5_vdpa_destroy_mr(mvdev); + mlx5_vdpa_destroy_mr_asid(mvdev, asid); err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); if (err) goto err_mr; @@ -2660,7 +2660,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, return 0; err_setup: - mlx5_vdpa_destroy_mr(mvdev); + mlx5_vdpa_destroy_mr_asid(mvdev, asid); err_mr: return err; } -- cgit v1.2.3 From 810b0cc1c28a9b8d055dd8f7d85975e3cf9f4430 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 3 Aug 2023 18:26:33 +0300 Subject: vdpa/mlx5: Fix crash on shutdown for when no ndev exists The ndev was accessed on shutdown without a check if it actually exists. This triggered the crash pasted below. Instead of doing the ndev check, delete the shutdown handler altogether. The irqs will be released at the parent VF level (mlx5_core). BUG: kernel NULL pointer dereference, address: 0000000000000300 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 6.5.0-rc2_for_upstream_min_debug_2023_07_17_15_05 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5v_shutdown+0xe/0x50 [mlx5_vdpa] RSP: 0018:ffff8881003bfdc0 EFLAGS: 00010286 RAX: ffff888103befba0 RBX: ffff888109d28008 RCX: 0000000000000017 RDX: 0000000000000001 RSI: 0000000000000212 RDI: ffff888109d28000 RBP: 0000000000000000 R08: 0000000d3a3a3882 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff888109d28000 R13: ffff888109d28080 R14: 00000000fee1dead R15: 0000000000000000 FS: 00007f4969e0be40(0000) GS:ffff88852c800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000300 CR3: 00000001051cd006 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __die+0x20/0x60 ? page_fault_oops+0x14c/0x3c0 ? exc_page_fault+0x75/0x140 ? asm_exc_page_fault+0x22/0x30 ? mlx5v_shutdown+0xe/0x50 [mlx5_vdpa] device_shutdown+0x13e/0x1e0 kernel_restart+0x36/0x90 __do_sys_reboot+0x141/0x210 ? vfs_writev+0xcd/0x140 ? handle_mm_fault+0x161/0x260 ? do_writev+0x6b/0x110 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f496990fb56 RSP: 002b:00007fffc7bdde88 EFLAGS: 00000206 ORIG_RAX: 00000000000000a9 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f496990fb56 RDX: 0000000001234567 RSI: 0000000028121969 RDI: fffffffffee1dead RBP: 00007fffc7bde1d0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007fffc7bddf10 R14: 0000000000000000 R15: 00007fffc7bde2b8 CR2: 0000000000000300 ---[ end trace 0000000000000000 ]--- Fixes: bc9a2b3e686e ("vdpa/mlx5: Support interrupt bypassing") Signed-off-by: Dragos Tatulea Message-Id: <20230803152648.199297-1-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index dbbc82eec7de..37be945a0230 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3556,17 +3556,6 @@ static void mlx5v_remove(struct auxiliary_device *adev) kfree(mgtdev); } -static void mlx5v_shutdown(struct auxiliary_device *auxdev) -{ - struct mlx5_vdpa_mgmtdev *mgtdev; - struct mlx5_vdpa_net *ndev; - - mgtdev = auxiliary_get_drvdata(auxdev); - ndev = mgtdev->ndev; - - free_irqs(ndev); -} - static const struct auxiliary_device_id mlx5v_id_table[] = { { .name = MLX5_ADEV_NAME ".vnet", }, {}, @@ -3578,7 +3567,6 @@ static struct auxiliary_driver mlx5v_driver = { .name = "vnet", .probe = mlx5v_probe, .remove = mlx5v_remove, - .shutdown = mlx5v_shutdown, .id_table = mlx5v_id_table, }; -- cgit v1.2.3 From 5363fc488da579923edf6a2fdca3d3b651dd800b Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 9 Aug 2023 21:44:35 -0700 Subject: RDMA/bnxt_re: Properly order ib_device_unalloc() to avoid UAF ib_dealloc_device() should be called only after device cleanup. Fix the dealloc sequence. Fixes: 6d758147c7b8 ("RDMA/bnxt_re: Use auxiliary driver interface") Link: https://lore.kernel.org/r/1691642677-21369-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b42166fe7454..1c7646057893 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1526,8 +1526,8 @@ static void bnxt_re_remove(struct auxiliary_device *adev) } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); - ib_dealloc_device(&rdev->ibdev); bnxt_re_dev_uninit(rdev); + ib_dealloc_device(&rdev->ibdev); skip_remove: mutex_unlock(&bnxt_re_mutex); } -- cgit v1.2.3 From 5ac8480ae4d01f0ca5dfd561884424046df2478a Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 9 Aug 2023 21:44:36 -0700 Subject: RDMA/bnxt_re: Fix error handling in probe failure path During bnxt_re_dev_init(), when bnxt_re_setup_chip_ctx() fails unregister with L2 first before bailing out probe. Fixes: ae8637e13185 ("RDMA/bnxt_re: Add chip context to identify 57500 series") Link: https://lore.kernel.org/r/1691642677-21369-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 1c7646057893..63e98e2d3596 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1253,6 +1253,8 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); if (rc) { + bnxt_unregister_dev(rdev->en_dev); + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); return -EINVAL; } -- cgit v1.2.3 From 64b632654b97319b253c2c902fe4c11349aaa70f Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Wed, 9 Aug 2023 21:44:37 -0700 Subject: RDMA/bnxt_re: Initialize dpi_tbl_lock mutex Fix the missing dpi_tbl_lock mutex initialization. Fixes: 0ac20faf5d83 ("RDMA/bnxt_re: Reorg the bar mapping") Link: https://lore.kernel.org/r/1691642677-21369-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 5fd8f7c90bb0..739d942761d1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -819,6 +819,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, } memset((u8 *)dpit->tbl, 0xFF, bytes); + mutex_init(&res->dpi_tbl_lock); dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset; return 0; -- cgit v1.2.3 From 2c507ce90e02cd78d00fd4b0fe26c8641873c13f Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 10 Aug 2023 19:04:05 +0800 Subject: virtio-net: Zero max_tx_vq field for VIRTIO_NET_CTRL_MQ_HASH_CONFIG case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel uses `struct virtio_net_ctrl_rss` to save command-specific-data for both the VIRTIO_NET_CTRL_MQ_HASH_CONFIG and VIRTIO_NET_CTRL_MQ_RSS_CONFIG commands. According to the VirtIO standard, "Field reserved MUST contain zeroes. It is defined to make the structure to match the layout of virtio_net_rss_config structure, defined in 5.1.6.5.7.". Yet for the VIRTIO_NET_CTRL_MQ_HASH_CONFIG command case, the `max_tx_vq` field in struct virtio_net_ctrl_rss, which corresponds to the `reserved` field in struct virtio_net_hash_config, is not zeroed, thereby violating the VirtIO standard. This patch solves this problem by zeroing this field in virtnet_init_default_rss(). Cc: Andrew Melnychenko Cc: stable@vger.kernel.org Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Hawkins Jiawei Acked-by: Jason Wang Acked-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Message-Id: <20230810110405.25558-1-yin31149@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Acked-by: Jason Wang --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1270c8d23463..8db38634ae82 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2761,7 +2761,7 @@ static void virtnet_init_default_rss(struct virtnet_info *vi) vi->ctrl->rss.indirection_table[i] = indir_val; } - vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs; + vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; vi->ctrl->rss.hash_key_length = vi->rss_key_size; netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); -- cgit v1.2.3 From 0cd2c13b1c15dbbdf1e2ae5b7160537f97df06b5 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 10 Jul 2023 21:24:33 -0700 Subject: pds_vdpa: reset to vdpa specified mac When the vdpa device is reset, also reinitialize it with the mac address that was assigned when the device was added. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Allen Hubbe Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-2-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/vdpa_dev.c | 16 ++++++++-------- drivers/vdpa/pds/vdpa_dev.h | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 5071a4d58f8d..e2e99bb0be2b 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -409,6 +409,8 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) pdsv->vqs[i].avail_idx = 0; pdsv->vqs[i].used_idx = 0; } + + pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); } if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) { @@ -532,7 +534,6 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, struct device *dma_dev; struct pci_dev *pdev; struct device *dev; - u8 mac[ETH_ALEN]; int err; int i; @@ -617,19 +618,18 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, * or set a random mac if default is 00:..:00 */ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { - ether_addr_copy(mac, add_config->net.mac); - pds_vdpa_cmd_set_mac(pdsv, mac); + ether_addr_copy(pdsv->mac, add_config->net.mac); } else { struct virtio_net_config __iomem *vc; vc = pdsv->vdpa_aux->vd_mdev.device; - memcpy_fromio(mac, vc->mac, sizeof(mac)); - if (is_zero_ether_addr(mac)) { - eth_random_addr(mac); - dev_info(dev, "setting random mac %pM\n", mac); - pds_vdpa_cmd_set_mac(pdsv, mac); + memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac)); + if (is_zero_ether_addr(pdsv->mac)) { + eth_random_addr(pdsv->mac); + dev_info(dev, "setting random mac %pM\n", pdsv->mac); } } + pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); for (i = 0; i < pdsv->num_vqs; i++) { pdsv->vqs[i].qid = i; diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h index a1bc37de9537..cf02df287fc4 100644 --- a/drivers/vdpa/pds/vdpa_dev.h +++ b/drivers/vdpa/pds/vdpa_dev.h @@ -39,6 +39,7 @@ struct pds_vdpa_device { u64 req_features; /* features requested by vdpa */ u8 vdpa_index; /* rsvd for future subdevice use */ u8 num_vqs; /* num vqs in use */ + u8 mac[ETH_ALEN]; /* mac selected when the device was added */ struct vdpa_callback config_cb; struct notifier_block nb; }; -- cgit v1.2.3 From abdf31bd91120035172dc58e2e87064a72e9e087 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 10 Jul 2023 21:24:34 -0700 Subject: pds_vdpa: always allow offering VIRTIO_NET_F_MAC Our driver sets a mac if the HW is 00:..:00 so we need to be sure to advertise VIRTIO_NET_F_MAC even if the HW doesn't. We also need to be sure that virtio_net sees the VIRTIO_NET_F_MAC and doesn't rewrite the mac address that a user may have set with the vdpa utility. After reading the hw_feature bits, add the VIRTIO_NET_F_MAC to the driver's supported_features and use that for reporting what is available. If the HW is not advertising it, be sure to strip the VIRTIO_NET_F_MAC before finishing the feature negotiation. If the user specifies a device_features bitpattern in the vdpa utility without the VIRTIO_NET_F_MAC set, then don't set the mac. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-3-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/debugfs.c | 2 -- drivers/vdpa/pds/vdpa_dev.c | 30 +++++++++++++++++++++--------- drivers/vdpa/pds/vdpa_dev.h | 4 ++-- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index 21a0dc0cb607..754ccb7a6666 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -224,8 +224,6 @@ static int config_show(struct seq_file *seq, void *v) seq_printf(seq, "dev_status: %#x\n", status); print_status_bits(seq, status); - seq_printf(seq, "req_features: %#llx\n", pdsv->req_features); - print_feature_bits_all(seq, pdsv->req_features); driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); seq_printf(seq, "driver_features: %#llx\n", driver_features); print_feature_bits_all(seq, driver_features); diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index e2e99bb0be2b..5b566e0eef0a 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -318,6 +318,7 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur struct device *dev = &pdsv->vdpa_dev.dev; u64 driver_features; u64 nego_features; + u64 hw_features; u64 missing; if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) { @@ -325,21 +326,26 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur return -EOPNOTSUPP; } - pdsv->req_features = features; - /* Check for valid feature bits */ - nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); - missing = pdsv->req_features & ~nego_features; + nego_features = features & pdsv->supported_features; + missing = features & ~nego_features; if (missing) { dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n", - pdsv->req_features, missing); + features, missing); return -EOPNOTSUPP; } + pdsv->negotiated_features = nego_features; + driver_features = pds_vdpa_get_driver_features(vdpa_dev); dev_dbg(dev, "%s: %#llx => %#llx\n", __func__, driver_features, nego_features); + /* if we're faking the F_MAC, strip it before writing to device */ + hw_features = le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); + if (!(hw_features & BIT_ULL(VIRTIO_NET_F_MAC))) + nego_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); + if (driver_features == nego_features) return 0; @@ -352,7 +358,7 @@ static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); - return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); + return pdsv->negotiated_features; } static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, @@ -564,7 +570,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { u64 unsupp_features = - add_config->device_features & ~mgmt->supported_features; + add_config->device_features & ~pdsv->supported_features; if (unsupp_features) { dev_err(dev, "Unsupported features: %#llx\n", unsupp_features); @@ -615,7 +621,8 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } /* Set a mac, either from the user config if provided - * or set a random mac if default is 00:..:00 + * or use the device's mac if not 00:..:00 + * or set a random mac */ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { ether_addr_copy(pdsv->mac, add_config->net.mac); @@ -624,7 +631,8 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vc = pdsv->vdpa_aux->vd_mdev.device; memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac)); - if (is_zero_ether_addr(pdsv->mac)) { + if (is_zero_ether_addr(pdsv->mac) && + (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_MAC))) { eth_random_addr(pdsv->mac); dev_info(dev, "setting random mac %pM\n", pdsv->mac); } @@ -752,6 +760,10 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) mgmt->id_table = pds_vdpa_id_table; mgmt->device = dev; mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features); + + /* advertise F_MAC even if the device doesn't */ + mgmt->supported_features |= BIT_ULL(VIRTIO_NET_F_MAC); + mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h index cf02df287fc4..d984ba24a7da 100644 --- a/drivers/vdpa/pds/vdpa_dev.h +++ b/drivers/vdpa/pds/vdpa_dev.h @@ -35,8 +35,8 @@ struct pds_vdpa_device { struct pds_vdpa_aux *vdpa_aux; struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES]; - u64 supported_features; /* specified device features */ - u64 req_features; /* features requested by vdpa */ + u64 supported_features; /* supported device features */ + u64 negotiated_features; /* negotiated features */ u8 vdpa_index; /* rsvd for future subdevice use */ u8 num_vqs; /* num vqs in use */ u8 mac[ETH_ALEN]; /* mac selected when the device was added */ -- cgit v1.2.3 From ed88863040daad18d3f9b12f7c9c1c3da3731e1f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 10 Jul 2023 21:24:35 -0700 Subject: pds_vdpa: clean and reset vqs entries Make sure that we initialize the vqs[] entries the same way both for initial setup and after a vq reset. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-4-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/vdpa_dev.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 5b566e0eef0a..04a362648b02 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -428,6 +428,17 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) } } +static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid, + void __iomem *notify) +{ + memset(&pdsv->vqs[qid], 0, sizeof(pdsv->vqs[0])); + pdsv->vqs[qid].qid = qid; + pdsv->vqs[qid].pdsv = pdsv; + pdsv->vqs[qid].ready = false; + pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR; + pdsv->vqs[qid].notify = notify; +} + static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); @@ -450,8 +461,7 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", __func__, i, ERR_PTR(err)); pds_vdpa_release_irq(pdsv, i); - memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0])); - pdsv->vqs[i].ready = false; + pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); } } @@ -640,11 +650,11 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); for (i = 0; i < pdsv->num_vqs; i++) { - pdsv->vqs[i].qid = i; - pdsv->vqs[i].pdsv = pdsv; - pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR; - pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, - i, &pdsv->vqs[i].notify_pa); + void __iomem *notify; + + notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, + i, &pdsv->vqs[i].notify_pa); + pds_vdpa_init_vqs_entry(pdsv, i, notify); } pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev; -- cgit v1.2.3 From c0a6c5cbf1a9e49357e942ed393da08a55808a49 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 10 Jul 2023 21:24:36 -0700 Subject: pds_vdpa: alloc irq vectors on DRIVER_OK We were allocating irq vectors at the time the aux dev was probed, but that is before the PCI VF is assigned to a separate iommu domain by vhost_vdpa. Because vhost_vdpa later changes the iommu domain the interrupts do not work. Instead, we can allocate the irq vectors later when we see DRIVER_OK and know that the reassignment of the PCI VF to an iommu domain has already happened. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Allen Hubbe Signed-off-by: Shannon Nelson Acked-by: Jason Wang Message-Id: <20230711042437.69381-5-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/pds/vdpa_dev.c | 110 ++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 29 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 04a362648b02..52b2449182ad 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -126,11 +126,9 @@ static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid) static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); - struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; struct device *dev = &pdsv->vdpa_dev.dev; u64 driver_features; u16 invert_idx = 0; - int irq; int err; dev_dbg(dev, "%s: qid %d ready %d => %d\n", @@ -143,19 +141,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re invert_idx = PDS_VDPA_PACKED_INVERT_IDX; if (ready) { - irq = pci_irq_vector(pdev, qid); - snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), - "vdpa-%s-%d", dev_name(dev), qid); - - err = request_irq(irq, pds_vdpa_isr, 0, - pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]); - if (err) { - dev_err(dev, "%s: no irq for qid %d: %pe\n", - __func__, qid, ERR_PTR(err)); - return; - } - pdsv->vqs[qid].irq = irq; - /* Pass vq setup info to DSC using adminq to gather up and * send all info at once so FW can do its full set up in * one easy operation @@ -164,7 +149,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re if (err) { dev_err(dev, "Failed to init vq %d: %pe\n", qid, ERR_PTR(err)); - pds_vdpa_release_irq(pdsv, qid); ready = false; } } else { @@ -172,7 +156,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re if (err) dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", __func__, qid, ERR_PTR(err)); - pds_vdpa_release_irq(pdsv, qid); } pdsv->vqs[qid].ready = ready; @@ -395,6 +378,72 @@ static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev) return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); } +static int pds_vdpa_request_irqs(struct pds_vdpa_device *pdsv) +{ + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; + struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; + struct device *dev = &pdsv->vdpa_dev.dev; + int max_vq, nintrs, qid, err; + + max_vq = vdpa_aux->vdpa_mdev.max_supported_vqs; + + nintrs = pci_alloc_irq_vectors(pdev, max_vq, max_vq, PCI_IRQ_MSIX); + if (nintrs < 0) { + dev_err(dev, "Couldn't get %d msix vectors: %pe\n", + max_vq, ERR_PTR(nintrs)); + return nintrs; + } + + for (qid = 0; qid < pdsv->num_vqs; ++qid) { + int irq = pci_irq_vector(pdev, qid); + + snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), + "vdpa-%s-%d", dev_name(dev), qid); + + err = request_irq(irq, pds_vdpa_isr, 0, + pdsv->vqs[qid].irq_name, + &pdsv->vqs[qid]); + if (err) { + dev_err(dev, "%s: no irq for qid %d: %pe\n", + __func__, qid, ERR_PTR(err)); + goto err_release; + } + + pdsv->vqs[qid].irq = irq; + } + + vdpa_aux->nintrs = nintrs; + + return 0; + +err_release: + while (qid--) + pds_vdpa_release_irq(pdsv, qid); + + pci_free_irq_vectors(pdev); + + vdpa_aux->nintrs = 0; + + return err; +} + +static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv) +{ + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; + struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; + int qid; + + if (!vdpa_aux->nintrs) + return; + + for (qid = 0; qid < pdsv->num_vqs; qid++) + pds_vdpa_release_irq(pdsv, qid); + + pci_free_irq_vectors(pdev); + + vdpa_aux->nintrs = 0; +} + static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); @@ -405,6 +454,11 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) old_status = pds_vdpa_get_status(vdpa_dev); dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status); + if (status & ~old_status & VIRTIO_CONFIG_S_DRIVER_OK) { + if (pds_vdpa_request_irqs(pdsv)) + status = old_status | VIRTIO_CONFIG_S_FAILED; + } + pds_vdpa_cmd_set_status(pdsv, status); /* Note: still working with FW on the need for this reset cmd */ @@ -426,6 +480,9 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) i, &pdsv->vqs[i].notify_pa); } } + + if (old_status & ~status & VIRTIO_CONFIG_S_DRIVER_OK) + pds_vdpa_release_irqs(pdsv); } static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid, @@ -460,13 +517,17 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) if (err) dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", __func__, i, ERR_PTR(err)); - pds_vdpa_release_irq(pdsv, i); - pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); } } pds_vdpa_set_status(vdpa_dev, 0); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + /* Reset the vq info */ + for (i = 0; i < pdsv->num_vqs && !err; i++) + pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); + } + return 0; } @@ -764,7 +825,7 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) max_vqs = min_t(u16, dev_intrs, max_vqs); mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs); - vdpa_aux->nintrs = mgmt->max_supported_vqs; + vdpa_aux->nintrs = 0; mgmt->ops = &pds_vdpa_mgmt_dev_ops; mgmt->id_table = pds_vdpa_id_table; @@ -778,14 +839,5 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); - err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs, - PCI_IRQ_MSIX); - if (err < 0) { - dev_err(dev, "Couldn't get %d msix vectors: %pe\n", - vdpa_aux->nintrs, ERR_PTR(err)); - return err; - } - vdpa_aux->nintrs = err; - return 0; } -- cgit v1.2.3 From 8efc365b20dc9a5b0c8fd0e8a195690bf21cd8be Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 10 Jul 2023 21:24:37 -0700 Subject: pds_vdpa: fix up debugfs feature bit printing Make clearer in debugfs output the difference between the hw feature bits, the features supported through the driver, and the features that have been negotiated. Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-6-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/debugfs.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index 754ccb7a6666..9b04aad6ec35 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -176,6 +176,7 @@ static int identity_show(struct seq_file *seq, void *v) { struct pds_vdpa_aux *vdpa_aux = seq->private; struct vdpa_mgmt_dev *mgmt; + u64 hw_features; seq_printf(seq, "aux_dev: %s\n", dev_name(&vdpa_aux->padev->aux_dev.dev)); @@ -183,8 +184,9 @@ static int identity_show(struct seq_file *seq, void *v) mgmt = &vdpa_aux->vdpa_mdev; seq_printf(seq, "max_vqs: %d\n", mgmt->max_supported_vqs); seq_printf(seq, "config_attr_mask: %#llx\n", mgmt->config_attr_mask); - seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features); - print_feature_bits_all(seq, mgmt->supported_features); + hw_features = le64_to_cpu(vdpa_aux->ident.hw_features); + seq_printf(seq, "hw_features: %#llx\n", hw_features); + print_feature_bits_all(seq, hw_features); return 0; } @@ -200,7 +202,6 @@ static int config_show(struct seq_file *seq, void *v) { struct pds_vdpa_device *pdsv = seq->private; struct virtio_net_config vc; - u64 driver_features; u8 status; memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device, @@ -223,10 +224,8 @@ static int config_show(struct seq_file *seq, void *v) status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); seq_printf(seq, "dev_status: %#x\n", status); print_status_bits(seq, status); - - driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); - seq_printf(seq, "driver_features: %#llx\n", driver_features); - print_feature_bits_all(seq, driver_features); + seq_printf(seq, "negotiated_features: %#llx\n", pdsv->negotiated_features); + print_feature_bits_all(seq, pdsv->negotiated_features); seq_printf(seq, "vdpa_index: %d\n", pdsv->vdpa_index); seq_printf(seq, "num_vqs: %d\n", pdsv->num_vqs); -- cgit v1.2.3 From f504e15b94eb4e5b47f8715da59c0207f68dffe1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:48 +0200 Subject: virtio-mem: remove unsafe unplug in Big Block Mode (BBM) When "unsafe unplug" is enabled, we don't fake-offline all memory ahead of actual memory offlining using alloc_contig_range(). Instead, we rely on offline_pages() to also perform actual page migration, which might fail or take a very long time. In that case, it's possible to easily run into endless loops that cannot be aborted anymore (as offlining is triggered by a workqueue then): For example, a single (accidentally) permanently unmovable page in ZONE_MOVABLE results in an endless loop. For ZONE_NORMAL, races between isolating the pageblock (and checking for unmovable pages) and concurrent page allocation are possible and similarly result in endless loops. The idea of the unsafe unplug mode was to make it possible to more reliably unplug large memory blocks. However, (a) we really should be tackling that differently, by extending the alloc_contig_range()-based mechanism; and (b) this mode is not the default and as far as I know, it's unused either way. So let's simply get rid of it. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-2-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 51 ++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 835f6cc2fb66..ed15d2a4bd96 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -38,11 +38,6 @@ module_param(bbm_block_size, ulong, 0444); MODULE_PARM_DESC(bbm_block_size, "Big Block size in bytes. Default is 0 (auto-detection)."); -static bool bbm_safe_unplug = true; -module_param(bbm_safe_unplug, bool, 0444); -MODULE_PARM_DESC(bbm_safe_unplug, - "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); - /* * virtio-mem currently supports the following modes of operation: * @@ -2111,38 +2106,32 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, VIRTIO_MEM_BBM_BB_ADDED)) return -EINVAL; - if (bbm_safe_unplug) { - /* - * Start by fake-offlining all memory. Once we marked the device - * block as fake-offline, all newly onlined memory will - * automatically be kept fake-offline. Protect from concurrent - * onlining/offlining until we have a consistent state. - */ - mutex_lock(&vm->hotplug_mutex); - virtio_mem_bbm_set_bb_state(vm, bb_id, - VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); + /* + * Start by fake-offlining all memory. Once we marked the device + * block as fake-offline, all newly onlined memory will + * automatically be kept fake-offline. Protect from concurrent + * onlining/offlining until we have a consistent state. + */ + mutex_lock(&vm->hotplug_mutex); + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - page = pfn_to_online_page(pfn); - if (!page) - continue; + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; - rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); - if (rc) { - end_pfn = pfn; - goto rollback_safe_unplug; - } + rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + if (rc) { + end_pfn = pfn; + goto rollback; } - mutex_unlock(&vm->hotplug_mutex); } + mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); if (rc) { - if (bbm_safe_unplug) { - mutex_lock(&vm->hotplug_mutex); - goto rollback_safe_unplug; - } - return rc; + mutex_lock(&vm->hotplug_mutex); + goto rollback; } rc = virtio_mem_bbm_unplug_bb(vm, bb_id); @@ -2154,7 +2143,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, VIRTIO_MEM_BBM_BB_UNUSED); return rc; -rollback_safe_unplug: +rollback: for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { page = pfn_to_online_page(pfn); if (!page) -- cgit v1.2.3 From ddf409851461f515cc32974714b73efe2e012bde Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:49 +0200 Subject: virtio-mem: convert most offline_and_remove_memory() errors to -EBUSY Just like we do with alloc_contig_range(), let's convert all unknown errors to -EBUSY, but WARN so we can look into the issue. For example, offline_pages() could fail with -EINTR, which would be unexpected in our case. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-3-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index ed15d2a4bd96..1a76ba2bc118 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -741,11 +741,15 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, * immediately instead of waiting. */ virtio_mem_retry(vm); - } else { - dev_dbg(&vm->vdev->dev, - "offlining and removing memory failed: %d\n", rc); + return 0; } - return rc; + dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc); + /* + * We don't really expect this to fail, because we fake-offlined all + * memory already. But it could fail in corner cases. + */ + WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY); + return rc == -ENOMEM ? -ENOMEM : -EBUSY; } /* -- cgit v1.2.3 From a31648fd4f96fbe0a4d0aeb16b57a2405c6943c0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:50 +0200 Subject: virtio-mem: keep retrying on offline_and_remove_memory() errors in Sub Block Mode (SBM) In case offline_and_remove_memory() fails in SBM, we leave a completely unplugged Linux memory block stick around until we try plugging memory again. We won't try removing that memory block again. offline_and_remove_memory() may, for example, fail if we're racing with another alloc_contig_range() user, if allocating temporary memory fails, or if some memory notifier rejected the offlining request. Let's handle that case better, by simple retrying to offline and remove such memory. Tested using CONFIG_MEMORY_NOTIFIER_ERROR_INJECT. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-4-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 92 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 1a76ba2bc118..a5cf92e3e5af 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -168,6 +168,13 @@ struct virtio_mem { /* The number of subblocks per Linux memory block. */ uint32_t sbs_per_mb; + /* + * Some of the Linux memory blocks tracked as "partially + * plugged" are completely unplugged and can be offlined + * and removed -- which previously failed. + */ + bool have_unplugged_mb; + /* Summary of all memory block states. */ unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; @@ -765,6 +772,34 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, return virtio_mem_offline_and_remove_memory(vm, addr, size); } +/* + * Try (offlining and) removing memory from Linux in case all subblocks are + * unplugged. Can be called on online and offline memory blocks. + * + * May modify the state of memory blocks in virtio-mem. + */ +static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm, + unsigned long mb_id) +{ + int rc; + + /* + * Once all subblocks of a memory block were unplugged, offline and + * remove it. + */ + if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) + return 0; + + /* offline_and_remove_memory() works for online and offline memory. */ + mutex_unlock(&vm->hotplug_mutex); + rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); + mutex_lock(&vm->hotplug_mutex); + if (!rc) + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); + return rc; +} + /* * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a * all Linux memory blocks covered by the big block. @@ -1988,20 +2023,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, } unplugged: - /* - * Once all subblocks of a memory block were unplugged, offline and - * remove it. This will usually not fail, as no memory is in use - * anymore - however some other notifiers might NACK the request. - */ - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { - mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); - mutex_lock(&vm->hotplug_mutex); - if (!rc) - virtio_mem_sbm_set_mb_state(vm, mb_id, - VIRTIO_MEM_SBM_MB_UNUSED); - } - + rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id); + if (rc) + vm->sbm.have_unplugged_mb = 1; + /* Ignore errors, this is not critical. We'll retry later. */ return 0; } @@ -2253,12 +2278,13 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* * Try to unplug all blocks that couldn't be unplugged before, for example, - * because the hypervisor was busy. + * because the hypervisor was busy. Further, offline and remove any memory + * blocks where we previously failed. */ -static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) +static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm) { unsigned long id; - int rc; + int rc = 0; if (!vm->in_sbm) { virtio_mem_bbm_for_each_bb(vm, id, @@ -2280,6 +2306,27 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) VIRTIO_MEM_SBM_MB_UNUSED); } + if (!vm->sbm.have_unplugged_mb) + return 0; + + /* + * Let's retry (offlining and) removing completely unplugged Linux + * memory blocks. + */ + vm->sbm.have_unplugged_mb = false; + + mutex_lock(&vm->hotplug_mutex); + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL) + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL) + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); + mutex_unlock(&vm->hotplug_mutex); + + if (rc) + vm->sbm.have_unplugged_mb = true; + /* Ignore errors, this is not critical. We'll retry later. */ return 0; } @@ -2361,9 +2408,9 @@ retry: virtio_mem_refresh_config(vm); } - /* Unplug any leftovers from previous runs */ + /* Cleanup any leftovers from previous runs */ if (!rc) - rc = virtio_mem_unplug_pending_mb(vm); + rc = virtio_mem_cleanup_pending_mb(vm); if (!rc && vm->requested_size != vm->plugged_size) { if (vm->requested_size > vm->plugged_size) { @@ -2375,6 +2422,13 @@ retry: } } + /* + * Keep retrying to offline and remove completely unplugged Linux + * memory blocks. + */ + if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb) + rc = -EBUSY; + switch (rc) { case 0: vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; -- cgit v1.2.3 From f55484fd7be923b740e8e1fc304070ba53675cb4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:51 +0200 Subject: virtio-mem: check if the config changed before fake offlining memory If we repeatedly fail to fake offline memory to unplug it, we won't be sending any unplug requests to the device. However, we only check if the config changed when sending such (un)plug requests. We could end up trying for a long time to unplug memory, even though the config changed already and we're not supposed to unplug memory anymore. For example, the hypervisor might detect a low-memory situation while unplugging memory and decide to replug some memory. Continuing trying to unplug memory in that case can be problematic. So let's check on a more regular basis. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-5-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a5cf92e3e5af..fa5226c198cc 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1189,7 +1189,8 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) * Try to allocate a range, marking pages fake-offline, effectively * fake-offlining them. */ -static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) +static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn, + unsigned long nr_pages) { const bool is_movable = is_zone_movable_page(pfn_to_page(pfn)); int rc, retry_count; @@ -1202,6 +1203,14 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) * some guarantees. */ for (retry_count = 0; retry_count < 5; retry_count++) { + /* + * If the config changed, stop immediately and go back to the + * main loop: avoid trying to keep unplugging if the device + * might have decided to not remove any more memory. + */ + if (atomic_read(&vm->config_changed)) + return -EAGAIN; + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, GFP_KERNEL); if (rc == -ENOMEM) @@ -1951,7 +1960,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->sbm.sb_size); - rc = virtio_mem_fake_offline(start_pfn, nr_pages); + rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages); if (rc) return rc; @@ -2149,7 +2158,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, if (!page) continue; - rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION); if (rc) { end_pfn = pfn; goto rollback; -- cgit v1.2.3 From 547259580dfa9a5d345dd1b46fd5e9977654c1cc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 19:56:19 +0200 Subject: parisc: Move proc_mckinley_root and proc_runway_root to sba_iommu Clean up the procfs root entries for gsc, runway, and mckinley busses. Signed-off-by: Helge Deller --- arch/parisc/kernel/pci-dma.c | 2 +- arch/parisc/kernel/setup.c | 48 -------------------------------------------- drivers/parisc/sba_iommu.c | 6 ++++++ 3 files changed, 7 insertions(+), 49 deletions(-) diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 131d596e018f..bf9f192c826e 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -381,7 +381,7 @@ pcxl_dma_init(void) pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL, get_order(pcxl_res_size)); memset(pcxl_res_map, 0, pcxl_res_size); - proc_gsc_root = proc_mkdir("gsc", NULL); + proc_gsc_root = proc_mkdir("bus/gsc", NULL); if (!proc_gsc_root) printk(KERN_WARNING "pcxl_dma_init: Unable to create gsc /proc dir entry\n"); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 573f8303e2b0..211a4afdd282 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -40,11 +40,6 @@ static char __initdata command_line[COMMAND_LINE_SIZE]; -/* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */ -struct proc_dir_entry * proc_runway_root __read_mostly = NULL; -struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; -struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL; - static void __init setup_cmdline(char **cmdline_p) { extern unsigned int boot_args[]; @@ -196,48 +191,6 @@ const struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; -static void __init parisc_proc_mkdir(void) -{ - /* - ** Can't call proc_mkdir() until after proc_root_init() has been - ** called by start_kernel(). In other words, this code can't - ** live in arch/.../setup.c because start_parisc() calls - ** start_kernel(). - */ - switch (boot_cpu_data.cpu_type) { - case pcxl: - case pcxl2: - if (NULL == proc_gsc_root) - { - proc_gsc_root = proc_mkdir("bus/gsc", NULL); - } - break; - case pcxt_: - case pcxu: - case pcxu_: - case pcxw: - case pcxw_: - case pcxw2: - if (NULL == proc_runway_root) - { - proc_runway_root = proc_mkdir("bus/runway", NULL); - } - break; - case mako: - case mako2: - if (NULL == proc_mckinley_root) - { - proc_mckinley_root = proc_mkdir("bus/mckinley", NULL); - } - break; - default: - /* FIXME: this was added to prevent the compiler - * complaining about missing pcx, pcxs and pcxt - * I'm assuming they have neither gsc nor runway */ - break; - } -} - static struct resource central_bus = { .name = "Central Bus", .start = F_EXTEND(0xfff80000), @@ -294,7 +247,6 @@ static int __init parisc_init(void) { u32 osid = (OS_ID_LINUX << 16); - parisc_proc_mkdir(); parisc_init_resources(); do_device_inventory(); /* probe for hardware */ diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index bf3405f4289e..8b1dcd537020 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -121,6 +121,8 @@ module_param(sba_reserve_agpgart, int, 0444); MODULE_PARM_DESC(sba_reserve_agpgart, "Reserve half of IO pdir as AGPGART"); #endif +struct proc_dir_entry *proc_runway_root __ro_after_init; +struct proc_dir_entry *proc_mckinley_root __ro_after_init; /************************************ ** SBA register read and write support @@ -1968,11 +1970,15 @@ static int __init sba_driver_callback(struct parisc_device *dev) #ifdef CONFIG_PROC_FS switch (dev->id.hversion) { case PLUTO_MCKINLEY_PORT: + if (!proc_mckinley_root) + proc_mckinley_root = proc_mkdir("bus/mckinley", NULL); root = proc_mckinley_root; break; case ASTRO_RUNWAY_PORT: case IKE_MERCED_PORT: default: + if (!proc_runway_root) + proc_runway_root = proc_mkdir("bus/runway", NULL); root = proc_runway_root; break; } -- cgit v1.2.3 From 66f80386a99f2773611e1b537ad045061fdd92ec Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 20:23:01 +0200 Subject: parisc: unaligned: Include linux/sysctl.h for unaligned_enabled Fix sparse warning that unaligned_enabled wasn't declared. Signed-off-by: Helge Deller --- arch/parisc/kernel/unaligned.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 813062701922..170d0dda4213 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 2c9227fd1c7e508f55eb4a38e8205f317e7c4ac9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 22:32:24 +0200 Subject: parisc: processor: Include asm/smp.h for init_per_cpu() Fix sparse warning that init_per_cpu() isn't declared. Signed-off-by: Helge Deller --- arch/parisc/kernel/processor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 00b0df97afb1..762289b9984e 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include /* for struct irq_region */ #include -- cgit v1.2.3 From b967f48d0240fa9b3ac0bfd7135647985016826e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 22:42:57 +0200 Subject: parisc: boot: Nuke some sparse warnings in decompressor Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/misc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 7ee49f5881d1..d389359e22ac 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -117,7 +117,7 @@ char *strchr(const char *s, int c) return NULL; } -int puts(const char *s) +static int puts(const char *s) { const char *nuline = s; @@ -172,7 +172,7 @@ static int print_num(unsigned long num, int base) return 0; } -int printf(const char *fmt, ...) +static int printf(const char *fmt, ...) { va_list args; int i = 0; @@ -204,13 +204,13 @@ void abort(void) } #undef malloc -void *malloc(size_t size) +static void *malloc(size_t size) { return malloc_gzip(size); } #undef free -void free(void *ptr) +static void free(void *ptr) { return free_gzip(ptr); } @@ -278,7 +278,7 @@ static void parse_elf(void *output) free(phdrs); } -unsigned long decompress_kernel(unsigned int started_wide, +asmlinkage unsigned long __visible decompress_kernel(unsigned int started_wide, unsigned int command_line, const unsigned int rd_start, const unsigned int rd_end) -- cgit v1.2.3 From 2794f8ecb483b680610968423179005758a5ce63 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 22:51:54 +0200 Subject: parisc: ftrace: Add declaration for ftrace_function_trampoline() Make sparse happy by adding declaration for ftrace_function_trampoline(). Signed-off-by: Helge Deller --- arch/parisc/include/asm/ftrace.h | 4 ++++ arch/parisc/kernel/ftrace.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h index a7cf0d05ccf4..f1cc1ee3a647 100644 --- a/arch/parisc/include/asm/ftrace.h +++ b/arch/parisc/include/asm/ftrace.h @@ -12,6 +12,10 @@ extern void mcount(void); extern unsigned long sys_call_table[]; extern unsigned long return_address(unsigned int); +struct ftrace_regs; +extern void ftrace_function_trampoline(unsigned long parent, + unsigned long self_addr, unsigned long org_sp_gr3, + struct ftrace_regs *fregs); #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_caller(void); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 4d392e4ed358..d1defb9ede70 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -53,7 +53,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent, static ftrace_func_t ftrace_func; -void notrace __hot ftrace_function_trampoline(unsigned long parent, +asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3, struct ftrace_regs *fregs) -- cgit v1.2.3 From d863066e6ce0a70c479a7f618088912ac0ba44ac Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 10 Aug 2023 23:00:18 +0200 Subject: parisc: perf: Make cpu_device variable static Signed-off-by: Helge Deller --- arch/parisc/kernel/perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 90b04d8af212..b0f0816879df 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -57,7 +57,7 @@ struct rdr_tbl_ent { static int perf_processor_interface __read_mostly = UNKNOWN_INTF; static int perf_enabled __read_mostly; static DEFINE_SPINLOCK(perf_lock); -struct parisc_device *cpu_device __read_mostly; +static struct parisc_device *cpu_device __read_mostly; /* RDRs to write for PCX-W */ static const int perf_rdrs_W[] = -- cgit v1.2.3 From e8f5f849ffce24490eb9449e98312b66c0dba76f Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 10 Aug 2023 15:34:21 -0500 Subject: cifs: fix potential oops in cifs_oplock_break With deferred close we can have closes that race with lease breaks, and so with the current checks for whether to send the lease response, oplock_response(), this can mean that an unmount (kill_sb) can occur just before we were checking if the tcon->ses is valid. See below: [Fri Aug 4 04:12:50 2023] RIP: 0010:cifs_oplock_break+0x1f7/0x5b0 [cifs] [Fri Aug 4 04:12:50 2023] Code: 7d a8 48 8b 7d c0 c0 e9 02 48 89 45 b8 41 89 cf e8 3e f5 ff ff 4c 89 f7 41 83 e7 01 e8 82 b3 03 f2 49 8b 45 50 48 85 c0 74 5e <48> 83 78 60 00 74 57 45 84 ff 75 52 48 8b 43 98 48 83 eb 68 48 39 [Fri Aug 4 04:12:50 2023] RSP: 0018:ffffb30607ddbdf8 EFLAGS: 00010206 [Fri Aug 4 04:12:50 2023] RAX: 632d223d32612022 RBX: ffff97136944b1e0 RCX: 0000000080100009 [Fri Aug 4 04:12:50 2023] RDX: 0000000000000001 RSI: 0000000080100009 RDI: ffff97136944b188 [Fri Aug 4 04:12:50 2023] RBP: ffffb30607ddbe58 R08: 0000000000000001 R09: ffffffffc08e0900 [Fri Aug 4 04:12:50 2023] R10: 0000000000000001 R11: 000000000000000f R12: ffff97136944b138 [Fri Aug 4 04:12:50 2023] R13: ffff97149147c000 R14: ffff97136944b188 R15: 0000000000000000 [Fri Aug 4 04:12:50 2023] FS: 0000000000000000(0000) GS:ffff9714f7c00000(0000) knlGS:0000000000000000 [Fri Aug 4 04:12:50 2023] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [Fri Aug 4 04:12:50 2023] CR2: 00007fd8de9c7590 CR3: 000000011228e000 CR4: 0000000000350ef0 [Fri Aug 4 04:12:50 2023] Call Trace: [Fri Aug 4 04:12:50 2023] [Fri Aug 4 04:12:50 2023] process_one_work+0x225/0x3d0 [Fri Aug 4 04:12:50 2023] worker_thread+0x4d/0x3e0 [Fri Aug 4 04:12:50 2023] ? process_one_work+0x3d0/0x3d0 [Fri Aug 4 04:12:50 2023] kthread+0x12a/0x150 [Fri Aug 4 04:12:50 2023] ? set_kthread_struct+0x50/0x50 [Fri Aug 4 04:12:50 2023] ret_from_fork+0x22/0x30 [Fri Aug 4 04:12:50 2023] To fix this change the ordering of the checks before sending the oplock_response to first check if the openFileList is empty. Fixes: da787d5b7498 ("SMB3: Do not send lease break acknowledgment if all file handles have been closed") Suggested-by: Bharath SM Reviewed-by: Bharath SM Reviewed-by: Shyam Prasad N Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/file.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index fc5acc95cd13..60a49caf8425 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -4878,9 +4878,11 @@ void cifs_oplock_break(struct work_struct *work) struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, oplock_break); struct inode *inode = d_inode(cfile->dentry); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsInodeInfo *cinode = CIFS_I(inode); - struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + struct tcon_link *tlink; int rc = 0; bool purge_cache = false, oplock_break_cancelled; __u64 persistent_fid, volatile_fid; @@ -4889,6 +4891,12 @@ void cifs_oplock_break(struct work_struct *work) wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + goto out; + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, cfile->oplock_epoch, &purge_cache); @@ -4938,18 +4946,19 @@ oplock_break_ack: /* * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require * an acknowledgment to be sent when the file has already been closed. - * check for server null, since can race with kill_sb calling tree disconnect. */ spin_lock(&cinode->open_file_lock); - if (tcon->ses && tcon->ses->server && !oplock_break_cancelled && - !list_empty(&cinode->openFileList)) { + /* check list empty since can race with kill_sb calling tree disconnect */ + if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { spin_unlock(&cinode->open_file_lock); - rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, - volatile_fid, net_fid, cinode); + rc = server->ops->oplock_response(tcon, persistent_fid, + volatile_fid, net_fid, cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } else spin_unlock(&cinode->open_file_lock); + cifs_put_tlink(tlink); +out: cifs_done_oplock_break(cinode); } -- cgit v1.2.3 From 51b813176f098ff61bd2833f627f5319ead098a5 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 9 Aug 2023 23:12:56 -0400 Subject: virtio-net: set queues after driver_ok Commit 25266128fe16 ("virtio-net: fix race between set queues and probe") tries to fix the race between set queues and probe by calling _virtnet_set_queues() before DRIVER_OK is set. This violates virtio spec. Fixing this by setting queues after virtio_device_ready(). Note that rtnl needs to be held for userspace requests to change the number of queues. So we are serialized in this way. Fixes: 25266128fe16 ("virtio-net: fix race between set queues and probe") Reported-by: Dragos Tatulea Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1270c8d23463..ff03921e46df 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -4219,8 +4219,6 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); - _virtnet_set_queues(vi, vi->curr_queue_pairs); - /* serialize netdev register + virtio_device_ready() with ndo_open() */ rtnl_lock(); @@ -4233,6 +4231,8 @@ static int virtnet_probe(struct virtio_device *vdev) virtio_device_ready(vdev); + _virtnet_set_queues(vi, vi->curr_queue_pairs); + /* a random MAC address has been assigned, notify the device. * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there * because many devices work fine without getting MAC explicitly -- cgit v1.2.3 From 829c6524d6729d05a82575dbcc16f99be5ee843d Mon Sep 17 00:00:00 2001 From: Xiang Yang Date: Thu, 10 Aug 2023 22:06:39 +0800 Subject: net: pcs: Add missing put_device call in miic_create The reference of pdev->dev is taken by of_find_device_by_node, so it should be released when not need anymore. Fixes: 7dc54d3b8d91 ("net: pcs: add Renesas MII converter driver") Signed-off-by: Xiang Yang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-rzn1-miic.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index 323bec5e57f8..356099169003 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -313,15 +313,21 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) pdev = of_find_device_by_node(pcs_np); of_node_put(pcs_np); - if (!pdev || !platform_get_drvdata(pdev)) + if (!pdev || !platform_get_drvdata(pdev)) { + if (pdev) + put_device(&pdev->dev); return ERR_PTR(-EPROBE_DEFER); + } miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL); - if (!miic_port) + if (!miic_port) { + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); + } miic = platform_get_drvdata(pdev); device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER); + put_device(&pdev->dev); miic_port->miic = miic; miic_port->port = port - 1; -- cgit v1.2.3 From a7a7dabb5dd72d2875bc3ce56f94ea5ceb259d5b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 9 Aug 2023 10:04:40 +0800 Subject: nvme: core: don't hold rcu read lock in nvme_ns_chr_uring_cmd_iopoll Now nvme_ns_chr_uring_cmd_iopoll() has switched to request based io polling, and the associated NS is guaranteed to be live in case of io polling, so request is guaranteed to be valid because blk-mq uses pre-allocated request pool. Remove the rcu read lock in nvme_ns_chr_uring_cmd_iopoll(), which isn't needed any more after switching to request based io polling. Fix "BUG: sleeping function called from invalid context" because set_page_dirty_lock() from blk_rq_unmap_user() may sleep. Fixes: 585079b6e425 ("nvme: wire up async polling for io passthrough commands") Reported-by: Guangwu Zhang Cc: Kanchan Joshi Cc: Anuj Gupta Signed-off-by: Ming Lei Tested-by: Guangwu Zhang Link: https://lore.kernel.org/r/20230809020440.174682-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/nvme/host/ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 5c3250f36ce7..d39f3219358b 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -786,11 +786,9 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, if (!(ioucmd->flags & IORING_URING_CMD_POLLED)) return 0; - rcu_read_lock(); req = READ_ONCE(ioucmd->cookie); if (req && blk_rq_is_poll(req)) ret = blk_rq_poll(req, iob, poll_flags); - rcu_read_unlock(); return ret; } #ifdef CONFIG_NVME_MULTIPATH -- cgit v1.2.3 From 3477144c878a52fc3938a529186e81ea030e7779 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 11 Aug 2023 11:32:09 +0200 Subject: driver core: cpu: Fix the fallback cpu_show_gds() name In 6524c798b727 ("driver core: cpu: Make cpu_show_not_affected() static") I fat-fingered the name of cpu_show_gds(). Usually, I'd rebase but since those are extraordinary embargoed times, the commit above was already pulled into another tree so no no. Therefore, fix it ontop. Fixes: 6524c798b727 ("driver core: cpu: Make cpu_show_not_affected() static") Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230811095831.27513-1-bp@alien8.de --- drivers/base/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index d7300d885822..fe6690ecf563 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -532,7 +532,7 @@ CPU_SHOW_VULN_FALLBACK(srbds); CPU_SHOW_VULN_FALLBACK(mmio_stale_data); CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); -CPU_SHOW_VULN_FALLBACK(gather_data_sampling); +CPU_SHOW_VULN_FALLBACK(gds); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -546,7 +546,7 @@ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); -static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gather_data_sampling, NULL); +static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, -- cgit v1.2.3 From 33f83d13ded164cd49ce2a3bd2770115abc64e6f Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 10 Aug 2023 18:00:44 -0400 Subject: gpio: ws16c48: Fix off-by-one error in WS16C48 resource region extent The WinSystems WS16C48 I/O address region spans offsets 0x0 through 0xA, which is a total of 11 bytes. Fix the WS16C48_EXTENT define to the correct value of 11 so that access to necessary device registers is properly requested in the ws16c48_probe() callback by the devm_request_region() function call. Fixes: 2c05a0f29f41 ("gpio: ws16c48: Implement and utilize register structures") Cc: stable@vger.kernel.org Cc: Paul Demetrotion Signed-off-by: William Breathitt Gray Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-ws16c48.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c index e73885a4dc32..afb42a8e916f 100644 --- a/drivers/gpio/gpio-ws16c48.c +++ b/drivers/gpio/gpio-ws16c48.c @@ -18,7 +18,7 @@ #include #include -#define WS16C48_EXTENT 10 +#define WS16C48_EXTENT 11 #define MAX_NUM_WS16C48 max_num_isa_dev(WS16C48_EXTENT) static unsigned int base[MAX_NUM_WS16C48]; -- cgit v1.2.3 From 6dbef74aeb090d6bee7d64ef3fa82ae6fa53f271 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 11 Aug 2023 23:37:05 +0300 Subject: x86/cpu/amd: Enable Zenbleed fix for AMD Custom APU 0405 Commit 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") provided a fix for the Zen2 VZEROUPPER data corruption bug affecting a range of CPU models, but the AMD Custom APU 0405 found on SteamDeck was not listed, although it is clearly affected by the vulnerability. Add this CPU variant to the Zenbleed erratum list, in order to unconditionally enable the fallback fix until a proper microcode update is available. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Signed-off-by: Cristian Ciocaltea Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230811203705.1699914-1-cristian.ciocaltea@collabora.com --- arch/x86/kernel/cpu/amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 26ad7ca423e7..c15b4f02b4cf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -73,6 +73,7 @@ static const int amd_erratum_1054[] = static const int amd_zenbleed[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), + AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf), AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) -- cgit v1.2.3 From 51e5e551af53259e0274b0cd4ff83d8351fb8c40 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Aug 2023 12:48:36 +0300 Subject: tpm: tpm_tis: Fix UPX-i11 DMI_MATCH condition The patch which made it to the kernel somehow changed the match condition from DMI_MATCH(DMI_PRODUCT_NAME, "UPX-TGL01") to DMI_MATCH(DMI_PRODUCT_VERSION, "UPX-TGL") Revert back to the correct match condition to disable the interrupt mode on the board. Cc: stable@vger.kernel.org # v6.4+ Fixes: edb13d7bb034 ("tpm: tpm_tis: Disable interrupts *only* for AEON UPX-i11") Link: https://lore.kernel.org/lkml/20230524085844.11580-1-peter.ujfalusi@linux.intel.com/ Signed-off-by: Peter Ujfalusi Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_tis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index ac4daaf294a3..3c0f68b9e44f 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -183,7 +183,7 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = { .ident = "UPX-TGL", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "AAEON"), - DMI_MATCH(DMI_PRODUCT_VERSION, "UPX-TGL"), + DMI_MATCH(DMI_PRODUCT_NAME, "UPX-TGL01"), }, }, {} -- cgit v1.2.3 From 6aaf663ee04a80b445f8f5abff53cb92cb583c88 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 12 Aug 2023 02:07:10 +0300 Subject: tpm_tis: Opt-in interrupts Cc: stable@vger.kernel.org # v6.4+ Link: https://lore.kernel.org/linux-integrity/CAHk-=whRVp4h8uWOX1YO+Y99+44u4s=XxMK4v00B6F1mOfqPLg@mail.gmail.com/ Fixes: e644b2f498d2 ("tpm, tpm_tis: Enable interrupt test") Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_tis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 3c0f68b9e44f..7fa3d91042b2 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -89,7 +89,7 @@ static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) tpm_tis_flush(iobase); } -static int interrupts = -1; +static int interrupts; module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); -- cgit v1.2.3 From c8afaa1b0f8bc93d013ab2ea6b9649958af3f1d3 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 12 Aug 2023 18:15:54 +0200 Subject: locking: remove spin_lock_prefetch The only remaining consumer is new_inode, where it showed up in 2001 as commit c37fa164f793 ("v2.4.9.9 -> v2.4.9.10") in a historical repo [1] with a changelog which does not mention it. Since then the line got only touched up to keep compiling. While it may have been of benefit back in the day, it is guaranteed to at best not get in the way in the multicore setting -- as the code performs *a lot* of work between the prefetch and actual lock acquire, any contention means the cacheline is already invalid by the time the routine calls spin_lock(). It adds spurious traffic, for short. On top of it prefetch is notoriously tricky to use for single-threaded purposes, making it questionable from the get go. As such, remove it. I admit upfront I did not see value in benchmarking this change, but I can do it if that is deemed appropriate. Removal from new_inode and of the entire thing are in the same patch as requested by Linus, so whatever weird looks can be directed at that guy. Link: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/fs/inode.c?id=c37fa164f793735b32aa3f53154ff1a7659e6442 [1] Signed-off-by: Mateusz Guzik Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/processor.h | 13 ------------- arch/arm64/include/asm/processor.h | 8 -------- arch/ia64/include/asm/processor.h | 3 --- .../include/asm/mach-cavium-octeon/cpu-feature-overrides.h | 2 -- arch/powerpc/include/asm/processor.h | 3 --- arch/sparc/include/asm/processor_64.h | 3 --- arch/x86/include/asm/processor.h | 6 ------ fs/inode.c | 3 --- include/linux/prefetch.h | 7 +------ 9 files changed, 1 insertion(+), 47 deletions(-) diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 714abe494e5f..55bb1c09fd39 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -47,12 +47,6 @@ unsigned long __get_wchan(struct task_struct *p); #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -#ifndef CONFIG_SMP -/* Nothing to prefetch. */ -#define spin_lock_prefetch(lock) do { } while (0) -#endif extern inline void prefetch(const void *ptr) { @@ -64,11 +58,4 @@ extern inline void prefetchw(const void *ptr) __builtin_prefetch(ptr, 1, 3); } -#ifdef CONFIG_SMP -extern inline void spin_lock_prefetch(const void *ptr) -{ - __builtin_prefetch(ptr, 1, 3); -} -#endif - #endif /* __ASM_ALPHA_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 3918f2a67970..e5bc54522e71 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -359,14 +359,6 @@ static inline void prefetchw(const void *ptr) asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr)); } -#define ARCH_HAS_SPINLOCK_PREFETCH -static inline void spin_lock_prefetch(const void *ptr) -{ - asm volatile(ARM64_LSE_ATOMIC_INSN( - "prfm pstl1strm, %a0", - "nop") : : "p" (ptr)); -} - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index d1978e004054..47e3801b526a 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -634,7 +634,6 @@ ia64_imva (void *addr) #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #define PREFETCH_STRIDE L1_CACHE_BYTES static inline void @@ -649,8 +648,6 @@ prefetchw (const void *x) ia64_lfetch_excl(ia64_lfhint_none, x); } -#define spin_lock_prefetch(x) prefetchw(x) - extern unsigned long boot_option_idle_override; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT, diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index 9151dcd9d0d5..af9cea21c853 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -58,8 +58,6 @@ #define cpu_has_rixi (cpu_data[0].cputype != CPU_CAVIUM_OCTEON) -#define ARCH_HAS_SPINLOCK_PREFETCH 1 -#define spin_lock_prefetch(x) prefetch(x) #define PREFETCH_STRIDE 128 #ifdef __OCTEON__ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8a6754ffdc7e..a6c7069bec5d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p, */ #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH static inline void prefetch(const void *x) { @@ -411,8 +410,6 @@ static inline void prefetchw(const void *x) __asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x)); } -#define spin_lock_prefetch(x) prefetchw(x) - /* asm stubs */ extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 2667f35d5ea5..0a0d5c3d184c 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -213,7 +213,6 @@ unsigned long __get_wchan(struct task_struct *task); */ #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH static inline void prefetch(const void *x) { @@ -239,8 +238,6 @@ static inline void prefetchw(const void *x) : "r" (x)); } -#define spin_lock_prefetch(x) prefetchw(x) - #define HAVE_ARCH_PICK_MMAP_LAYOUT int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4ae2773b873d..fd750247ca89 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -586,7 +586,6 @@ extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" @@ -620,11 +619,6 @@ static __always_inline void prefetchw(const void *x) "m" (*(const char *)x)); } -static inline void spin_lock_prefetch(const void *x) -{ - prefetchw(x); -} - #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) diff --git a/fs/inode.c b/fs/inode.c index 8fefb69e1f84..67611a360031 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -16,7 +16,6 @@ #include #include #include -#include #include /* for inode_has_buffers */ #include #include @@ -1041,8 +1040,6 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - spin_lock_prefetch(&sb->s_inode_list_lock); - inode = new_inode_pseudo(sb); if (inode) inode_sb_list_add(inode); diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h index b83a3f944f28..b068e2e60939 100644 --- a/include/linux/prefetch.h +++ b/include/linux/prefetch.h @@ -25,11 +25,10 @@ struct page; prefetch() should be defined by the architecture, if not, the #define below provides a no-op define. - There are 3 prefetch() macros: + There are 2 prefetch() macros: prefetch(x) - prefetches the cacheline at "x" for read prefetchw(x) - prefetches the cacheline at "x" for write - spin_lock_prefetch(x) - prefetches the spinlock *x for taking there is also PREFETCH_STRIDE which is the architecure-preferred "lookahead" size for prefetching streamed operations. @@ -44,10 +43,6 @@ struct page; #define prefetchw(x) __builtin_prefetch(x,1) #endif -#ifndef ARCH_HAS_SPINLOCK_PREFETCH -#define spin_lock_prefetch(x) prefetchw(x) -#endif - #ifndef PREFETCH_STRIDE #define PREFETCH_STRIDE (4*L1_CACHE_BYTES) #endif -- cgit v1.2.3 From cc941e548bffc01b5816b4edc5cb432a137a58b3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 11 Aug 2023 11:26:30 +0100 Subject: net: phy: fix IRQ-based wake-on-lan over hibernate / power off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uwe reports: "Most PHYs signal WoL using an interrupt. So disabling interrupts [at shutdown] breaks WoL at least on PHYs covered by the marvell driver." Discussing with Ioana, the problem which was trying to be solved was: "The board in question is a LS1021ATSN which has two AR8031 PHYs that share an interrupt line. In case only one of the PHYs is probed and there are pending interrupts on the PHY#2 an IRQ storm will happen since there is no entity to clear the interrupt from PHY#2's registers. PHY#1's driver will get stuck in .handle_interrupt() indefinitely." Further confirmation that "the two AR8031 PHYs are on the same MDIO bus." With WoL using interrupts to wake the system, in such a case, the system will begin booting with an asserted interrupt. Thus, we need to cope with an interrupt asserted during boot. Solve this instead by disabling interrupts during PHY probe. This will ensure in Ioana's situation that both PHYs of the same type sharing an interrupt line on a common MDIO bus will have their interrupt outputs disabled when the driver probes the device, but before we hook in any interrupt handlers - thus avoiding the interrupt storm. A better fix would be for platform firmware to disable the interrupting devices at source during boot, before control is handed to the kernel. Fixes: e2f016cf7751 ("net: phy: add a shutdown procedure") Link: 20230804071757.383971-1-u.kleine-koenig@pengutronix.de Reported-by: Uwe Kleine-König Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 61921d4dbb13..c7cf61fe41cf 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3216,6 +3216,8 @@ static int phy_probe(struct device *dev) goto out; } + phy_disable_interrupts(phydev); + /* Start out supporting everything. Eventually, * a controller will attach, and may modify one * or both of these values @@ -3333,16 +3335,6 @@ static int phy_remove(struct device *dev) return 0; } -static void phy_shutdown(struct device *dev) -{ - struct phy_device *phydev = to_phy_device(dev); - - if (phydev->state == PHY_READY || !phydev->attached_dev) - return; - - phy_disable_interrupts(phydev); -} - /** * phy_driver_register - register a phy_driver with the PHY layer * @new_driver: new phy_driver to register @@ -3376,7 +3368,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) new_driver->mdiodrv.driver.bus = &mdio_bus_type; new_driver->mdiodrv.driver.probe = phy_probe; new_driver->mdiodrv.driver.remove = phy_remove; - new_driver->mdiodrv.driver.shutdown = phy_shutdown; new_driver->mdiodrv.driver.owner = owner; new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS; -- cgit v1.2.3 From 8e3938cff0191c810b2abd827313c090fe09d166 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Sun, 13 Aug 2023 08:37:32 +0000 Subject: platform: mellanox: Fix order in exit flow Fix exit flow order: call mlxplat_post_exit() after mlxplat_i2c_main_exit() in order to unregister main i2c driver before to "mlxplat" driver. Fixes: 0170f616f496 ("platform: mellanox: Split initialization procedure") Signed-off-by: Vadim Pasternak Reviewed-by: Michael Shych Link: https://lore.kernel.org/r/20230813083735.39090-2-vadimp@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/x86/mlx-platform.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 67367f010139..5fb3348023a7 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -6238,8 +6238,6 @@ static void mlxplat_i2c_mux_topolgy_exit(struct mlxplat_priv *priv) if (priv->pdev_mux[i]) platform_device_unregister(priv->pdev_mux[i]); } - - mlxplat_post_exit(); } static int mlxplat_i2c_main_complition_notify(void *handle, int id) @@ -6369,6 +6367,7 @@ static void __exit mlxplat_exit(void) pm_power_off = NULL; mlxplat_pre_exit(priv); mlxplat_i2c_main_exit(priv); + mlxplat_post_exit(); } module_exit(mlxplat_exit); -- cgit v1.2.3 From 3c91d7e8c64f75c63da3565d16d5780320bd5d76 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Sun, 13 Aug 2023 08:37:33 +0000 Subject: platform: mellanox: mlx-platform: Fix signals polarity and latch mask Change polarity of chassis health and power signals and fix latch reset mask for L1 switch. Fixes: dd635e33b5c9 ("platform: mellanox: Introduce support of new Nvidia L1 switch") Signed-off-by: Vadim Pasternak Reviewed-by: Michael Shych Link: https://lore.kernel.org/r/20230813083735.39090-3-vadimp@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/x86/mlx-platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 5fb3348023a7..69256af04f05 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -237,7 +237,7 @@ #define MLXPLAT_CPLD_GWP_MASK GENMASK(0, 0) #define MLXPLAT_CPLD_EROT_MASK GENMASK(1, 0) #define MLXPLAT_CPLD_PWR_BUTTON_MASK BIT(0) -#define MLXPLAT_CPLD_LATCH_RST_MASK BIT(5) +#define MLXPLAT_CPLD_LATCH_RST_MASK BIT(6) #define MLXPLAT_CPLD_THERMAL1_PDB_MASK BIT(3) #define MLXPLAT_CPLD_THERMAL2_PDB_MASK BIT(4) #define MLXPLAT_CPLD_INTRUSION_MASK BIT(6) @@ -2475,7 +2475,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = { .reg = MLXPLAT_CPLD_LPC_REG_PWRB_OFFSET, .mask = MLXPLAT_CPLD_PWR_BUTTON_MASK, .count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_pwr_events_items_data), - .inversed = 0, + .inversed = 1, .health = false, }, { @@ -2484,7 +2484,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = { .reg = MLXPLAT_CPLD_LPC_REG_BRD_OFFSET, .mask = MLXPLAT_CPLD_L1_CHA_HEALTH_MASK, .count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_health_events_items_data), - .inversed = 0, + .inversed = 1, .health = false, .ind = 8, }, @@ -3677,7 +3677,7 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_regs_io_data[] = { { .label = "latch_reset", .reg = MLXPLAT_CPLD_LPC_REG_GP1_OFFSET, - .mask = GENMASK(7, 0) & ~BIT(5), + .mask = GENMASK(7, 0) & ~BIT(6), .mode = 0200, }, { -- cgit v1.2.3 From 9f8ccdb5088bd03062d9ad9c0f6abf600cbed8e8 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Sun, 13 Aug 2023 08:37:34 +0000 Subject: platform: mellanox: mlx-platform: Modify graceful shutdown callback and power down mask Use kernel_power_off() instead of kernel_halt() to pass through machine_power_off() -> pm_power_off(), otherwise axillary power does not go off. Change "power down" bitmask. Fixes: dd635e33b5c9 ("platform: mellanox: Introduce support of new Nvidia L1 switch") Signed-off-by: Vadim Pasternak Reviewed-by: Michael Shych Link: https://lore.kernel.org/r/20230813083735.39090-4-vadimp@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/x86/mlx-platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 69256af04f05..240bc3174caf 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -222,7 +222,7 @@ MLXPLAT_CPLD_AGGR_MASK_LC_SDWN) #define MLXPLAT_CPLD_LOW_AGGR_MASK_LOW 0xc1 #define MLXPLAT_CPLD_LOW_AGGR_MASK_ASIC2 BIT(2) -#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT BIT(4) +#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT GENMASK(5, 4) #define MLXPLAT_CPLD_LOW_AGGR_MASK_I2C BIT(6) #define MLXPLAT_CPLD_PSU_MASK GENMASK(1, 0) #define MLXPLAT_CPLD_PWR_MASK GENMASK(1, 0) @@ -2356,7 +2356,7 @@ mlxplat_mlxcpld_l1_switch_pwr_events_handler(void *handle, enum mlxreg_hotplug_k u8 action) { dev_info(&mlxplat_dev->dev, "System shutdown due to short press of power button"); - kernel_halt(); + kernel_power_off(); return 0; } -- cgit v1.2.3 From d66a8aab7dc36c975bbaa6aa74cf7445878e7c69 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Sun, 13 Aug 2023 08:37:35 +0000 Subject: platform: mellanox: Change register offset addresses Move debug register offsets to different location due to hardware changes. Fixes: dd635e33b5c9 ("platform: mellanox: Introduce support of new Nvidia L1 switch") Signed-off-by: Vadim Pasternak Reviewed-by: Michael Shych Link: https://lore.kernel.org/r/20230813083735.39090-5-vadimp@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/x86/mlx-platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 240bc3174caf..7d33977d9c60 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -62,10 +62,6 @@ #define MLXPLAT_CPLD_LPC_REG_PWM_CONTROL_OFFSET 0x37 #define MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET 0x3a #define MLXPLAT_CPLD_LPC_REG_AGGR_MASK_OFFSET 0x3b -#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET 0x3c -#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET 0x3d -#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET 0x3e -#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET 0x3f #define MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET 0x40 #define MLXPLAT_CPLD_LPC_REG_AGGRLO_MASK_OFFSET 0x41 #define MLXPLAT_CPLD_LPC_REG_AGGRCO_OFFSET 0x42 @@ -126,6 +122,10 @@ #define MLXPLAT_CPLD_LPC_REG_LC_SD_EVENT_OFFSET 0xaa #define MLXPLAT_CPLD_LPC_REG_LC_SD_MASK_OFFSET 0xab #define MLXPLAT_CPLD_LPC_REG_LC_PWR_ON 0xb2 +#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET 0xb6 +#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET 0xb7 +#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET 0xb8 +#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET 0xb9 #define MLXPLAT_CPLD_LPC_REG_GP4_RO_OFFSET 0xc2 #define MLXPLAT_CPLD_LPC_REG_SPI_CHNL_SELECT 0xc3 #define MLXPLAT_CPLD_LPC_REG_WD_CLEAR_OFFSET 0xc7 -- cgit v1.2.3 From 2b6aa6610dc9690f79d305ca938abfb799a4f766 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 12 Aug 2023 16:48:18 +0200 Subject: platform/x86: lenovo-ymc: Only bind on machines with a convertible DMI chassis-type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lenovo-ymc driver is causing the keyboard + touchpad to stop working on some regular laptop models such as the Lenovo ThinkBook 13s G2 ITL 20V9. The problem is that there are YMC WMI GUID methods in the ACPI tables of these laptops, despite them not being Yogas and lenovo-ymc loading causes libinput to see a SW_TABLET_MODE switch with state 1. This in turn causes libinput to ignore events from the builtin keyboard and touchpad, since it filters those out for a Yoga in tablet mode. Similar issues with false-positive SW_TABLET_MODE=1 reporting have been seen with the intel-hid driver. Copy the intel-hid driver approach to fix this and only bind to the WMI device on machines where the DMI chassis-type indicates the machine is a convertible. Add a 'force' module parameter to allow overriding the chassis-type check so that users can easily test if the YMC interface works on models which report an unexpected chassis-type. Fixes: e82882cdd241 ("platform/x86: Add driver for Yoga Tablet Mode switch") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2229373 Cc: André Apitzsch Cc: stable@vger.kernel.org Tested-by: Andrew Kallmeyer Tested-by: Gergő Köteles Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230812144818.383230-1-hdegoede@redhat.com --- drivers/platform/x86/lenovo-ymc.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/platform/x86/lenovo-ymc.c b/drivers/platform/x86/lenovo-ymc.c index 41676188b373..f360370d5002 100644 --- a/drivers/platform/x86/lenovo-ymc.c +++ b/drivers/platform/x86/lenovo-ymc.c @@ -24,6 +24,10 @@ static bool ec_trigger __read_mostly; module_param(ec_trigger, bool, 0444); MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events"); +static bool force; +module_param(force, bool, 0444); +MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type"); + static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = { { /* Lenovo Yoga 7 14ARB7 */ @@ -35,6 +39,20 @@ static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = { { } }; +static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */), + }, + }, + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */), + }, + }, + { } +}; + struct lenovo_ymc_private { struct input_dev *input_dev; struct acpi_device *ec_acpi_dev; @@ -111,6 +129,13 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) struct input_dev *input_dev; int err; + if (!dmi_check_system(allowed_chasis_types_dmi_table)) { + if (force) + dev_info(&wdev->dev, "Force loading Lenovo YMC support\n"); + else + return -ENODEV; + } + ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table); priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL); -- cgit v1.2.3 From 2ccdd1b13c591d306f0401d98dedc4bdcd02b421 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Aug 2023 11:29:55 -0700 Subject: Linux 6.5-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6bbf9db6b414..00cfb37a9ab8 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit v1.2.3 From ace0ab3a4b54205a01d3f4a0fd9bdb4616cfb60b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 11 Aug 2023 17:45:23 +0200 Subject: Revert "vlan: Fix VLAN 0 memory leak" This reverts commit 718cb09aaa6fa78cc8124e9517efbc6c92665384. The commit triggers multiple syzbot issues, probably due to possibility of manually creating VLAN 0 on netdevice which will cause the code to delete it since it can't distinguish such VLAN from implicit VLAN 0 automatically created for devices with NETIF_F_HW_VLAN_CTAG_FILTER feature. Reported-by: syzbot+662f783a5cdf3add2719@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/00000000000090196d0602a6167d@google.com/ Reported-by: syzbot+4b4f06495414e92701d5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/00000000000096ae870602a61602@google.com/ Reported-by: syzbot+d810d3cd45ed1848c3f7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000009f0f9c0602a616ce@google.com/ Fixes: 718cb09aaa6f ("vlan: Fix VLAN 0 memory leak") Signed-off-by: Vlad Buslov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index b3662119ddbc..e40aa3e3641c 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -384,7 +384,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev->name); vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } - if (event == NETDEV_DOWN) + if (event == NETDEV_DOWN && + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); vlan_info = rtnl_dereference(dev->vlan_info); -- cgit v1.2.3 From 7a894c87374771f3cfb1b8e5453fbe03f1fb8135 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 13 Aug 2023 22:11:19 +0200 Subject: parisc: Fix CONFIG_TLB_PTLOCK to work with lightweight spinlock checks For the TLB_PTLOCK checks we used an optimization to store the spc register into the spinlock to unlock it. This optimization works as long as the lightweight spinlock checks (CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK) aren't enabled, because they really check if the lock word is zero or __ARCH_SPIN_LOCK_UNLOCKED_VAL and abort with a kernel crash ("Spinlock was trashed") otherwise. Drop that optimization to make it possible to activate both checks at the same time. Noticed-by: Sam James Signed-off-by: Helge Deller Tested-by: Sam James Cc: stable@vger.kernel.org # v6.4+ Fixes: 15e64ef6520e ("parisc: Add lightweight spinlock checks") --- arch/parisc/kernel/entry.S | 47 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 0e5ebfe8d9d2..ae03b8679696 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -406,7 +407,7 @@ LDREG 0(\ptp),\pte bb,<,n \pte,_PAGE_PRESENT_BIT,3f b \fault - stw \spc,0(\tmp) + stw \tmp1,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif 2: LDREG 0(\ptp),\pte @@ -415,24 +416,22 @@ .endm /* Release page_table_lock without reloading lock address. - Note that the values in the register spc are limited to - NR_SPACE_IDS (262144). Thus, the stw instruction always - stores a nonzero value even when register spc is 64 bits. We use an ordered store to ensure all prior accesses are performed prior to releasing the lock. */ - .macro ptl_unlock0 spc,tmp + .macro ptl_unlock0 spc,tmp,tmp2 #ifdef CONFIG_TLB_PTLOCK -98: or,COND(=) %r0,\spc,%r0 - stw,ma \spc,0(\tmp) +98: ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2 + or,COND(=) %r0,\spc,%r0 + stw,ma \tmp2,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm /* Release page_table_lock. */ - .macro ptl_unlock1 spc,tmp + .macro ptl_unlock1 spc,tmp,tmp2 #ifdef CONFIG_TLB_PTLOCK 98: get_ptl \tmp - ptl_unlock0 \spc,\tmp + ptl_unlock0 \spc,\tmp,\tmp2 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm @@ -1125,7 +1124,7 @@ dtlb_miss_20w: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1151,7 +1150,7 @@ nadtlb_miss_20w: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1185,7 +1184,7 @@ dtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1218,7 +1217,7 @@ nadtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1247,7 +1246,7 @@ dtlb_miss_20: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1275,7 +1274,7 @@ nadtlb_miss_20: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1320,7 +1319,7 @@ itlb_miss_20w: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1344,7 +1343,7 @@ naitlb_miss_20w: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1378,7 +1377,7 @@ itlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1402,7 +1401,7 @@ naitlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1432,7 +1431,7 @@ itlb_miss_20: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1452,7 +1451,7 @@ naitlb_miss_20: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1482,7 +1481,7 @@ dbit_trap_20w: idtlbt pte,prot - ptl_unlock0 spc,t0 + ptl_unlock0 spc,t0,t1 rfir nop #else @@ -1508,7 +1507,7 @@ dbit_trap_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock0 spc,t0 + ptl_unlock0 spc,t0,t1 rfir nop @@ -1528,7 +1527,7 @@ dbit_trap_20: idtlbt pte,prot - ptl_unlock0 spc,t0 + ptl_unlock0 spc,t0,t1 rfir nop #endif -- cgit v1.2.3 From 855067defa36b1f9effad8c219d9a85b655cf500 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 11 Aug 2023 17:59:27 +0200 Subject: selftests: mirror_gre_changes: Tighten up the TTL test match This test verifies whether the encapsulated packets have the correct configured TTL. It does so by sending ICMP packets through the test topology and mirroring them to a gretap netdevice. On a busy host however, more than just the test ICMP packets may end up flowing through the topology, get mirrored, and counted. This leads to potential spurious failures as the test observes much more mirrored packets than the sent test packets, and assumes a bug. Fix this by tightening up the mirror action match. Change it from matchall to a flower classifier matching on ICMP packets specifically. Fixes: 45315673e0c5 ("selftests: forwarding: Test changes in mirror-to-gretap") Signed-off-by: Petr Machata Tested-by: Mirsad Todorovac Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/mirror_gre_changes.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index aff88f78e339..5ea9d63915f7 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -72,7 +72,8 @@ test_span_gre_ttl() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev \ + "prot ip flower $tcflags ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ flower skip_hw ip_ttl 50 action pass -- cgit v1.2.3 From ae6546835efaa7195aaaa10e5ff4e695cd82a816 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Sat, 12 Aug 2023 20:52:39 +0200 Subject: drm/panel: JDI LT070ME05000 simplify with dev_err_probe() Use the dev_err_probe() helper to simplify error handling during probe. This also handle scenario, when EDEFER is returned and useless error is printed. Fixes error: panel-jdi-lt070me05000 4700000.dsi.0: cannot get enable-gpio -517 Signed-off-by: David Heidelberg Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230812185239.378582-1-david@ixit.cz --- drivers/gpu/drm/panel/panel-jdi-lt070me05000.c | 36 ++++++++++---------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c index 8f4f137a2af6..213008499caa 100644 --- a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c +++ b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c @@ -404,38 +404,30 @@ static int jdi_panel_add(struct jdi_panel *jdi) ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(jdi->supplies), jdi->supplies); - if (ret < 0) { - dev_err(dev, "failed to init regulator, ret=%d\n", ret); - return ret; - } + if (ret < 0) + return dev_err_probe(dev, ret, + "failed to init regulator, ret=%d\n", ret); jdi->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(jdi->enable_gpio)) { - ret = PTR_ERR(jdi->enable_gpio); - dev_err(dev, "cannot get enable-gpio %d\n", ret); - return ret; + return dev_err_probe(dev, PTR_ERR(jdi->enable_gpio), + "cannot get enable-gpio %d\n", ret); } jdi->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(jdi->reset_gpio)) { - ret = PTR_ERR(jdi->reset_gpio); - dev_err(dev, "cannot get reset-gpios %d\n", ret); - return ret; - } + if (IS_ERR(jdi->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(jdi->reset_gpio), + "cannot get reset-gpios %d\n", ret); jdi->dcdc_en_gpio = devm_gpiod_get(dev, "dcdc-en", GPIOD_OUT_LOW); - if (IS_ERR(jdi->dcdc_en_gpio)) { - ret = PTR_ERR(jdi->dcdc_en_gpio); - dev_err(dev, "cannot get dcdc-en-gpio %d\n", ret); - return ret; - } + if (IS_ERR(jdi->dcdc_en_gpio)) + return dev_err_probe(dev, PTR_ERR(jdi->dcdc_en_gpio), + "cannot get dcdc-en-gpio %d\n", ret); jdi->backlight = drm_panel_create_dsi_backlight(jdi->dsi); - if (IS_ERR(jdi->backlight)) { - ret = PTR_ERR(jdi->backlight); - dev_err(dev, "failed to register backlight %d\n", ret); - return ret; - } + if (IS_ERR(jdi->backlight)) + return dev_err_probe(dev, PTR_ERR(jdi->backlight), + "failed to register backlight %d\n", ret); drm_panel_init(&jdi->base, &jdi->dsi->dev, &jdi_panel_funcs, DRM_MODE_CONNECTOR_DSI); -- cgit v1.2.3 From e8470c0a7bcaa82f78ad34282d662dd7bd9630c2 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 4 Aug 2023 17:12:39 +0200 Subject: drm/panel: simple: Fix AUO G121EAN01 panel timings according to the docs Commit 03e909acd95a ("drm/panel: simple: Add support for AUO G121EAN01.4 panel") added support for this panel model, but the timings it implements are very different from what the datasheet describes. I checked both the G121EAN01.0 datasheet from [0] and the G121EAN01.4 one from [1] and they all have the same timings: for example the LVDS clock typical value is 74.4 MHz, not 66.7 MHz as implemented. Replace the timings with the ones from the documentation. These timings have been tested and the clock frequencies verified with an oscilloscope to ensure they are correct. Also use struct display_timing instead of struct drm_display_mode in order to also specify the minimum and maximum values. [0] https://embedded.avnet.com/product/g121ean01-0/ [1] https://embedded.avnet.com/product/g121ean01-4/ Fixes: 03e909acd95a ("drm/panel: simple: Add support for AUO G121EAN01.4 panel") Signed-off-by: Luca Ceresoli Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230804151239.835216-1-luca.ceresoli@bootlin.com --- drivers/gpu/drm/panel/panel-simple.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index aaba36b3a674..b38d0e95cd54 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -999,21 +999,21 @@ static const struct panel_desc auo_g104sn02 = { .connector_type = DRM_MODE_CONNECTOR_LVDS, }; -static const struct drm_display_mode auo_g121ean01_mode = { - .clock = 66700, - .hdisplay = 1280, - .hsync_start = 1280 + 58, - .hsync_end = 1280 + 58 + 8, - .htotal = 1280 + 58 + 8 + 70, - .vdisplay = 800, - .vsync_start = 800 + 6, - .vsync_end = 800 + 6 + 4, - .vtotal = 800 + 6 + 4 + 10, +static const struct display_timing auo_g121ean01_timing = { + .pixelclock = { 60000000, 74400000, 90000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 20, 50, 100 }, + .hback_porch = { 20, 50, 100 }, + .hsync_len = { 30, 100, 200 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 2, 10, 25 }, + .vback_porch = { 2, 10, 25 }, + .vsync_len = { 4, 18, 50 }, }; static const struct panel_desc auo_g121ean01 = { - .modes = &auo_g121ean01_mode, - .num_modes = 1, + .timings = &auo_g121ean01_timing, + .num_timings = 1, .bpc = 8, .size = { .width = 261, -- cgit v1.2.3 From 69513dd669e243928f7450893190915a88f84a2b Mon Sep 17 00:00:00 2001 From: Russell Harmon via samba-technical Date: Thu, 10 Aug 2023 00:19:22 -0700 Subject: cifs: Release folio lock on fscache read hit. Under the current code, when cifs_readpage_worker is called, the call contract is that the callee should unlock the page. This is documented in the read_folio section of Documentation/filesystems/vfs.rst as: > The filesystem should unlock the folio once the read has completed, > whether it was successful or not. Without this change, when fscache is in use and cache hit occurs during a read, the page lock is leaked, producing the following stack on subsequent reads (via mmap) to the page: $ cat /proc/3890/task/12864/stack [<0>] folio_wait_bit_common+0x124/0x350 [<0>] filemap_read_folio+0xad/0xf0 [<0>] filemap_fault+0x8b1/0xab0 [<0>] __do_fault+0x39/0x150 [<0>] do_fault+0x25c/0x3e0 [<0>] __handle_mm_fault+0x6ca/0xc70 [<0>] handle_mm_fault+0xe9/0x350 [<0>] do_user_addr_fault+0x225/0x6c0 [<0>] exc_page_fault+0x84/0x1b0 [<0>] asm_exc_page_fault+0x27/0x30 This requires a reboot to resolve; it is a deadlock. Note however that the call to cifs_readpage_from_fscache does mark the page clean, but does not free the folio lock. This happens in __cifs_readpage_from_fscache on success. Releasing the lock at that point however is not appropriate as cifs_readahead also calls cifs_readpage_from_fscache and *does* unconditionally release the lock after its return. This change therefore effectively makes cifs_readpage_worker work like cifs_readahead. Signed-off-by: Russell Harmon Acked-by: Paulo Alcantara (SUSE) Reviewed-by: David Howells Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 60a49caf8425..6bc44f79d2e9 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -4681,9 +4681,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page, io_error: kunmap(page); - unlock_page(page); read_complete: + unlock_page(page); return rc; } -- cgit v1.2.3 From 7b38f6ddc97bf572c3422d3175e8678dd95502fa Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 10 Aug 2023 21:41:03 -0500 Subject: smb3: display network namespace in debug information We recently had problems where a network namespace was deleted causing hard to debug reconnect problems. To help deal with configuration issues like this it is useful to dump the network namespace to better debug what happened. So add this to information displayed in /proc/fs/cifs/DebugData for the server (and channels if mounted with multichannel). For example: Local Users To Server: 1 SecMode: 0x1 Req On Wire: 0 Net namespace: 4026531840 This can be easily compared with what is displayed for the processes on the system. For example /proc/1/ns/net in this case showed the same thing (see below), and we can see that the namespace is still valid in this example. 'net:[4026531840]' Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index fb4162a52844..aec6e9137474 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -153,6 +153,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) in_flight(server), atomic_read(&server->in_send), atomic_read(&server->num_waiters)); +#ifdef CONFIG_NET_NS + if (server->net) + seq_printf(m, " Net namespace: %u ", server->net->ns.inum); +#endif /* NET_NS */ + } static inline const char *smb_speed_to_str(size_t bps) @@ -430,10 +435,15 @@ skip_rdma: server->reconnect_instance, server->srv_count, server->sec_mode, in_flight(server)); +#ifdef CONFIG_NET_NS + if (server->net) + seq_printf(m, " Net namespace: %u ", server->net->ns.inum); +#endif /* NET_NS */ seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d", atomic_read(&server->in_send), atomic_read(&server->num_waiters)); + if (server->leaf_fullpath) { seq_printf(m, "\nDFS leaf full path: %s", server->leaf_fullpath); -- cgit v1.2.3 From 5598c9bfdb81f40f2f5d769b342d25bff74b07a6 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 25 Jul 2023 18:00:44 -0700 Subject: drm/i915/guc/slpc: Restore efficient freq earlier This should be done before the soft min/max frequencies are restored. When we disable the "Ignore efficient frequency" flag, GuC does not actually bring the requested freq down to RPn. Specifically, this scenario- - ignore efficient freq set to true - reduce min to RPn (from efficient) - suspend - resume (includes GuC load, restore soft min/max, restore efficient freq) - validate min freq has been resored to RPn This will fail if we didn't first restore(disable, in this case) efficient freq flag before setting the soft min frequency. v2: Bring the min freq down to RPn when we disable efficient freq (Rodrigo) Also made the change to set the min softlimit to RPn at init. Otherwise, we were storing RPe there. Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8736 Fixes: 55f9720dbf23 ("drm/i915/guc/slpc: Provide sysfs for efficient freq") Fixes: 95ccf312a1e4 ("drm/i915/guc/slpc: Allow SLPC to use efficient frequency") Signed-off-by: Vinay Belgaumkar Reviewed-by: Rodrigo Vivi Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230726010044.3280402-1-vinay.belgaumkar@intel.com (cherry picked from commit 28e671114fb0f28f334fac8d0a6b9c395c7b0498) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ee9f83af7cf6..477df260ae3a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -470,12 +470,19 @@ int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val) ret = slpc_set_param(slpc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, val); - if (ret) + if (ret) { guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n", val, ERR_PTR(ret)); - else + } else { slpc->ignore_eff_freq = val; + /* Set min to RPn when we disable efficient freq */ + if (val) + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } + intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&slpc->lock); return ret; @@ -602,9 +609,8 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return ret; if (!slpc->min_freq_softlimit) { - ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit); - if (unlikely(ret)) - return ret; + /* Min softlimit is initialized to RPn */ + slpc->min_freq_softlimit = slpc->min_freq; slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; } else { return intel_guc_slpc_set_min_freq(slpc, @@ -755,6 +761,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return ret; } + /* Set cached value of ignore efficient freq */ + intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); + /* Revert SLPC min/max to softlimits if necessary */ ret = slpc_set_softlimits(slpc); if (unlikely(ret)) { @@ -765,9 +774,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); - /* Set cached value of ignore efficient freq */ - intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); - return 0; } -- cgit v1.2.3 From 2002eb6d3ea954dde9f8a223018d5335779937d0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Aug 2023 15:27:06 +0300 Subject: drm/i915/sdvo: fix panel_type initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 3f9ffce5765d ("drm/i915: Do panel VBT init early if the VBT declares an explicit panel type") started using -1 as the value for unset panel_type. It gets initialized in intel_panel_init_alloc(), but the SDVO code never calls it. Call intel_panel_init_alloc() to initialize the panel, including the panel_type. Reported-by: Tomi Leppänen Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8896 Fixes: 3f9ffce5765d ("drm/i915: Do panel VBT init early if the VBT declares an explicit panel type") Cc: Ville Syrjälä Cc: # v6.1+ Reviewed-by: Uma Shankar Tested-by: Tomi Leppänen Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230803122706.838721-1-jani.nikula@intel.com (cherry picked from commit 26e60294e8eacedc8ebb33405b2c375fd80e0900) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 21f92123c844..67e3aaf9b432 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2752,7 +2752,7 @@ static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void) __drm_atomic_helper_connector_reset(&sdvo_connector->base.base, &conn_state->base.base); - INIT_LIST_HEAD(&sdvo_connector->base.panel.fixed_modes); + intel_panel_init_alloc(&sdvo_connector->base); return sdvo_connector; } -- cgit v1.2.3 From 423ffe62c06ae241ad460f4629dddb9dcf55e060 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 4 Aug 2023 11:45:59 +0300 Subject: drm/i915: fix display probe for IVB Q and IVB D GT2 server The current display probe is unable to differentiate between IVB Q and IVB D GT2 server, as they both have the same device id, but different subvendor and subdevice. This leads to the latter being misidentified as the former, and should just end up not having a display. However, the no display case returns a NULL as the display device info, and promptly oopses. As the IVB Q case is rare, and we're anyway moving towards GMD ID, handle the identification requiring subvendor and subdevice as a special case first, instead of unnecessarily growing the intel_display_ids[] array with subvendor and subdevice. [ 5.425298] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 5.426059] #PF: supervisor read access in kernel mode [ 5.426810] #PF: error_code(0x0000) - not-present page [ 5.427570] PGD 0 P4D 0 [ 5.428285] Oops: 0000 [#1] PREEMPT SMP PTI [ 5.429035] CPU: 0 PID: 137 Comm: (udev-worker) Not tainted 6.4.0-1-amd64 #1 Debian 6.4.4-1 [ 5.429759] Hardware name: HP HP Z220 SFF Workstation/HP Z220 SFF Workstation, BIOS 4.19-218-gb184e6e0a1 02/02/2023 [ 5.430485] RIP: 0010:intel_device_info_driver_create+0xf1/0x120 [i915] [ 5.431338] Code: 48 8b 97 80 1b 00 00 89 8f c0 1b 00 00 48 89 b7 b0 1b 00 00 48 89 97 b8 1b 00 00 0f b7 fd e8 76 e8 14 00 48 89 83 50 1b 00 00 <48> 8b 08 48 89 8b c4 1b 00 00 48 8b 48 08 48 89 8b cc 1b 00 00 8b [ 5.432920] RSP: 0018:ffffb8254044fb98 EFLAGS: 00010206 [ 5.433707] RAX: 0000000000000000 RBX: ffff923076e80000 RCX: 0000000000000000 [ 5.434494] RDX: 0000000000000260 RSI: 0000000100001000 RDI: 000000000000016a [ 5.435277] RBP: 000000000000016a R08: ffffb8254044fb00 R09: 0000000000000000 [ 5.436055] R10: ffff922d02761de8 R11: 00657361656c6572 R12: ffffffffc0e5d140 [ 5.436867] R13: ffff922d00b720d0 R14: 0000000076e80000 R15: ffff923078c0cae8 [ 5.437646] FS: 00007febd19a18c0(0000) GS:ffff92307c000000(0000) knlGS:0000000000000000 [ 5.438434] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.439218] CR2: 0000000000000000 CR3: 000000010256e002 CR4: 00000000001706f0 [ 5.440009] Call Trace: [ 5.440824] [ 5.441611] ? __die+0x23/0x70 [ 5.442394] ? page_fault_oops+0x17d/0x4c0 [ 5.443173] ? exc_page_fault+0x7f/0x180 [ 5.443949] ? asm_exc_page_fault+0x26/0x30 [ 5.444756] ? intel_device_info_driver_create+0xf1/0x120 [i915] [ 5.445652] ? intel_device_info_driver_create+0xea/0x120 [i915] [ 5.446545] i915_driver_probe+0x7f/0xb60 [i915] [ 5.447431] ? drm_privacy_screen_get+0x15c/0x1a0 [drm] [ 5.448240] local_pci_probe+0x45/0xa0 [ 5.449013] pci_device_probe+0xc7/0x240 [ 5.449748] really_probe+0x19e/0x3e0 [ 5.450464] ? __pfx___driver_attach+0x10/0x10 [ 5.451172] __driver_probe_device+0x78/0x160 [ 5.451870] driver_probe_device+0x1f/0x90 [ 5.452601] __driver_attach+0xd2/0x1c0 [ 5.453293] bus_for_each_dev+0x88/0xd0 [ 5.453989] bus_add_driver+0x116/0x220 [ 5.454672] driver_register+0x59/0x100 [ 5.455336] i915_init+0x25/0xc0 [i915] [ 5.456104] ? __pfx_i915_init+0x10/0x10 [i915] [ 5.456882] do_one_initcall+0x5d/0x240 [ 5.457511] do_init_module+0x60/0x250 [ 5.458126] __do_sys_finit_module+0xac/0x120 [ 5.458721] do_syscall_64+0x60/0xc0 [ 5.459314] ? syscall_exit_to_user_mode+0x1b/0x40 [ 5.459897] ? do_syscall_64+0x6c/0xc0 [ 5.460510] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 5.461082] RIP: 0033:0x7febd20b0eb9 [ 5.461648] Code: 08 89 e8 5b 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 2f 1f 0d 00 f7 d8 64 89 01 48 [ 5.462905] RSP: 002b:00007fffabb1ba78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 5.463554] RAX: ffffffffffffffda RBX: 0000561e6304f410 RCX: 00007febd20b0eb9 [ 5.464201] RDX: 0000000000000000 RSI: 00007febd2244f0d RDI: 0000000000000015 [ 5.464869] RBP: 00007febd2244f0d R08: 0000000000000000 R09: 000000000000000a [ 5.465512] R10: 0000000000000015 R11: 0000000000000246 R12: 0000000000020000 [ 5.466124] R13: 0000000000000000 R14: 0000561e63032b60 R15: 000000000000000a [ 5.466700] [ 5.467271] Modules linked in: i915(+) drm_buddy video crc32_pclmul sr_mod hid_generic wmi crc32c_intel i2c_algo_bit sd_mod cdrom drm_display_helper cec usbhid rc_core ghash_clmulni_intel hid sha512_ssse3 ttm sha512_generic xhci_pci ehci_pci xhci_hcd ehci_hcd nvme ahci drm_kms_helper nvme_core libahci t10_pi libata psmouse aesni_intel scsi_mod crypto_simd i2c_i801 scsi_common crc64_rocksoft_generic cryptd i2c_smbus drm lpc_ich crc64_rocksoft crc_t10dif e1000e usbcore crct10dif_generic usb_common crct10dif_pclmul crc64 crct10dif_common button [ 5.469750] CR2: 0000000000000000 [ 5.470364] ---[ end trace 0000000000000000 ]--- [ 5.470971] RIP: 0010:intel_device_info_driver_create+0xf1/0x120 [i915] [ 5.471699] Code: 48 8b 97 80 1b 00 00 89 8f c0 1b 00 00 48 89 b7 b0 1b 00 00 48 89 97 b8 1b 00 00 0f b7 fd e8 76 e8 14 00 48 89 83 50 1b 00 00 <48> 8b 08 48 89 8b c4 1b 00 00 48 8b 48 08 48 89 8b cc 1b 00 00 8b [ 5.473034] RSP: 0018:ffffb8254044fb98 EFLAGS: 00010206 [ 5.473698] RAX: 0000000000000000 RBX: ffff923076e80000 RCX: 0000000000000000 [ 5.474371] RDX: 0000000000000260 RSI: 0000000100001000 RDI: 000000000000016a [ 5.475045] RBP: 000000000000016a R08: ffffb8254044fb00 R09: 0000000000000000 [ 5.475725] R10: ffff922d02761de8 R11: 00657361656c6572 R12: ffffffffc0e5d140 [ 5.476405] R13: ffff922d00b720d0 R14: 0000000076e80000 R15: ffff923078c0cae8 [ 5.477124] FS: 00007febd19a18c0(0000) GS:ffff92307c000000(0000) knlGS:0000000000000000 [ 5.477811] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.478499] CR2: 0000000000000000 CR3: 000000010256e002 CR4: 00000000001706f0 Fixes: 69d439818fe5 ("drm/i915/display: Make display responsible for probing its own IP") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8991 Cc: Matt Roper Cc: Andrzej Hajda Reviewed-by: Luca Coelho Reviewed-by: Matt Roper Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230804084600.1005818-1-jani.nikula@intel.com (cherry picked from commit 1435188307d128671f677eb908e165666dd83652) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/display/intel_display_device.c | 24 +++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index f0ee9bcf661d..b0c6a2a86f2f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -662,10 +662,24 @@ static const struct intel_display_device_info xe_lpdp_display = { BIT(TRANSCODER_C) | BIT(TRANSCODER_D), }; +/* + * Separate detection for no display cases to keep the display id array simple. + * + * IVB Q requires subvendor and subdevice matching to differentiate from IVB D + * GT2 server. + */ +static bool has_no_display(struct pci_dev *pdev) +{ + static const struct pci_device_id ids[] = { + INTEL_IVB_Q_IDS(0), + {} + }; + + return pci_match_id(ids, pdev); +} + #undef INTEL_VGA_DEVICE -#undef INTEL_QUANTA_VGA_DEVICE #define INTEL_VGA_DEVICE(id, info) { id, info } -#define INTEL_QUANTA_VGA_DEVICE(info) { 0x16a, info } static const struct { u32 devid; @@ -690,7 +704,6 @@ static const struct { INTEL_IRONLAKE_M_IDS(&ilk_m_display), INTEL_SNB_D_IDS(&snb_display), INTEL_SNB_M_IDS(&snb_display), - INTEL_IVB_Q_IDS(NULL), /* must be first IVB in list */ INTEL_IVB_M_IDS(&ivb_display), INTEL_IVB_D_IDS(&ivb_display), INTEL_HSW_IDS(&hsw_display), @@ -775,6 +788,11 @@ intel_display_device_probe(struct drm_i915_private *i915, bool has_gmdid, if (has_gmdid) return probe_gmdid_display(i915, gmdid_ver, gmdid_rel, gmdid_step); + if (has_no_display(pdev)) { + drm_dbg_kms(&i915->drm, "Device doesn't have display\n"); + return &no_display; + } + for (i = 0; i < ARRAY_SIZE(intel_display_ids); i++) { if (intel_display_ids[i].devid == pdev->device) return intel_display_ids[i].info; -- cgit v1.2.3 From c96e2a695e00bca5487824d84b85aab6aa2c1891 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Aug 2023 13:36:54 -0400 Subject: sunrpc: set the bv_offset of first bvec in svc_tcp_sendmsg svc_tcp_sendmsg used to factor in the xdr->page_base when sending pages, but commit 5df5dd03a8f7 ("sunrpc: Use sendmsg(MSG_SPLICE_PAGES) rather then sendpage") dropped that part of the handling. Fix it by setting the bv_offset of the first bvec. Fixes: 5df5dd03a8f7 ("sunrpc: Use sendmsg(MSG_SPLICE_PAGES) rather then sendpage") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e43f26382411..2eb8df44f894 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1244,6 +1244,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, if (ret != head->iov_len) goto out; + if (xdr_buf_pagecount(xdr)) + xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base); + msg.msg_flags = MSG_SPLICE_PAGES; iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec, xdr_buf_pagecount(xdr), xdr->page_len); -- cgit v1.2.3 From 6c461e394d11a981c662cc16cebfb05b602e23ba Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Mon, 7 Aug 2023 18:44:51 +0530 Subject: net: macb: In ZynqMP resume always configure PS GTR for non-wakeup source On Zynq UltraScale+ MPSoC ubuntu platform when systemctl issues suspend, network manager bring down the interface and goes into suspend. When it wakes up it again enables the interface. This leads to xilinx-psgtr "PLL lock timeout" on interface bringup, as the power management controller power down the entire FPD (including SERDES) if none of the FPD devices are in use and serdes is not initialized on resume. $ sudo rtcwake -m no -s 120 -v $ sudo systemctl suspend $ ifconfig eth1 up xilinx-psgtr fd400000.phy: lane 0 (type 10, protocol 5): PLL lock timeout phy phy-fd400000.phy.0: phy poweron failed --> -110 macb driver is called in this way: 1. macb_close: Stop network interface. In this function, it reset MACB IP and disables PHY and network interface. 2. macb_suspend: It is called in kernel suspend flow. But because network interface has been disabled(netif_running(ndev) is false), it does nothing and returns directly; 3. System goes into suspend state. Some time later, system is waken up by RTC wakeup device; 4. macb_resume: It does nothing because network interface has been disabled; 5. macb_open: It is called to enable network interface again. ethernet interface is initialized in this API but serdes which is power-off by PMUFW during FPD-off suspend is not initialized again and so we hit GT PLL lock issue on open. To resolve this PLL timeout issue always do PS GTR initialization when ethernet device is configured as non-wakeup source. Fixes: f22bd29ba19a ("net: macb: Fix ZynqMP SGMII non-wakeup source resume failure") Fixes: 8b73fa3ae02b ("net: macb: Added ZynqMP-specific initialization") Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1691414091-2260697-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f6a0f12a6d52..82929ee76739 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5194,6 +5194,9 @@ static int __maybe_unused macb_suspend(struct device *dev) unsigned int q; int err; + if (!device_may_wakeup(&bp->dev->dev)) + phy_exit(bp->sgmii_phy); + if (!netif_running(netdev)) return 0; @@ -5254,7 +5257,6 @@ static int __maybe_unused macb_suspend(struct device *dev) if (!(bp->wol & MACB_WOL_ENABLED)) { rtnl_lock(); phylink_stop(bp->phylink); - phy_exit(bp->sgmii_phy); rtnl_unlock(); spin_lock_irqsave(&bp->lock, flags); macb_reset_hw(bp); @@ -5284,6 +5286,9 @@ static int __maybe_unused macb_resume(struct device *dev) unsigned int q; int err; + if (!device_may_wakeup(&bp->dev->dev)) + phy_init(bp->sgmii_phy); + if (!netif_running(netdev)) return 0; @@ -5344,8 +5349,6 @@ static int __maybe_unused macb_resume(struct device *dev) macb_set_rx_mode(netdev); macb_restore_features(bp); rtnl_lock(); - if (!device_may_wakeup(&bp->dev->dev)) - phy_init(bp->sgmii_phy); phylink_start(bp->phylink); rtnl_unlock(); -- cgit v1.2.3 From 519b227904f0e70d4a1d6cf41daa5392715f2d2f Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 10 Aug 2023 17:01:11 +0200 Subject: octeon_ep: fix timeout value for waiting on mbox response The intention was to wait up to 500 ms for the mbox response. The third argument to wait_event_interruptible_timeout() is supposed to be the timeout duration. The driver mistakenly passed absolute time instead. Fixes: 577f0d1b1c5f ("octeon_ep: add separate mailbox command and response queues") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230810150114.107765-2-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c index 1cc6af2feb38..565320ec24f8 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c @@ -55,7 +55,7 @@ static int octep_send_mbox_req(struct octep_device *oct, list_add_tail(&d->list, &oct->ctrl_req_wait_list); ret = wait_event_interruptible_timeout(oct->ctrl_req_wait_q, (d->done != 0), - jiffies + msecs_to_jiffies(500)); + msecs_to_jiffies(500)); list_del(&d->list); if (ret == 0 || ret == 1) return -EAGAIN; -- cgit v1.2.3 From 28458c80006bb4e993a09fc094094a8578cad292 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 10 Aug 2023 17:01:12 +0200 Subject: octeon_ep: cancel tx_timeout_task later in remove sequence tx_timeout_task is canceled too early when removing the driver. Nothing prevents .ndo_tx_timeout from triggering and queuing the work again. Better cancel it after the netdev is unregistered. It's harmless for octep_tx_timeout_task to run in the window between the unregistration and cancelation, because it checks netif_running. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Michal Schmidt Link: https://lore.kernel.org/r/20230810150114.107765-3-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 43eb6e871351..d8066bff5f7b 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1200,12 +1200,12 @@ static void octep_remove(struct pci_dev *pdev) if (!oct) return; - cancel_work_sync(&oct->tx_timeout_task); cancel_work_sync(&oct->ctrl_mbox_task); netdev = oct->netdev; if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + cancel_work_sync(&oct->tx_timeout_task); oct->poll_non_ioq_intr = false; cancel_delayed_work_sync(&oct->intr_poll_task); octep_device_cleanup(oct); -- cgit v1.2.3 From 607a7a45cdf38c1901e0d81e4e00a2a88786330a Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 10 Aug 2023 17:01:13 +0200 Subject: octeon_ep: cancel ctrl_mbox_task after intr_poll_task intr_poll_task may queue ctrl_mbox_task. The function octep_poll_non_ioq_interrupts_cn93_pf does this. When removing the driver and canceling these two works, cancel ctrl_mbox_task last to guarantee it does not run anymore. Fixes: 24d4333233b3 ("octeon_ep: poll for control messages") Signed-off-by: Michal Schmidt Link: https://lore.kernel.org/r/20230810150114.107765-4-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index d8066bff5f7b..ab69b6d62509 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1200,7 +1200,6 @@ static void octep_remove(struct pci_dev *pdev) if (!oct) return; - cancel_work_sync(&oct->ctrl_mbox_task); netdev = oct->netdev; if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); @@ -1208,6 +1207,7 @@ static void octep_remove(struct pci_dev *pdev) cancel_work_sync(&oct->tx_timeout_task); oct->poll_non_ioq_intr = false; cancel_delayed_work_sync(&oct->intr_poll_task); + cancel_work_sync(&oct->ctrl_mbox_task); octep_device_cleanup(oct); pci_release_mem_regions(pdev); free_netdev(netdev); -- cgit v1.2.3 From 758c91078165ae641b698750a72eafe7968b3756 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 10 Aug 2023 17:01:14 +0200 Subject: octeon_ep: cancel queued works in probe error path If it fails to get the devices's MAC address, octep_probe exits while leaving the delayed work intr_poll_task queued. When the work later runs, it's a use after free. Move the cancelation of intr_poll_task from octep_remove into octep_device_cleanup. This does not change anything in the octep_remove flow, but octep_device_cleanup is called also in the octep_probe error path, where the cancelation is needed. Note that the cancelation of ctrl_mbox_task has to follow intr_poll_task's, because the ctrl_mbox_task may be queued by intr_poll_task. Fixes: 24d4333233b3 ("octeon_ep: poll for control messages") Signed-off-by: Michal Schmidt Link: https://lore.kernel.org/r/20230810150114.107765-5-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index ab69b6d62509..4424de2ffd70 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1038,6 +1038,10 @@ static void octep_device_cleanup(struct octep_device *oct) { int i; + oct->poll_non_ioq_intr = false; + cancel_delayed_work_sync(&oct->intr_poll_task); + cancel_work_sync(&oct->ctrl_mbox_task); + dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n"); for (i = 0; i < OCTEP_MAX_VF; i++) { @@ -1205,9 +1209,6 @@ static void octep_remove(struct pci_dev *pdev) unregister_netdev(netdev); cancel_work_sync(&oct->tx_timeout_task); - oct->poll_non_ioq_intr = false; - cancel_delayed_work_sync(&oct->intr_poll_task); - cancel_work_sync(&oct->ctrl_mbox_task); octep_device_cleanup(oct); pci_release_mem_regions(pdev); free_netdev(netdev); -- cgit v1.2.3 From 8a519a572598b7c0c07b02f69bf5b4e8dd4b2d7d Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Sat, 12 Aug 2023 10:30:16 +0800 Subject: net: veth: Page pool creation error handling for existing pools only The failure handling procedure destroys page pools for all queues, including those that haven't had their page pool created yet. this patch introduces necessary adjustments to prevent potential risks and inconsistency with the error handling behavior. Fixes: 0ebab78cbcbf ("net: veth: add page_pool for page recycling") Acked-by: Jesper Dangaard Brouer Signed-off-by: Liang Chen Link: https://lore.kernel.org/r/20230812023016.10553-1-liangchen.linux@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 614f3e3efab0..509e901da41d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1081,8 +1081,9 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end) err_xdp_ring: for (i--; i >= start; i--) ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); + i = end; err_page_pool: - for (i = start; i < end; i++) { + for (i--; i >= start; i--) { page_pool_destroy(priv->rq[i].page_pool); priv->rq[i].page_pool = NULL; } -- cgit v1.2.3 From 2d956177b7c96e62fac762a3b7da4318cde27a73 Mon Sep 17 00:00:00 2001 From: Pranjal Ramajor Asha Kanojiya Date: Wed, 2 Aug 2023 08:59:37 -0600 Subject: accel/qaic: Fix slicing memory leak The temporary buffer storing slicing configuration data from user is only freed on error. This is a memory leak. Free the buffer unconditionally. Fixes: ff13be830333 ("accel/qaic: Add datapath") Signed-off-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Carl Vanderlip Reviewed-by: Jeffrey Hugo Signed-off-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20230802145937.14827-1-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic_data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index e9a1cb779b30..6b6d981a71be 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -1021,6 +1021,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi bo->dbc = dbc; srcu_read_unlock(&dbc->ch_lock, rcu_id); drm_gem_object_put(obj); + kfree(slice_ent); srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); -- cgit v1.2.3 From 96d3c1cadedb6ae2e8965e19cd12caa244afbd9c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Aug 2023 15:23:06 +0300 Subject: accel/qaic: Clean up integer overflow checking in map_user_pages() The encode_dma() function has some validation on in_trans->size but it would be more clear to move those checks to find_and_map_user_pages(). The encode_dma() had two checks: if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size) return -EINVAL; The in_trans->addr variable is the starting address. The in_trans->size variable is the total size of the transfer. The transfer can occur in parts and the resources->xferred_dma_size tracks how many bytes we have already transferred. This patch introduces a new variable "remaining" which represents the amount we want to transfer (in_trans->size) minus the amount we have already transferred (resources->xferred_dma_size). I have modified the check for if in_trans->size is zero to instead check if in_trans->size is less than resources->xferred_dma_size. If we have already transferred more bytes than in_trans->size then there are negative bytes remaining which doesn't make sense. If there are zero bytes remaining to be copied, just return success. The check in encode_dma() checked that "addr + size" could not overflow and barring a driver bug that should work, but it's easier to check if we do this in parts. First check that "in_trans->addr + resources->xferred_dma_size" is safe. Then check that "xfer_start_addr + remaining" is safe. My final concern was that we are dealing with u64 values but on 32bit systems the kmalloc() function will truncate the sizes to 32 bits. So I calculated "total = in_trans->size + offset_in_page(xfer_start_addr);" and returned -EINVAL if it were >= SIZE_MAX. This will not affect 64bit systems. Fixes: 129776ac2e38 ("accel/qaic: Add control path") Signed-off-by: Dan Carpenter Reviewed-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Signed-off-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/24d3348b-25ac-4c1b-b171-9dae7c43e4e0@moroto.mountain --- drivers/accel/qaic/qaic_control.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index cfbc92da426f..388abd40024b 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -392,18 +392,31 @@ static int find_and_map_user_pages(struct qaic_device *qdev, struct qaic_manage_trans_dma_xfer *in_trans, struct ioctl_resources *resources, struct dma_xfer *xfer) { + u64 xfer_start_addr, remaining, end, total; unsigned long need_pages; struct page **page_list; unsigned long nr_pages; struct sg_table *sgt; - u64 xfer_start_addr; int ret; int i; - xfer_start_addr = in_trans->addr + resources->xferred_dma_size; + if (check_add_overflow(in_trans->addr, resources->xferred_dma_size, &xfer_start_addr)) + return -EINVAL; - need_pages = DIV_ROUND_UP(in_trans->size + offset_in_page(xfer_start_addr) - - resources->xferred_dma_size, PAGE_SIZE); + if (in_trans->size < resources->xferred_dma_size) + return -EINVAL; + remaining = in_trans->size - resources->xferred_dma_size; + if (remaining == 0) + return 0; + + if (check_add_overflow(xfer_start_addr, remaining, &end)) + return -EINVAL; + + total = remaining + offset_in_page(xfer_start_addr); + if (total >= SIZE_MAX) + return -EINVAL; + + need_pages = DIV_ROUND_UP(total, PAGE_SIZE); nr_pages = need_pages; @@ -435,7 +448,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev, ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages, offset_in_page(xfer_start_addr), - in_trans->size - resources->xferred_dma_size, GFP_KERNEL); + remaining, GFP_KERNEL); if (ret) { ret = -ENOMEM; goto free_sgt; @@ -566,9 +579,6 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list QAIC_MANAGE_EXT_MSG_LENGTH) return -ENOMEM; - if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size) - return -EINVAL; - xfer = kmalloc(sizeof(*xfer), GFP_KERNEL); if (!xfer) return -ENOMEM; -- cgit v1.2.3 From e4dd0d3a2f64b8bd8029ec70f52bdbebd0644408 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 11 Aug 2023 10:37:47 +0800 Subject: net: fix the RTO timer retransmitting skb every 1ms if linear option is enabled In the real workload, I encountered an issue which could cause the RTO timer to retransmit the skb per 1ms with linear option enabled. The amount of lost-retransmitted skbs can go up to 1000+ instantly. The root cause is that if the icsk_rto happens to be zero in the 6th round (which is the TCP_THIN_LINEAR_RETRIES value), then it will always be zero due to the changed calculation method in tcp_retransmit_timer() as follows: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); Above line could be converted to icsk->icsk_rto = min(0 << 1, TCP_RTO_MAX) = 0 Therefore, the timer expires so quickly without any doubt. I read through the RFC 6298 and found that the RTO value can be rounded up to a certain value, in Linux, say TCP_RTO_MIN as default, which is regarded as the lower bound in this patch as suggested by Eric. Fixes: 36e31b0af587 ("net: TCP thin linear timeouts") Suggested-by: Eric Dumazet Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 470f581eedd4..206418b6d7c4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -591,7 +591,9 @@ out_reset_timer: tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; - icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + icsk->icsk_rto = clamp(__tcp_set_rto(tp), + tcp_rto_min(sk), + TCP_RTO_MAX); } else if (sk->sk_state != TCP_SYN_SENT || icsk->icsk_backoff > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { -- cgit v1.2.3 From b9f052dc68f69dac89fe1e24693354c033daa091 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2023 20:40:17 +0200 Subject: netfilter: nf_tables: fix false-positive lockdep splat ->abort invocation may cause splat on debug kernels: WARNING: suspicious RCU usage net/netfilter/nft_set_pipapo.c:1697 suspicious rcu_dereference_check() usage! [..] rcu_scheduler_active = 2, debug_locks = 1 1 lock held by nft/133554: [..] (nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid [..] lockdep_rcu_suspicious+0x1ad/0x260 nft_pipapo_abort+0x145/0x180 __nf_tables_abort+0x5359/0x63d0 nf_tables_abort+0x24/0x40 nfnetlink_rcv+0x1a0a/0x22c0 netlink_unicast+0x73c/0x900 netlink_sendmsg+0x7f0/0xc20 ____sys_sendmsg+0x48d/0x760 Transaction mutex is held, so parallel updates are not possible. Switch to _protected and check mutex is held for lockdep enabled builds. Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index a5b8301afe4a..5fa12cfc7b84 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1697,6 +1697,17 @@ static void nft_pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) +{ +#ifdef CONFIG_PROVE_LOCKING + const struct net *net = read_pnet(&set->net); + + return lockdep_is_held(&nft_pernet(net)->commit_mutex); +#else + return true; +#endif +} + static void nft_pipapo_abort(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); @@ -1705,7 +1716,7 @@ static void nft_pipapo_abort(const struct nft_set *set) if (!priv->dirty) return; - m = rcu_dereference(priv->match); + m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); new_clone = pipapo_clone(m); if (IS_ERR(new_clone)) -- cgit v1.2.3 From 08713cb006b6f07434f276c5ee214fb20c7fd965 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 10 Aug 2023 23:59:03 +0200 Subject: netfilter: nf_tables: fix kdoc warnings after gc rework Jakub Kicinski says: We've got some new kdoc warnings here: net/netfilter/nft_set_pipapo.c:1557: warning: Function parameter or member '_set' not described in 'pipapo_gc' net/netfilter/nft_set_pipapo.c:1557: warning: Excess function parameter 'set' description in 'pipapo_gc' include/net/netfilter/nf_tables.h:577: warning: Function parameter or member 'dead' not described in 'nft_set' Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20230810104638.746e46f1@kernel.org/ Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nft_set_pipapo.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 35870858ddf2..e9ae567c037d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -534,6 +534,7 @@ struct nft_set_elem_expr { * @expr: stateful expression * @ops: set ops * @flags: set flags + * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 5fa12cfc7b84..f95b3844162e 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1549,7 +1549,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @set: nftables API set representation + * @_set: nftables API set representation * @m: Matching data */ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) -- cgit v1.2.3 From 90e5b3462efa37b8bba82d7c4e63683856e188af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2023 13:05:16 +0200 Subject: netfilter: nf_tables: deactivate catchall elements in next generation When flushing, individual set elements are disabled in the next generation via the ->flush callback. Catchall elements are not disabled. This is incorrect and may lead to double-deactivations of catchall elements which then results in memory leaks: WARNING: CPU: 1 PID: 3300 at include/net/netfilter/nf_tables.h:1172 nft_map_deactivate+0x549/0x730 CPU: 1 PID: 3300 Comm: nft Not tainted 6.5.0-rc5+ #60 RIP: 0010:nft_map_deactivate+0x549/0x730 [..] ? nft_map_deactivate+0x549/0x730 nf_tables_delset+0xb66/0xeb0 (the warn is due to nft_use_dec() detecting underflow). Fixes: aaa31047a6d2 ("netfilter: nftables: add catch-all set element support") Reported-by: lonial con Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c62227ae7746..6f31022cacc6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7091,6 +7091,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, ret = __nft_set_catchall_flush(ctx, set, &elem); if (ret < 0) break; + nft_set_elem_change_active(ctx->net, set, ext); } return ret; -- cgit v1.2.3 From 7845914f45f066497ac75b30c50dbc735e84e884 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2023 20:03:57 +0200 Subject: netfilter: nf_tables: don't fail inserts if duplicate has expired nftables selftests fail: run-tests.sh testcases/sets/0044interval_overlap_0 Expected: 0-2 . 0-3, got: W: [FAILED] ./testcases/sets/0044interval_overlap_0: got 1 Insertion must ignore duplicate but expired entries. Moreover, there is a strange asymmetry in nft_pipapo_activate: It refetches the current element, whereas the other ->activate callbacks (bitmap, hash, rhash, rbtree) use elem->priv. Same for .remove: other set implementations take elem->priv, nft_pipapo_remove fetches elem->priv, then does a relookup, remove this. I suspect this was the reason for the change that prompted the removal of the expired check in pipapo_get() in the first place, but skipping exired elements there makes no sense to me, this helper is used for normal get requests, insertions (duplicate check) and deactivate callback. In first two cases expired elements must be skipped. For ->deactivate(), this gets called for DELSETELEM, so it seems to me that expired elements should be skipped as well, i.e. delete request should fail with -ENOENT error. Fixes: 24138933b97b ("netfilter: nf_tables: don't skip expired elements during walk") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f95b3844162e..3757fcc55723 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -566,6 +566,8 @@ next_match: goto out; if (last) { + if (nft_set_elem_expired(&f->mt[b].e->ext)) + goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) goto next_match; @@ -600,17 +602,8 @@ out: static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - struct nft_pipapo_elem *ret; - - ret = pipapo_get(net, set, (const u8 *)elem->key.val.data, + return pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); - if (IS_ERR(ret)) - return ret; - - if (nft_set_elem_expired(&ret->ext)) - return ERR_PTR(-ENOENT); - - return ret; } /** @@ -1743,11 +1736,7 @@ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_pipapo_elem *e; - - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0); - if (IS_ERR(e)) - return; + struct nft_pipapo_elem *e = elem->priv; nft_set_elem_change_active(net, set, &e->ext); } @@ -1961,10 +1950,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, data = (const u8 *)nft_set_ext_key(&e->ext); - e = pipapo_get(net, set, data, 0); - if (IS_ERR(e)) - return; - while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *match_start, *match_end; -- cgit v1.2.3 From 9bfab6d23a2865966a4f89a96536fbf23f83bc8c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 15 Aug 2023 14:08:47 -0400 Subject: netfilter: set default timeout to 3 secs for sctp shutdown send and recv state In SCTP protocol, it is using the same timer (T2 timer) for SHUTDOWN and SHUTDOWN_ACK retransmission. However in sctp conntrack the default timeout value for SCTP_CONNTRACK_SHUTDOWN_ACK_SENT state is 3 secs while it's 300 msecs for SCTP_CONNTRACK_SHUTDOWN_SEND/RECV state. As Paolo Valerio noticed, this might cause unwanted expiration of the ct entry. In my test, with 1s tc netem delay set on the NAT path, after the SHUTDOWN is sent, the sctp ct entry enters SCTP_CONNTRACK_SHUTDOWN_SEND state. However, due to 300ms (too short) delay, when the SHUTDOWN_ACK is sent back from the peer, the sctp ct entry has expired and been deleted, and then the SHUTDOWN_ACK has to be dropped. Also, it is confusing these two sysctl options always show 0 due to all timeout values using sec as unit: net.netfilter.nf_conntrack_sctp_timeout_shutdown_recd = 0 net.netfilter.nf_conntrack_sctp_timeout_shutdown_sent = 0 This patch fixes it by also using 3 secs for sctp shutdown send and recv state in sctp conntrack, which is also RTO.initial value in SCTP protocol. Note that the very short time value for SCTP_CONNTRACK_SHUTDOWN_SEND/RECV was probably used for a rare scenario where SHUTDOWN is sent on 1st path but SHUTDOWN_ACK is replied on 2nd path, then a new connection started immediately on 1st path. So this patch also moves from SHUTDOWN_SEND/RECV to CLOSE when receiving INIT in the ORIGINAL direction. Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Reported-by: Paolo Valerio Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Florian Westphal --- Documentation/networking/nf_conntrack-sysctl.rst | 4 ++-- net/netfilter/nf_conntrack_proto_sctp.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 8b1045c3b59e..c383a394c665 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds) Default is set to (hb_interval * path_max_retrans + rto_max) nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds) - default 0.3 + default 3 nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds) - default 0.3 + default 3 nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds) default 3 diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 91eacc9b0b98..b6bcc8f2f46b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, - [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, - [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, + [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS, + [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, }; @@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ -/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW}, +/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW}, /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, -- cgit v1.2.3 From 5310760af1d4fbea1452bfc77db5f9a680f7ae47 Mon Sep 17 00:00:00 2001 From: Sishuai Gong Date: Thu, 10 Aug 2023 15:12:42 -0400 Subject: ipvs: fix racy memcpy in proc_do_sync_threshold When two threads run proc_do_sync_threshold() in parallel, data races could happen between the two memcpy(): Thread-1 Thread-2 memcpy(val, valp, sizeof(val)); memcpy(valp, val, sizeof(val)); This race might mess up the (struct ctl_table *) table->data, so we add a mutex lock to serialize them. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/netdev/B6988E90-0A1E-4B85-BF26-2DAF6D482433@gmail.com/ Signed-off-by: Sishuai Gong Acked-by: Simon Horman Acked-by: Julian Anastasov Signed-off-by: Florian Westphal --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 62606fb44d02..4bb0d90eca1c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1876,6 +1876,7 @@ static int proc_do_sync_threshold(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; int val[2]; int rc; @@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write, .mode = table->mode, }; + mutex_lock(&ipvs->sync_mutex); memcpy(val, valp, sizeof(val)); rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write) { @@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write, else memcpy(valp, val, sizeof(val)); } + mutex_unlock(&ipvs->sync_mutex); return rc; } @@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx].extra2 = ipvs; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; -- cgit v1.2.3 From 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Aug 2023 15:39:00 +0200 Subject: netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path Netlink event path is missing a synchronization point with GC transactions. Add GC sequence number update to netns release path and netlink event path, any GC transaction losing race will be discarded. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6f31022cacc6..8ac4dd8be1a2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9739,6 +9739,22 @@ static void nft_set_commit_update(struct list_head *set_update_list) } } +static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) +{ + unsigned int gc_seq; + + /* Bump gc counter, it becomes odd, this is the busy mark. */ + gc_seq = READ_ONCE(nft_net->gc_seq); + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + + return gc_seq; +} + +static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) +{ + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -9824,9 +9840,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) WRITE_ONCE(nft_net->base_seq, base_seq); - /* Bump gc counter, it becomes odd, this is the busy mark. */ - gc_seq = READ_ONCE(nft_net->gc_seq); - WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + gc_seq = nft_gc_seq_begin(nft_net); /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); @@ -10039,7 +10053,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); - WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + nft_gc_seq_end(nft_net, gc_seq); nf_tables_commit_release(net); return 0; @@ -11040,6 +11054,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, struct net *net = n->net; unsigned int deleted; bool restart = false; + unsigned int gc_seq; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; @@ -11047,6 +11062,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, nft_net = nft_pernet(net); deleted = 0; mutex_lock(&nft_net->commit_mutex); + + gc_seq = nft_gc_seq_begin(nft_net); + if (!list_empty(&nf_tables_destroy_list)) rcu_barrier(); again: @@ -11069,6 +11087,8 @@ again: if (restart) goto again; } + nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; @@ -11106,12 +11126,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); + unsigned int gc_seq; mutex_lock(&nft_net->commit_mutex); + + gc_seq = nft_gc_seq_begin(nft_net); + if (!list_empty(&nft_net->commit_list) || !list_empty(&nft_net->module_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); + __nft_release_tables(net); + + nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&nft_net->commit_mutex); WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); -- cgit v1.2.3 From 02c6c24402bf1c1e986899c14ba22a10b510916b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Aug 2023 15:39:01 +0200 Subject: netfilter: nf_tables: GC transaction race with netns dismantle Use maybe_get_net() since GC workqueue might race with netns exit path. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8ac4dd8be1a2..3e841e45f2c0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9481,9 +9481,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, if (!trans) return NULL; + trans->net = maybe_get_net(net); + if (!trans->net) { + kfree(trans); + return NULL; + } + refcount_inc(&set->refs); trans->set = set; - trans->net = get_net(net); trans->seq = gc_seq; return trans; -- cgit v1.2.3 From 23185c6aed1ffb8fc44087880ba2767aba493779 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Aug 2023 15:39:02 +0200 Subject: netfilter: nft_dynset: disallow object maps Do not allow to insert elements from datapath to objects maps. Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nft_dynset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 4fb34d76dbea..5c5cc01c73c5 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_OBJECT) + return -EOPNOTSUPP; + if (set->ops->update == NULL) return -EOPNOTSUPP; -- cgit v1.2.3 From 096516d092d54604d590827d05b1022c8f326639 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Sat, 12 Aug 2023 21:41:47 -0700 Subject: net: phy: broadcom: stub c45 read/write for 54810 The 54810 does not support c45. The mmd_phy_indirect accesses return arbirtary values leading to odd behavior like saying it supports EEE when it doesn't. We also see that reading/writing these non-existent MMD registers leads to phy instability in some cases. Fixes: b14995ac2527 ("net: phy: broadcom: Add BCM54810 PHY entry") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/1691901708-28650-1-git-send-email-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/broadcom.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 59cae0d808aa..04b2e6eeb195 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -542,6 +542,17 @@ static int bcm54xx_resume(struct phy_device *phydev) return bcm54xx_config_init(phydev); } +static int bcm54810_read_mmd(struct phy_device *phydev, int devnum, u16 regnum) +{ + return -EOPNOTSUPP; +} + +static int bcm54810_write_mmd(struct phy_device *phydev, int devnum, u16 regnum, + u16 val) +{ + return -EOPNOTSUPP; +} + static int bcm54811_config_init(struct phy_device *phydev) { int err, reg; @@ -1103,6 +1114,8 @@ static struct phy_driver broadcom_drivers[] = { .get_strings = bcm_phy_get_strings, .get_stats = bcm54xx_get_stats, .probe = bcm54xx_phy_probe, + .read_mmd = bcm54810_read_mmd, + .write_mmd = bcm54810_write_mmd, .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, .config_intr = bcm_phy_config_intr, -- cgit v1.2.3 From dafcbce07136d799edc4c67f04f9fd69ff1eac1f Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 14 Aug 2023 11:23:01 +0800 Subject: team: Fix incorrect deletion of ETH_P_8021AD protocol vid from slaves Similar to commit 01f4fd270870 ("bonding: Fix incorrect deletion of ETH_P_8021AD protocol vid from slaves"), we can trigger BUG_ON(!vlan_info) in unregister_vlan_dev() with the following testcase: # ip netns add ns1 # ip netns exec ns1 ip link add team1 type team # ip netns exec ns1 ip link add team_slave type veth peer veth2 # ip netns exec ns1 ip link set team_slave master team1 # ip netns exec ns1 ip link add link team_slave name team_slave.10 type vlan id 10 protocol 802.1ad # ip netns exec ns1 ip link add link team1 name team1.10 type vlan id 10 protocol 802.1ad # ip netns exec ns1 ip link set team_slave nomaster # ip netns del ns1 Add S-VLAN tag related features support to team driver. So the team driver will always propagate the VLAN info to its slaves. Fixes: 8ad227ff89a7 ("net: vlan: add 802.1ad support") Suggested-by: Ido Schimmel Signed-off-by: Ziyang Xuan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230814032301.2804971-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d3dc22509ea5..382756c3fb83 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2200,7 +2200,9 @@ static void team_setup(struct net_device *dev) dev->hw_features = TEAM_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER; + NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_STAG_RX | + NETIF_F_HW_VLAN_STAG_FILTER; dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; dev->features |= dev->hw_features; -- cgit v1.2.3 From a552bfa16bab4ce901ee721346a28c4e483f4066 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Aug 2023 13:38:40 -0700 Subject: net: openvswitch: reject negative ifindex Recent changes in net-next (commit 759ab1edb56c ("net: store netdevs in an xarray")) refactored the handling of pre-assigned ifindexes and let syzbot surface a latent problem in ovs. ovs does not validate ifindex, making it possible to create netdev ports with negative ifindex values. It's easy to repro with YNL: $ ./cli.py --spec netlink/specs/ovs_datapath.yaml \ --do new \ --json '{"upcall-pid": 1, "name":"my-dp"}' $ ./cli.py --spec netlink/specs/ovs_vport.yaml \ --do new \ --json '{"upcall-pid": "00000001", "name": "some-port0", "dp-ifindex":3,"ifindex":4294901760,"type":2}' $ ip link show -65536: some-port0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 7a:48:21:ad:0b:fb brd ff:ff:ff:ff:ff:ff ... Validate the inputs. Now the second command correctly returns: $ ./cli.py --spec netlink/specs/ovs_vport.yaml \ --do new \ --json '{"upcall-pid": "00000001", "name": "some-port0", "dp-ifindex":3,"ifindex":4294901760,"type":2}' lib.ynl.NlError: Netlink error: Numerical result out of range nl_len = 108 (92) nl_flags = 0x300 nl_type = 2 error: -34 extack: {'msg': 'integer out of range', 'unknown': [[type:4 len:36] b'\x0c\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x03\x00\xff\xff\xff\x7f\x00\x00\x00\x00\x08\x00\x01\x00\x08\x00\x00\x00'], 'bad-attr': '.ifindex'} Accept 0 since it used to be silently ignored. Fixes: 54c4ef34c4b6 ("openvswitch: allow specifying ifindex of new interfaces") Reported-by: syzbot+7456b5dcf65111553320@syzkaller.appspotmail.com Reviewed-by: Leon Romanovsky Reviewed-by: Aaron Conole Link: https://lore.kernel.org/r/20230814203840.2908710-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a6d2a0b1aa21..3d7a91e64c88 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1829,7 +1829,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX] - ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0; + ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0; /* So far only local changes have been made, now need the lock. */ ovs_lock(); @@ -2049,7 +2049,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0, PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)), - [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 }, + [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0), }; static const struct genl_small_ops dp_datapath_genl_ops[] = { @@ -2302,7 +2302,7 @@ restart: parms.port_no = port_no; parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX] - ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0; + ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0; vport = new_vport(&parms); err = PTR_ERR(vport); @@ -2539,7 +2539,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, - [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0), [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED }, }; -- cgit v1.2.3 From 9944d203fa63721b87eee84a89f7275dc3d25c05 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Tue, 15 Aug 2023 00:00:30 +0300 Subject: broadcom: b44: Use b44_writephy() return value Return result of b44_writephy() instead of zero to deal with possible error. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Artem Chernyshev Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 392ec09a1d8a..3e4fb3c3e834 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1793,11 +1793,9 @@ static int b44_nway_reset(struct net_device *dev) b44_readphy(bp, MII_BMCR, &bmcr); b44_readphy(bp, MII_BMCR, &bmcr); r = -EINVAL; - if (bmcr & BMCR_ANENABLE) { - b44_writephy(bp, MII_BMCR, - bmcr | BMCR_ANRESTART); - r = 0; - } + if (bmcr & BMCR_ANENABLE) + r = b44_writephy(bp, MII_BMCR, + bmcr | BMCR_ANRESTART); spin_unlock_irq(&bp->lock); return r; -- cgit v1.2.3 From 0b70f1950e79b37df5617c83ed1ad1a4cc7fc89c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 15 Aug 2023 17:27:49 +0200 Subject: mailmap: add entries for Simon Horman Retire some of my email addresses from Kernel activities. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 5dd318121982..e50662536c48 100644 --- a/.mailmap +++ b/.mailmap @@ -538,6 +538,8 @@ Shuah Khan Sibi Sankar Sid Manning Simon Arlott +Simon Horman +Simon Horman Simon Kelley Sricharan Ramabadhran Srinivas Ramana -- cgit v1.2.3 From b35c968363c036e93f95cb233182f2d1c44605c2 Mon Sep 17 00:00:00 2001 From: Prasad Pandit Date: Wed, 16 Aug 2023 13:26:06 +0530 Subject: ipv6: fix indentation of a config attribute Fix indentation of a type attribute of IPV6_VTI config entry. Signed-off-by: Prasad Pandit Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 658bfed1df8b..08d4b7132d4c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -152,7 +152,7 @@ config INET6_TUNNEL default n config IPV6_VTI -tristate "Virtual (secure) IPv6: tunneling" + tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL select NET_IP_TUNNEL select XFRM -- cgit v1.2.3 From 751969e5b1196821ef78f0aa664a8a97c92c9057 Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Mon, 7 Aug 2023 16:46:04 +0200 Subject: iavf: fix FDIR rule fields masks validation Return an error if a field's mask is neither full nor empty. When a mask is only partial the field is not being used for rule programming but it gives a wrong impression it is used. Fix by returning an error on any partial mask to make it clear they are not supported. The ip_ver assignment is moved earlier in code to allow using it in iavf_validate_fdir_fltr_masks. Fixes: 527691bf0682 ("iavf: Support IPv4 Flow Director filters") Fixes: e90cbc257a6f ("iavf: Support IPv6 Flow Director filters") Signed-off-by: Piotr Gardocki Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 10 ++++ drivers/net/ethernet/intel/iavf/iavf_fdir.c | 77 ++++++++++++++++++++++++-- drivers/net/ethernet/intel/iavf/iavf_fdir.h | 2 + 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 460ca561819a..a34303ad057d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1289,6 +1289,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc; fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst; fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos; + fltr->ip_ver = 4; break; case AH_V4_FLOW: case ESP_V4_FLOW: @@ -1300,6 +1301,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst; fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi; fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos; + fltr->ip_ver = 4; break; case IPV4_USER_FLOW: fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src; @@ -1312,6 +1314,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes; fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos; fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto; + fltr->ip_ver = 4; break; case TCP_V6_FLOW: case UDP_V6_FLOW: @@ -1330,6 +1333,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc; fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst; fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass; + fltr->ip_ver = 6; break; case AH_V6_FLOW: case ESP_V6_FLOW: @@ -1345,6 +1349,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe sizeof(struct in6_addr)); fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi; fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass; + fltr->ip_ver = 6; break; case IPV6_USER_FLOW: memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src, @@ -1361,6 +1366,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes; fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass; fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto; + fltr->ip_ver = 6; break; case ETHER_FLOW: fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto; @@ -1371,6 +1377,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe return -EINVAL; } + err = iavf_validate_fdir_fltr_masks(adapter, fltr); + if (err) + return err; + if (iavf_fdir_is_dup_fltr(adapter, fltr)) return -EEXIST; diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c index 505e82ebafe4..03e774bd2a5b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c @@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = { } }; +static const struct in6_addr ipv6_addr_zero_mask = { + .in6_u = { + .u6_addr8 = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + } +}; + +/** + * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks + * @adapter: pointer to the VF adapter structure + * @fltr: Flow Director filter data structure + * + * Returns 0 if all masks of packet fields are either full or empty. Returns + * error on at least one partial mask. + */ +int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr) +{ + if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_ver == 4) { + if (fltr->ip_mask.v4_addrs.src_ip && + fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.v4_addrs.dst_ip && + fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX) + goto partial_mask; + } else if (fltr->ip_ver == 6) { + if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask, + sizeof(struct in6_addr)) && + memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) + goto partial_mask; + + if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask, + sizeof(struct in6_addr)) && + memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) + goto partial_mask; + + if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX) + goto partial_mask; + } + + if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX) + goto partial_mask; + + if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.l4_header && + fltr->ip_mask.l4_header != htonl(U32_MAX)) + goto partial_mask; + + return 0; + +partial_mask: + dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n"); + return -EOPNOTSUPP; +} + /** * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload * @fltr: Flow Director filter data structure @@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr, VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST); } - fltr->ip_ver = 4; - return 0; } @@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr, VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST); } - fltr->ip_ver = 6; - return 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h index 33c55c366315..9eb9f73f6adf 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -110,6 +110,8 @@ struct iavf_fdir_fltr { struct virtchnl_fdir_add vc_add_msg; }; +int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr); int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); -- cgit v1.2.3 From 2f2beb8874cb0844e84ad26e990f05f4f13ff63f Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 2 Aug 2023 09:47:32 +0200 Subject: i40e: fix misleading debug logs Change "write" into the actual "read" word. Change parameters description. Fixes: 7073f46e443e ("i40e: Add AQ commands for NVM Update for X722") Signed-off-by: Aleksandr Loktionov Signed-off-by: Andrii Staikov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 9da0c87f0328..f99c1f7fec40 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -210,11 +210,11 @@ read_nvm_exit: * @hw: pointer to the HW structure. * @module_pointer: module pointer location in words from the NVM beginning * @offset: offset in words from module start - * @words: number of words to write - * @data: buffer with words to write to the Shadow RAM + * @words: number of words to read + * @data: buffer with words to read to the Shadow RAM * @last_command: tells the AdminQ that this is the last command * - * Writes a 16 bit words buffer to the Shadow RAM using the admin command. + * Reads a 16 bit words buffer to the Shadow RAM using the admin command. **/ static int i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer, u32 offset, @@ -234,18 +234,18 @@ static int i40e_read_nvm_aq(struct i40e_hw *hw, */ if ((offset + words) > hw->nvm.sr_size) i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write error: offset %d beyond Shadow RAM limit %d\n", + "NVM read error: offset %d beyond Shadow RAM limit %d\n", (offset + words), hw->nvm.sr_size); else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS) - /* We can write only up to 4KB (one sector), in one AQ write */ + /* We can read only up to 4KB (one sector), in one AQ write */ i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write fail error: tried to write %d words, limit is %d.\n", + "NVM read fail error: tried to read %d words, limit is %d.\n", words, I40E_SR_SECTOR_SIZE_IN_WORDS); else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS) != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS)) - /* A single write cannot spread over two sectors */ + /* A single read cannot spread over two sectors */ i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n", + "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n", offset, words); else ret_code = i40e_aq_read_nvm(hw, module_pointer, -- cgit v1.2.3 From b6360a5ec31d160d58c1a64387b323b556cedca8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 9 Aug 2023 18:06:05 +0800 Subject: drm/amd/pm: disallow the fan setting if there is no fan on smu 13.0.0 drm/amd/pm: disallow the fan setting if there is no fan on smu 13.0.0 V2: depend on pm.no_fan to check Signed-off-by: Kenneth Feng Reviewed-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index fddcd834bcec..0fb6be11a0cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -331,6 +331,7 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) struct smu_13_0_0_powerplay_table *powerplay_table = table_context->power_play_table; struct smu_baco_context *smu_baco = &smu->smu_baco; + PPTable_t *pptable = smu->smu_table.driver_pptable; #if 0 PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = @@ -371,6 +372,9 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) table_context->thermal_controller_type = powerplay_table->thermal_controller_type; + smu->adev->pm.no_fan = + !(pptable->SkuTable.FeaturesToRun[0] & (1 << FEATURE_FAN_CONTROL_BIT)); + return 0; } -- cgit v1.2.3 From 0d6f374c0c66e8ecc2897f0837d2cb4bd169bb42 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Tue, 8 Aug 2023 10:59:25 +0800 Subject: drm/amdgpu: disable mcbp if parameter zero is set The parameter amdgpu_mcbp shall have priority against the default value calculated from the chip version. User could disable mcbp by setting the parameter mcbp as zero. v2: do not trigger preemption in sw ring muxer when mcbp is disabled. Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 45e9d737e5b8..5539ec77153f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3722,10 +3722,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { if (amdgpu_mcbp == 1) adev->gfx.mcbp = true; - - if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) + else if (amdgpu_mcbp == 0) + adev->gfx.mcbp = false; + else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && + adev->gfx.num_gfx_rings) adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index b779ee4bbaa7..e1ee1c7117fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -397,7 +397,7 @@ void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring) struct amdgpu_ring_mux *mux = &adev->gfx.muxer; WARN_ON(!ring->is_sw_ring); - if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { + if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { if (amdgpu_mcbp_scan(mux) > 0) amdgpu_mcbp_trigger_preempt(mux); return; -- cgit v1.2.3 From 6a92761a86817ad15c9a562e2a809386237fae3e Mon Sep 17 00:00:00 2001 From: Umio Yasuno Date: Tue, 8 Aug 2023 06:40:42 +0000 Subject: drm/amdgpu/pm: fix throttle_status for other than MP1 11.0.7 Use the right metrics table version based on the firmware. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2720 Reviewed-by: Evan Quan Signed-off-by: Umio Yasuno Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 0cda3b276f61..f0800c0c5168 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -588,7 +588,9 @@ err0_out: return -ENOMEM; } -static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu) +static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu, + bool use_metrics_v3, + bool use_metrics_v2) { struct smu_table_context *smu_table= &smu->smu_table; SmuMetricsExternal_t *metrics_ext = @@ -596,13 +598,11 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *s uint32_t throttler_status = 0; int i; - if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) { + if (use_metrics_v3) { for (i = 0; i < THROTTLER_COUNT; i++) throttler_status |= (metrics_ext->SmuMetrics_V3.ThrottlingPercentage[i] ? 1U << i : 0); - } else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) { + } else if (use_metrics_v2) { for (i = 0; i < THROTTLER_COUNT; i++) throttler_status |= (metrics_ext->SmuMetrics_V2.ThrottlingPercentage[i] ? 1U << i : 0); @@ -864,7 +864,7 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_THROTTLER_STATUS: - *value = sienna_cichlid_get_throttler_status_locked(smu); + *value = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2); break; case METRICS_CURR_FANSPEED: *value = use_metrics_v3 ? metrics_v3->CurrFanSpeed : @@ -4017,7 +4017,7 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, gpu_metrics->current_dclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] : use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1]; - gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu); + gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2); gpu_metrics->indep_throttle_status = smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, sienna_cichlid_throttler_map); -- cgit v1.2.3 From 8d036427f0042a91136e6f19a39542eedec4e96c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 10 Aug 2023 16:10:03 +0530 Subject: drm/amd/pm: Fix temperature unit of SMU v13.0.6 Temperature needs to be reported in millidegree Celsius. Signed-off-by: Lijo Lazar Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index fe4ee2daa5d8..4fafcfbe3e5f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -708,16 +708,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, *value = SMUQ10_TO_UINT(metrics->SocketPower) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature); + *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature); + *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. */ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature); + *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: *value = UINT_MAX; -- cgit v1.2.3 From d621114ffba56b032e91ee82d6469b2f9f0b2427 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Thu, 10 Aug 2023 20:44:54 +0800 Subject: drm/amd/pm: Update pci link width for smu v13.0.6 Update addresses of PCIE link width registers, & link width format used to populate gpu metrics table for smu v13.0.6 v2: Removed ESM register update v3: Updated patch subject and message Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 4fafcfbe3e5f..dc6104a04dce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -81,9 +81,10 @@ #define EPSILON 1 #define smnPCIE_ESM_CTRL 0x193D0 -#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1ab40288 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4 +#define MAX_LINK_WIDTH 6 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), @@ -1969,6 +1970,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct amdgpu_device *adev = smu->adev; int ret = 0, inst0, xcc0; MetricsTable_t *metrics; + u16 link_width_level; inst0 = adev->sdma.instance[0].aid_id; xcc0 = GET_INST(GC, 0); @@ -2019,8 +2021,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->throttle_status = 0; if (!(adev->flags & AMD_IS_APU)) { + link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); + if (link_width_level > MAX_LINK_WIDTH) + link_width_level = 0; + gpu_metrics->pcie_link_width = - smu_v13_0_6_get_current_pcie_link_width_level(smu); + DECODE_LANE_WIDTH(link_width_level); gpu_metrics->pcie_link_speed = smu_v13_0_6_get_current_pcie_link_speed(smu); } -- cgit v1.2.3 From b25fdc048cb2250c7e859184f54d3261b55ad099 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 9 Aug 2023 16:45:04 -0400 Subject: drm/amdgpu: skip xcp drm device allocation when out of drm resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return 0 when drm device alloc failed with -ENOSPC in order to allow amdgpu drive loading. But the xcp without drm device node assigned won't be visiable in user space. This helps amdgpu driver loading on system which has more than 64 nodes, the current limitation. The proposal to add more drm nodes is discussed in public, which will support up to 2^20 nodes totally. kernel drm: https://lore.kernel.org/lkml/20230724211428.3831636-1-michal.winiarski@intel.com/T/ libdrm: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/305 Signed-off-by: James Zhu Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 13 ++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 9 ++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 9c9cca129498..565a1fa436d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) for (i = 1; i < MAX_XCP; i++) { ret = amdgpu_xcp_drm_dev_alloc(&p_ddev); - if (ret) + if (ret == -ENOSPC) { + dev_warn(adev->dev, + "Skip xcp node #%d when out of drm node resource.", i); + return 0; + } else if (ret) { return ret; + } /* Redirect all IOCTLs to the primary device */ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev; @@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev, return 0; for (i = 1; i < MAX_XCP; i++) { + if (!adev->xcp_mgr->xcp[i].ddev) + break; + ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); if (ret) return ret; @@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) return; for (i = 1; i < MAX_XCP; i++) { + if (!adev->xcp_mgr->xcp[i].ddev) + break; + p_ddev = adev->xcp_mgr->xcp[i].ddev; drm_dev_unplug(p_ddev); p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 61fc62f3e003..4a17bb7c7b27 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1965,7 +1965,14 @@ int kfd_topology_add_device(struct kfd_node *gpu) const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; gpu_id = kfd_generate_gpu_id(gpu); - pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + if (gpu->xcp && !gpu->xcp->ddev) { + dev_warn(gpu->adev->dev, + "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.", + gpu_id); + return 0; + } else { + pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + } /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, -- cgit v1.2.3 From f1740b1ab2703b2a057da7cf33b03297e0381aa0 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 14 Aug 2023 15:13:04 +0800 Subject: drm/amdgpu: skip fence GFX interrupts disable/enable for S0ix GFX v11.0.1 reported fence fallback timer expired issue on SDMA and GFX rings after S0ix resume. This is generated by EOP interrupts are disabled when S0ix suspend but fails to re-enable when resume because of the GFX is in GFXOFF. [ 203.349571] [drm] Fence fallback timer expired on ring sdma0 [ 203.349572] [drm] Fence fallback timer expired on ring gfx_0.0.0 [ 203.861635] [drm] Fence fallback timer expired on ring gfx_0.0.0 For S0ix, GFX is in GFXOFF state, avoid to touch the GFX registers to configure the fence driver interrupts for rings that belong to GFX. The interrupts configuration will be restored by GFXOFF exit. Signed-off-by: Tim Huang Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 41 +++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index c694b41f6461..7537f5aa76f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -551,6 +551,41 @@ int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether + * fence driver interrupts need to be restored. + * + * @ring: ring that to be checked + * + * Interrupts for rings that belong to GFX IP don't need to be restored + * when the target power state is s0ix. + * + * Return true if need to restore interrupts, false otherwise. + */ +static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool is_gfx_power_domain = false; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_SDMA: + /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ + if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) + is_gfx_power_domain = true; + break; + case AMDGPU_RING_TYPE_GFX: + case AMDGPU_RING_TYPE_COMPUTE: + case AMDGPU_RING_TYPE_KIQ: + case AMDGPU_RING_TYPE_MES: + is_gfx_power_domain = true; + break; + default: + break; + } + + return !(adev->in_s0ix && is_gfx_power_domain); +} + /** * amdgpu_fence_driver_hw_fini - tear down the fence driver * for all possible rings. @@ -579,7 +614,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) amdgpu_fence_driver_force_completion(ring); if (!drm_dev_is_unplugged(adev_to_drm(adev)) && - ring->fence_drv.irq_src) + ring->fence_drv.irq_src && + amdgpu_fence_need_ring_interrupt_restore(ring)) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); @@ -655,7 +691,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) continue; /* enable the interrupt */ - if (ring->fence_drv.irq_src) + if (ring->fence_drv.irq_src && + amdgpu_fence_need_ring_interrupt_restore(ring)) amdgpu_irq_get(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); } -- cgit v1.2.3 From a7b7d9e8aee4f71b4c7151702fd74237b8cef989 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 18 May 2023 11:52:51 -0500 Subject: drm/amd: flush any delayed gfxoff on suspend entry DCN 3.1.4 is reported to hang on s2idle entry if graphics activity is happening during entry. This is because GFXOFF was scheduled as delayed but RLC gets disabled in s2idle entry sequence which will hang GFX IP if not already in GFXOFF. To help this problem, flush any delayed work for GFXOFF early in s2idle entry sequence to ensure that it's off when RLC is changed. commit 4b31b92b143f ("drm/amdgpu: complete gfxoff allow signal during suspend without delay") modified power gating flow so that if called in s0ix that it ensured that GFXOFF wasn't put in work queue but instead processed immediately. This is dead code due to commit 10cb67eb8a1b ("drm/amdgpu: skip CG/PG for gfx during S0ix") because GFXOFF will now not be explicitly called as part of the suspend entry code. Remove that dead code. Signed-off-by: Mario Limonciello Signed-off-by: Tim Huang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5539ec77153f..6238701cde23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4394,6 +4394,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); + flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a33d4bc34cee..fd81b04559d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -692,15 +692,8 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - /* If going to s2idle, no need to wait */ - if (adev->in_s0ix) { - if (!amdgpu_dpm_set_powergating_by_smu(adev, - AMD_IP_BLOCK_TYPE_GFX, true)) - adev->gfx.gfx_off_state = true; - } else { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); - } } } else { if (adev->gfx.gfx_off_req_count == 0) { -- cgit v1.2.3 From 6ecc10295abb2fdd9c21dd17b34e4cacfd829cd4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Aug 2023 17:25:37 -0400 Subject: Revert "Revert "drm/amdgpu/display: change pipe policy for DCN 2.0"" This reverts commit 27dd79c00aeab36cd7542c7a4481a32549038659. It appears MPC_SPLIT_DYNAMIC still causes problems with multiple displays on DCN2.0 hardware. Switch back to MPC_SPLIT_AVOID_MULT_DISP. This increases power usage with multiple displays, but avoids hangs. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2475 Cc: Rodrigo Siqueira Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.4.x --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 4cc8de2627ce..9f2e24398cd7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -712,7 +712,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, -- cgit v1.2.3 From 34a79876d9f77e971115236bcf7b5d14a8ecf542 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 1 Aug 2023 20:41:03 +0300 Subject: net/mlx5e: XDP, Fix fifo overrun on XDP_REDIRECT Before this fix, running high rate traffic through XDP_REDIRECT with multibuf could overrun the fifo used to release the xdp frames after tx completion. This resulted in corrupted data being consumed on the free side. The culplirt was a miscalculation of the fifo size: the maximum ratio between fifo entries / data segments was incorrect. This ratio serves to calculate the max fifo size for a full sq where each packet uses the worst case number of entries in the fifo. This patch fixes the formula and names the constant. It also makes sure that future values will use a power of 2 number of entries for the fifo mask to work. Signed-off-by: Dragos Tatulea Fixes: 3f734b8c594b ("net/mlx5e: XDP, Use multiple single-entry objects in xdpi_fifo") Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 9e8e6184f9e4..ecfe93a479da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -84,6 +84,8 @@ enum mlx5e_xdp_xmit_mode { * MLX5E_XDP_XMIT_MODE_XSK: * none. */ +#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4 + union mlx5e_xdp_info { enum mlx5e_xdp_xmit_mode mode; union { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c27df14df145..f7b494125eee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1298,11 +1298,13 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of - * entries of all xmit_modes. - */ + int entries; size_t size; + /* upper bound for maximum num of entries of all xmit_modes. */ + entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS * + MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO); + size = array_size(sizeof(*xdpi_fifo->xi), entries); xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); if (!xdpi_fifo->xi) -- cgit v1.2.3 From 0fd23db0cc74cf6d28d26ce5e7802e982608d830 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 25 Jun 2023 10:43:03 +0300 Subject: net/mlx5: Fix mlx5_cmd_update_root_ft() error flow The cited patch change mlx5_cmd_update_root_ft() to work with multiple peer devices. However, it didn't align the error flow as well. Hence, Fix the error code to work with multiple peer devices. Fixes: 222dd185833e ("{net/RDMA}/mlx5: introduce lag_for_each_peer") Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index aab7059bf6e9..244cfd470903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -245,12 +245,20 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev)) { struct mlx5_core_dev *peer_dev; - int i; + int i, j; mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect, (!disconnect) ? ft->id : 0); if (err && !disconnect) { + mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) { + if (j < i) + mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1, + ns->root_ft->id); + else + break; + } + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); MLX5_SET(set_flow_table_root_in, in, table_id, ns->root_ft->id); -- cgit v1.2.3 From 1b254b791d7b7dea6e8adc887fbbd51746d8bb27 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 14 Aug 2023 16:49:32 +0200 Subject: drm/nouveau/disp: fix use-after-free in error handling of nouveau_connector_create We can't simply free the connector after calling drm_connector_init on it. We need to clean up the drm side first. It might not fix all regressions from commit 2b5d1c29f6c4 ("drm/nouveau/disp: PIOR DP uses GPIO for HPD, not PMGR AUX interrupts"), but at least it fixes a memory corruption in error handling related to that commit. Link: https://lore.kernel.org/lkml/20230806213107.GFZNARG6moWpFuSJ9W@fat_crate.local/ Fixes: 95983aea8003 ("drm/nouveau/disp: add connector class") Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230814144933.3956959-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nouveau_connector.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a2e0033e8a26..622f6eb9a8bf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1408,8 +1408,7 @@ nouveau_connector_create(struct drm_device *dev, ret = nvif_conn_ctor(&disp->disp, nv_connector->base.name, nv_connector->index, &nv_connector->conn); if (ret) { - kfree(nv_connector); - return ERR_PTR(ret); + goto drm_conn_err; } ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsHotplug", @@ -1426,8 +1425,7 @@ nouveau_connector_create(struct drm_device *dev, if (ret) { nvif_event_dtor(&nv_connector->hpd); nvif_conn_dtor(&nv_connector->conn); - kfree(nv_connector); - return ERR_PTR(ret); + goto drm_conn_err; } } } @@ -1475,4 +1473,9 @@ nouveau_connector_create(struct drm_device *dev, drm_connector_register(connector); return connector; + +drm_conn_err: + drm_connector_cleanup(connector); + kfree(nv_connector); + return ERR_PTR(ret); } -- cgit v1.2.3 From 23d775f12dcd23d052a4927195f15e970e27ab26 Mon Sep 17 00:00:00 2001 From: Alfred Lee Date: Mon, 14 Aug 2023 17:13:23 -0700 Subject: net: dsa: mv88e6xxx: Wait for EEPROM done before HW reset If the switch is reset during active EEPROM transactions, as in just after an SoC reset after power up, the I2C bus transaction may be cut short leaving the EEPROM internal I2C state machine in the wrong state. When the switch is reset again, the bad state machine state may result in data being read from the wrong memory location causing the switch to enter unexpected mode rendering it inoperational. Fixes: a3dcb3e7e70c ("net: dsa: mv88e6xxx: Wait for EEPROM done after HW reset") Signed-off-by: Alfred Lee Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230815001323.24739-1-l00g33k@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c7d51a539451..7af2f08a62f1 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3034,6 +3034,14 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip) /* If there is a GPIO connected to the reset pin, toggle it */ if (gpiod) { + /* If the switch has just been reset and not yet completed + * loading EEPROM, the reset may interrupt the I2C transaction + * mid-byte, causing the first EEPROM read after the reset + * from the wrong location resulting in the switch booting + * to wrong mode and inoperable. + */ + mv88e6xxx_g1_wait_eeprom_done(chip); + gpiod_set_value_cansleep(gpiod, 1); usleep_range(10000, 20000); gpiod_set_value_cansleep(gpiod, 0); -- cgit v1.2.3 From 50b6f2c8297793f7f3315623db78dcff85158e96 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Aug 2023 13:19:07 +0300 Subject: Revert "drm/edid: Fix csync detailed mode parsing" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit ca62297b2085b5b3168bd891ca24862242c635a1. Commit ca62297b2085 ("drm/edid: Fix csync detailed mode parsing") fixed EDID detailed mode sync parsing. Unfortunately, there are quite a few displays out there that have bogus (zero) sync field that are broken by the change. Zero means analog composite sync, which is not right for digital displays, and the modes get rejected. Regardless, it used to work, and it needs to continue to work. Revert the change. Rejecting modes with analog composite sync was the part that fixed the gitlab issue 8146 [1]. We'll need to get back to the drawing board with that. [1] https://gitlab.freedesktop.org/drm/intel/-/issues/8146 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8789 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8930 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9044 Fixes: ca62297b2085 ("drm/edid: Fix csync detailed mode parsing") Cc: Ville Syrjälä Cc: dri-devel@lists.freedesktop.org Cc: # v6.4+ Signed-off-by: Jani Nikula Acked-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230815101907.2900768-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 29 ++++++++--------------------- include/drm/drm_edid.h | 12 +++--------- 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index e0dbd9140726..1f470968ed14 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3456,6 +3456,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto connector->base.id, connector->name); return NULL; } + if (!(pt->misc & DRM_EDID_PT_SEPARATE_SYNC)) { + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Composite sync not supported\n", + connector->base.id, connector->name); + } /* it is incorrect if hsync/vsync width is zero */ if (!hsync_pulse_width || !vsync_pulse_width) { @@ -3502,27 +3506,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto if (info->quirks & EDID_QUIRK_DETAILED_SYNC_PP) { mode->flags |= DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC; } else { - switch (pt->misc & DRM_EDID_PT_SYNC_MASK) { - case DRM_EDID_PT_ANALOG_CSYNC: - case DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC: - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Analog composite sync!\n", - connector->base.id, connector->name); - mode->flags |= DRM_MODE_FLAG_CSYNC | DRM_MODE_FLAG_NCSYNC; - break; - case DRM_EDID_PT_DIGITAL_CSYNC: - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Digital composite sync!\n", - connector->base.id, connector->name); - mode->flags |= DRM_MODE_FLAG_CSYNC; - mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ? - DRM_MODE_FLAG_PCSYNC : DRM_MODE_FLAG_NCSYNC; - break; - case DRM_EDID_PT_DIGITAL_SEPARATE_SYNC: - mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ? - DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC; - mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ? - DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC; - break; - } + mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ? + DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC; + mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ? + DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC; } set_size: diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 169755d3de19..48e93f909ef6 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -61,15 +61,9 @@ struct std_timing { u8 vfreq_aspect; } __attribute__((packed)); -#define DRM_EDID_PT_SYNC_MASK (3 << 3) -# define DRM_EDID_PT_ANALOG_CSYNC (0 << 3) -# define DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC (1 << 3) -# define DRM_EDID_PT_DIGITAL_CSYNC (2 << 3) -# define DRM_EDID_PT_CSYNC_ON_RGB (1 << 1) /* analog csync only */ -# define DRM_EDID_PT_CSYNC_SERRATE (1 << 2) -# define DRM_EDID_PT_DIGITAL_SEPARATE_SYNC (3 << 3) -# define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1) /* also digital csync */ -# define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2) +#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1) +#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2) +#define DRM_EDID_PT_SEPARATE_SYNC (3 << 3) #define DRM_EDID_PT_STEREO (1 << 5) #define DRM_EDID_PT_INTERLACED (1 << 7) -- cgit v1.2.3 From fa165e1949976704500a442faeef8d9596faee76 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 15 Aug 2023 16:57:27 +0100 Subject: sfc: don't unregister flow_indr if it was never registered In efx_init_tc(), move the setting of efx->tc->up after the flow_indr_dev_register() call, so that if it fails, efx_fini_tc() won't call flow_indr_dev_unregister(). Fixes: 5b2e12d51bd8 ("sfc: bind indirect blocks for TC offload on EF100") Suggested-by: Pieter Jansen van Vuuren Reviewed-by: Martin Habets Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/a81284d7013aba74005277bd81104e4cfbea3f6f.1692114888.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 15ebd3973922..fe268b6c1cac 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -1657,10 +1657,10 @@ int efx_init_tc(struct efx_nic *efx) rc = efx_tc_configure_fallback_acts_reps(efx); if (rc) return rc; - efx->tc->up = true; rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx); if (rc) return rc; + efx->tc->up = true; return 0; } -- cgit v1.2.3 From 54c9016eb8eda55952a195b071359cd13f50ed9b Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 15 Aug 2023 16:57:28 +0100 Subject: sfc: don't fail probe if MAE/TC setup fails Existing comment in the source explains why we don't want efx_init_tc() failure to be fatal. Cited commit erroneously consolidated failure paths causing the probe to be failed in this case. Fixes: 7e056e2360d9 ("sfc: obtain device mac address based on firmware handle for ef100") Reviewed-by: Martin Habets Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/aa7f589dd6028bd1ad49f0a85f37ab33c09b2b45.1692114888.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef100_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 7adde9639c8a..35d8e9811998 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -1194,7 +1194,7 @@ int ef100_probe_netdev_pf(struct efx_nic *efx) net_dev->features |= NETIF_F_HW_TC; efx->fixed_features |= NETIF_F_HW_TC; } - return rc; + return 0; } int ef100_probe_vf(struct efx_nic *efx) -- cgit v1.2.3 From 2d0c88e84e483982067a82073f6125490ddf3614 Mon Sep 17 00:00:00 2001 From: Abel Wu Date: Wed, 16 Aug 2023 17:12:22 +0800 Subject: sock: Fix misuse of sk_under_memory_pressure() The status of global socket memory pressure is updated when: a) __sk_mem_raise_allocated(): enter: sk_memory_allocated(sk) > sysctl_mem[1] leave: sk_memory_allocated(sk) <= sysctl_mem[0] b) __sk_mem_reduce_allocated(): leave: sk_under_memory_pressure(sk) && sk_memory_allocated(sk) < sysctl_mem[0] So the conditions of leaving global pressure are inconstant, which may lead to the situation that one pressured net-memcg prevents the global pressure from being cleared when there is indeed no global pressure, thus the global constrains are still in effect unexpectedly on the other sockets. This patch fixes this by ignoring the net-memcg's pressure when deciding whether should leave global memory pressure. Fixes: e1aab161e013 ("socket: initial cgroup code.") Signed-off-by: Abel Wu Acked-by: Shakeel Butt Link: https://lore.kernel.org/r/20230816091226.1542-1-wuyun.abel@bytedance.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 6 ++++++ net/core/sock.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 2eb916d1ff64..e3d987b2ef12 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1420,6 +1420,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk) return sk->sk_prot->memory_pressure != NULL; } +static inline bool sk_under_global_memory_pressure(const struct sock *sk) +{ + return sk->sk_prot->memory_pressure && + !!*sk->sk_prot->memory_pressure; +} + static inline bool sk_under_memory_pressure(const struct sock *sk) { if (!sk->sk_prot->memory_pressure) diff --git a/net/core/sock.c b/net/core/sock.c index 732fc37a4771..c9cffb7acbea 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3159,7 +3159,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); - if (sk_under_memory_pressure(sk) && + if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } -- cgit v1.2.3 From b616be6b97688f2f2bd7c4a47ab32f27f94fb2a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2023 14:21:58 +0000 Subject: net: do not allow gso_size to be set to GSO_BY_FRAGS One missing check in virtio_net_hdr_to_skb() allowed syzbot to crash kernels again [1] Do not allow gso_size to be set to GSO_BY_FRAGS (0xffff), because this magic value is used by the kernel. [1] general protection fault, probably for non-canonical address 0xdffffc000000000e: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] CPU: 0 PID: 5039 Comm: syz-executor401 Not tainted 6.5.0-rc5-next-20230809-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 RIP: 0010:skb_segment+0x1a52/0x3ef0 net/core/skbuff.c:4500 Code: 00 00 00 e9 ab eb ff ff e8 6b 96 5d f9 48 8b 84 24 00 01 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e ea 21 00 00 48 8b 84 24 00 01 RSP: 0018:ffffc90003d3f1c8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 000000000001fffe RCX: 0000000000000000 RDX: 000000000000000e RSI: ffffffff882a3115 RDI: 0000000000000070 RBP: ffffc90003d3f378 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 5ee4a93e456187d6 R12: 000000000001ffc6 R13: dffffc0000000000 R14: 0000000000000008 R15: 000000000000ffff FS: 00005555563f2380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020020000 CR3: 000000001626d000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: udp6_ufo_fragment+0x9d2/0xd50 net/ipv6/udp_offload.c:109 ipv6_gso_segment+0x5c4/0x17b0 net/ipv6/ip6_offload.c:120 skb_mac_gso_segment+0x292/0x610 net/core/gso.c:53 __skb_gso_segment+0x339/0x710 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x3a5/0xf10 net/core/dev.c:3625 __dev_queue_xmit+0x8f0/0x3d60 net/core/dev.c:4329 dev_queue_xmit include/linux/netdevice.h:3082 [inline] packet_xmit+0x257/0x380 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x24c7/0x5570 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:727 [inline] sock_sendmsg+0xd9/0x180 net/socket.c:750 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2496 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2550 __sys_sendmsg+0x117/0x1e0 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7ff27cdb34d9 Fixes: 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Xin Long Cc: "Michael S. Tsirkin" Cc: Jason Wang Reviewed-by: Willem de Bruijn Reviewed-by: Marcelo Ricardo Leitner Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20230816142158.1779798-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index bdf8de2cdd93..7b4dd69555e4 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -155,6 +155,10 @@ retry: if (gso_type & SKB_GSO_UDP) nh_off -= thlen; + /* Kernel has a special handling for GSO_BY_FRAGS. */ + if (gso_size == GSO_BY_FRAGS) + return -EINVAL; + /* Too small packets are not really GSO ones. */ if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; -- cgit v1.2.3 From 2eb9625a3a32251ecea470cd576659a3a03b4e59 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 16 Aug 2023 20:37:11 +0530 Subject: qede: fix firmware halt over suspend and resume While performing certain power-off sequences, PCI drivers are called to suspend and resume their underlying devices through PCI PM (power management) interface. However this NIC hardware does not support PCI PM suspend/resume operations so system wide suspend/resume leads to bad MFW (management firmware) state which causes various follow-up errors in driver when communicating with the device/firmware afterwards. To fix this driver implements PCI PM suspend handler to indicate unsupported operation to the PCI subsystem explicitly, thus avoiding system to go into suspended/standby mode. Without this fix device/firmware does not recover unless system is power cycled. Fixes: 2950219d87b0 ("qede: Add basic network device support") Signed-off-by: Manish Chopra Signed-off-by: Alok Prasad Reviewed-by: John Meneghini Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230816150711.59035-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4b004a728190..99df00c30b8c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -176,6 +176,15 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param) } #endif +static int __maybe_unused qede_suspend(struct device *dev) +{ + dev_info(dev, "Device does not support suspend operation\n"); + + return -EOPNOTSUPP; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(qede_pm_ops, qede_suspend, NULL); + static const struct pci_error_handlers qede_err_handler = { .error_detected = qede_io_error_detected, }; @@ -190,6 +199,7 @@ static struct pci_driver qede_pci_driver = { .sriov_configure = qede_sriov_configure, #endif .err_handler = &qede_err_handler, + .driver.pm = &qede_pm_ops, }; static struct qed_eth_cb_ops qede_ll_ops = { -- cgit v1.2.3 From 43d00e102d9ecbe2635d7e3f2e14d2e90183d6af Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Wed, 16 Aug 2023 12:34:05 -0700 Subject: ice: Block switchdev mode when ADQ is active and vice versa ADQ and switchdev are not supported simultaneously. Enabling both at the same time can result in nullptr dereference. To prevent this, check if ADQ is active when changing devlink mode to switchdev mode, and check if switchdev is active when enabling ADQ. Fixes: fbc7b27af0f9 ("ice: enable ndo_setup_tc support for mqprio_qdisc") Signed-off-by: Marcin Szycik Reviewed-by: Przemek Kitszel Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230816193405.1307580-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 ++++++ drivers/net/ethernet/intel/ice/ice_main.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index ad0a007b7398..8f232c41a89e 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -538,6 +538,12 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: { + if (ice_is_adq_active(pf)) { + dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root"); + NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root"); + return -EOPNOTSUPP; + } + dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index cf92c39467c8..b40dfe6ae321 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8823,6 +8823,11 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, ice_setup_tc_block_cb, np, np, true); case TC_SETUP_QDISC_MQPRIO: + if (ice_is_eswitch_mode_switchdev(pf)) { + netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n"); + return -EOPNOTSUPP; + } + if (pf->adev) { mutex_lock(&pf->adev_mutex); device_lock(&pf->adev->dev); -- cgit v1.2.3 From c611589b4259ed63b9b77be6872b1ce07ec0ac16 Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Mon, 14 Aug 2023 13:51:19 -0300 Subject: drm/qxl: fix UAF on handle creation qxl_mode_dumb_create() dereferences the qobj returned by qxl_gem_object_create_with_handle(), but the handle is the only one holding a reference to it. A potential attacker could guess the returned handle value and closes it between the return of qxl_gem_object_create_with_handle() and the qobj usage, triggering a use-after-free scenario. Reproducer: int dri_fd =-1; struct drm_mode_create_dumb arg = {0}; void gem_close(int handle); void* trigger(void* ptr) { int ret; arg.width = arg.height = 0x20; arg.bpp = 32; ret = ioctl(dri_fd, DRM_IOCTL_MODE_CREATE_DUMB, &arg); if(ret) { perror("[*] DRM_IOCTL_MODE_CREATE_DUMB Failed"); exit(-1); } gem_close(arg.handle); while(1) { struct drm_mode_create_dumb args = {0}; args.width = args.height = 0x20; args.bpp = 32; ret = ioctl(dri_fd, DRM_IOCTL_MODE_CREATE_DUMB, &args); if (ret) { perror("[*] DRM_IOCTL_MODE_CREATE_DUMB Failed"); exit(-1); } printf("[*] DRM_IOCTL_MODE_CREATE_DUMB created, %d\n", args.handle); gem_close(args.handle); } return NULL; } void gem_close(int handle) { struct drm_gem_close args; args.handle = handle; int ret = ioctl(dri_fd, DRM_IOCTL_GEM_CLOSE, &args); // gem close handle if (!ret) printf("gem close handle %d\n", args.handle); } int main(void) { dri_fd= open("/dev/dri/card0", O_RDWR); printf("fd:%d\n", dri_fd); if(dri_fd == -1) return -1; pthread_t tid1; if(pthread_create(&tid1,NULL,trigger,NULL)){ perror("[*] thread_create tid1\n"); return -1; } while (1) { gem_close(arg.handle); } return 0; } This is a KASAN report: ================================================================== BUG: KASAN: slab-use-after-free in qxl_mode_dumb_create+0x3c2/0x400 linux/drivers/gpu/drm/qxl/qxl_dumb.c:69 Write of size 1 at addr ffff88801136c240 by task poc/515 CPU: 1 PID: 515 Comm: poc Not tainted 6.3.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014 Call Trace: __dump_stack linux/lib/dump_stack.c:88 dump_stack_lvl+0x48/0x70 linux/lib/dump_stack.c:106 print_address_description linux/mm/kasan/report.c:319 print_report+0xd2/0x660 linux/mm/kasan/report.c:430 kasan_report+0xd2/0x110 linux/mm/kasan/report.c:536 __asan_report_store1_noabort+0x17/0x30 linux/mm/kasan/report_generic.c:383 qxl_mode_dumb_create+0x3c2/0x400 linux/drivers/gpu/drm/qxl/qxl_dumb.c:69 drm_mode_create_dumb linux/drivers/gpu/drm/drm_dumb_buffers.c:96 drm_mode_create_dumb_ioctl+0x1f5/0x2d0 linux/drivers/gpu/drm/drm_dumb_buffers.c:102 drm_ioctl_kernel+0x21d/0x430 linux/drivers/gpu/drm/drm_ioctl.c:788 drm_ioctl+0x56f/0xcc0 linux/drivers/gpu/drm/drm_ioctl.c:891 vfs_ioctl linux/fs/ioctl.c:51 __do_sys_ioctl linux/fs/ioctl.c:870 __se_sys_ioctl linux/fs/ioctl.c:856 __x64_sys_ioctl+0x13d/0x1c0 linux/fs/ioctl.c:856 do_syscall_x64 linux/arch/x86/entry/common.c:50 do_syscall_64+0x5b/0x90 linux/arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc linux/arch/x86/entry/entry_64.S:120 RIP: 0033:0x7ff5004ff5f7 Code: 00 00 00 48 8b 05 99 c8 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 69 c8 0d 00 f7 d8 64 89 01 48 RSP: 002b:00007ff500408ea8 EFLAGS: 00000286 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ff5004ff5f7 RDX: 00007ff500408ec0 RSI: 00000000c02064b2 RDI: 0000000000000003 RBP: 00007ff500408ef0 R08: 0000000000000000 R09: 000000000000002a R10: 0000000000000000 R11: 0000000000000286 R12: 00007fff1c6cdafe R13: 00007fff1c6cdaff R14: 00007ff500408fc0 R15: 0000000000802000 Allocated by task 515: kasan_save_stack+0x38/0x70 linux/mm/kasan/common.c:45 kasan_set_track+0x25/0x40 linux/mm/kasan/common.c:52 kasan_save_alloc_info+0x1e/0x40 linux/mm/kasan/generic.c:510 ____kasan_kmalloc linux/mm/kasan/common.c:374 __kasan_kmalloc+0xc3/0xd0 linux/mm/kasan/common.c:383 kasan_kmalloc linux/./include/linux/kasan.h:196 kmalloc_trace+0x48/0xc0 linux/mm/slab_common.c:1066 kmalloc linux/./include/linux/slab.h:580 kzalloc linux/./include/linux/slab.h:720 qxl_bo_create+0x11a/0x610 linux/drivers/gpu/drm/qxl/qxl_object.c:124 qxl_gem_object_create+0xd9/0x360 linux/drivers/gpu/drm/qxl/qxl_gem.c:58 qxl_gem_object_create_with_handle+0xa1/0x180 linux/drivers/gpu/drm/qxl/qxl_gem.c:89 qxl_mode_dumb_create+0x1cd/0x400 linux/drivers/gpu/drm/qxl/qxl_dumb.c:63 drm_mode_create_dumb linux/drivers/gpu/drm/drm_dumb_buffers.c:96 drm_mode_create_dumb_ioctl+0x1f5/0x2d0 linux/drivers/gpu/drm/drm_dumb_buffers.c:102 drm_ioctl_kernel+0x21d/0x430 linux/drivers/gpu/drm/drm_ioctl.c:788 drm_ioctl+0x56f/0xcc0 linux/drivers/gpu/drm/drm_ioctl.c:891 vfs_ioctl linux/fs/ioctl.c:51 __do_sys_ioctl linux/fs/ioctl.c:870 __se_sys_ioctl linux/fs/ioctl.c:856 __x64_sys_ioctl+0x13d/0x1c0 linux/fs/ioctl.c:856 do_syscall_x64 linux/arch/x86/entry/common.c:50 do_syscall_64+0x5b/0x90 linux/arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc linux/arch/x86/entry/entry_64.S:120 Freed by task 515: kasan_save_stack+0x38/0x70 linux/mm/kasan/common.c:45 kasan_set_track+0x25/0x40 linux/mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x60 linux/mm/kasan/generic.c:521 ____kasan_slab_free linux/mm/kasan/common.c:236 ____kasan_slab_free+0x180/0x1f0 linux/mm/kasan/common.c:200 __kasan_slab_free+0x12/0x30 linux/mm/kasan/common.c:244 kasan_slab_free linux/./include/linux/kasan.h:162 slab_free_hook linux/mm/slub.c:1781 slab_free_freelist_hook+0xd2/0x1a0 linux/mm/slub.c:1807 slab_free linux/mm/slub.c:3787 __kmem_cache_free+0x196/0x2d0 linux/mm/slub.c:3800 kfree+0x78/0x120 linux/mm/slab_common.c:1019 qxl_ttm_bo_destroy+0x140/0x1a0 linux/drivers/gpu/drm/qxl/qxl_object.c:49 ttm_bo_release+0x678/0xa30 linux/drivers/gpu/drm/ttm/ttm_bo.c:381 kref_put linux/./include/linux/kref.h:65 ttm_bo_put+0x50/0x80 linux/drivers/gpu/drm/ttm/ttm_bo.c:393 qxl_gem_object_free+0x3e/0x60 linux/drivers/gpu/drm/qxl/qxl_gem.c:42 drm_gem_object_free+0x5c/0x90 linux/drivers/gpu/drm/drm_gem.c:974 kref_put linux/./include/linux/kref.h:65 __drm_gem_object_put linux/./include/drm/drm_gem.h:431 drm_gem_object_put linux/./include/drm/drm_gem.h:444 qxl_gem_object_create_with_handle+0x151/0x180 linux/drivers/gpu/drm/qxl/qxl_gem.c:100 qxl_mode_dumb_create+0x1cd/0x400 linux/drivers/gpu/drm/qxl/qxl_dumb.c:63 drm_mode_create_dumb linux/drivers/gpu/drm/drm_dumb_buffers.c:96 drm_mode_create_dumb_ioctl+0x1f5/0x2d0 linux/drivers/gpu/drm/drm_dumb_buffers.c:102 drm_ioctl_kernel+0x21d/0x430 linux/drivers/gpu/drm/drm_ioctl.c:788 drm_ioctl+0x56f/0xcc0 linux/drivers/gpu/drm/drm_ioctl.c:891 vfs_ioctl linux/fs/ioctl.c:51 __do_sys_ioctl linux/fs/ioctl.c:870 __se_sys_ioctl linux/fs/ioctl.c:856 __x64_sys_ioctl+0x13d/0x1c0 linux/fs/ioctl.c:856 do_syscall_x64 linux/arch/x86/entry/common.c:50 do_syscall_64+0x5b/0x90 linux/arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc linux/arch/x86/entry/entry_64.S:120 The buggy address belongs to the object at ffff88801136c000 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 576 bytes inside of freed 1024-byte region [ffff88801136c000, ffff88801136c400) The buggy address belongs to the physical page: page:0000000089fc329b refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11368 head:0000000089fc329b order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfffffc0010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) raw: 000fffffc0010200 ffff888007841dc0 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88801136c100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801136c180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88801136c200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88801136c280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801136c300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Disabling lock debugging due to kernel taint Instead of returning a weak reference to the qxl_bo object, return the created drm_gem_object and let the caller decrement the reference count when it no longer needs it. As a convenience, if the caller is not interested in the gobj object, it can pass NULL to the parameter and the reference counting is descremented internally. The bug and the reproducer were originally found by the Zero Day Initiative project (ZDI-CAN-20940). Link: https://www.zerodayinitiative.com/ Signed-off-by: Wander Lairson Costa Cc: stable@vger.kernel.org Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230814165119.90847-1-wander@redhat.com --- drivers/gpu/drm/qxl/qxl_drv.h | 2 +- drivers/gpu/drm/qxl/qxl_dumb.c | 5 ++++- drivers/gpu/drm/qxl/qxl_gem.c | 25 +++++++++++++++++-------- drivers/gpu/drm/qxl/qxl_ioctl.c | 6 ++---- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index ea993d7162e8..307a890fde13 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -310,7 +310,7 @@ int qxl_gem_object_create_with_handle(struct qxl_device *qdev, u32 domain, size_t size, struct qxl_surface *surf, - struct qxl_bo **qobj, + struct drm_gem_object **gobj, uint32_t *handle); void qxl_gem_object_free(struct drm_gem_object *gobj); int qxl_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c index d636ba685451..17df5c7ccf69 100644 --- a/drivers/gpu/drm/qxl/qxl_dumb.c +++ b/drivers/gpu/drm/qxl/qxl_dumb.c @@ -34,6 +34,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv, { struct qxl_device *qdev = to_qxl(dev); struct qxl_bo *qobj; + struct drm_gem_object *gobj; uint32_t handle; int r; struct qxl_surface surf; @@ -62,11 +63,13 @@ int qxl_mode_dumb_create(struct drm_file *file_priv, r = qxl_gem_object_create_with_handle(qdev, file_priv, QXL_GEM_DOMAIN_CPU, - args->size, &surf, &qobj, + args->size, &surf, &gobj, &handle); if (r) return r; + qobj = gem_to_qxl_bo(gobj); qobj->is_dumb = true; + drm_gem_object_put(gobj); args->pitch = pitch; args->handle = handle; return 0; diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c index a08da0bd9098..fc5e3763c359 100644 --- a/drivers/gpu/drm/qxl/qxl_gem.c +++ b/drivers/gpu/drm/qxl/qxl_gem.c @@ -72,32 +72,41 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size, return 0; } +/* + * If the caller passed a valid gobj pointer, it is responsible to call + * drm_gem_object_put() when it no longer needs to acess the object. + * + * If gobj is NULL, it is handled internally. + */ int qxl_gem_object_create_with_handle(struct qxl_device *qdev, struct drm_file *file_priv, u32 domain, size_t size, struct qxl_surface *surf, - struct qxl_bo **qobj, + struct drm_gem_object **gobj, uint32_t *handle) { - struct drm_gem_object *gobj; int r; + struct drm_gem_object *local_gobj; - BUG_ON(!qobj); BUG_ON(!handle); r = qxl_gem_object_create(qdev, size, 0, domain, false, false, surf, - &gobj); + &local_gobj); if (r) return -ENOMEM; - r = drm_gem_handle_create(file_priv, gobj, handle); + r = drm_gem_handle_create(file_priv, local_gobj, handle); if (r) return r; - /* drop reference from allocate - handle holds it now */ - *qobj = gem_to_qxl_bo(gobj); - drm_gem_object_put(gobj); + + if (gobj) + *gobj = local_gobj; + else + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(local_gobj); + return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 30f58b21372a..dd0f834d881c 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -38,7 +38,6 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr struct qxl_device *qdev = to_qxl(dev); struct drm_qxl_alloc *qxl_alloc = data; int ret; - struct qxl_bo *qobj; uint32_t handle; u32 domain = QXL_GEM_DOMAIN_VRAM; @@ -50,7 +49,7 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr domain, qxl_alloc->size, NULL, - &qobj, &handle); + NULL, &handle); if (ret) { DRM_ERROR("%s: failed to create gem ret=%d\n", __func__, ret); @@ -386,7 +385,6 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi { struct qxl_device *qdev = to_qxl(dev); struct drm_qxl_alloc_surf *param = data; - struct qxl_bo *qobj; int handle; int ret; int size, actual_stride; @@ -406,7 +404,7 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi QXL_GEM_DOMAIN_SURFACE, size, &surf, - &qobj, &handle); + NULL, &handle); if (ret) { DRM_ERROR("%s: failed to create gem ret=%d\n", __func__, ret); -- cgit v1.2.3