From 913eda2b08cc49d31f382579e2be34c2709eb789 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 18 Oct 2023 18:39:08 +0200 Subject: i40e: xsk: remove count_mask Cited commit introduced a neat way of updating next_to_clean that does not require boundary checks on each increment. This was done by masking the new value with (ring length - 1) mask. Problem is that this is applicable only for power of 2 ring sizes, for every other size this assumption can not be made. In turn, it leads to cleaning descriptors out of order as well as splats: [ 1388.411915] Workqueue: events xp_release_deferred [ 1388.411919] RIP: 0010:xp_free+0x1a/0x50 [ 1388.411921] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 8b 57 70 48 8d 47 70 48 89 e5 48 39 d0 74 06 <5d> c3 cc cc cc cc 48 8b 57 60 83 82 b8 00 00 00 01 48 8b 57 60 48 [ 1388.411922] RSP: 0018:ffa0000000a83cb0 EFLAGS: 00000206 [ 1388.411923] RAX: ff11000119aa5030 RBX: 000000000000001d RCX: ff110001129b6e50 [ 1388.411924] RDX: ff11000119aa4fa0 RSI: 0000000055555554 RDI: ff11000119aa4fc0 [ 1388.411925] RBP: ffa0000000a83cb0 R08: 0000000000000000 R09: 0000000000000000 [ 1388.411926] R10: 0000000000000001 R11: 0000000000000000 R12: ff11000115829b80 [ 1388.411927] R13: 000000000000005f R14: 0000000000000000 R15: ff11000119aa4fc0 [ 1388.411928] FS: 0000000000000000(0000) GS:ff11000277e00000(0000) knlGS:0000000000000000 [ 1388.411929] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1388.411930] CR2: 00007f1f564e6c14 CR3: 000000000783c005 CR4: 0000000000771ef0 [ 1388.411931] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1388.411931] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1388.411932] PKRU: 55555554 [ 1388.411933] Call Trace: [ 1388.411934] [ 1388.411935] ? show_regs+0x6e/0x80 [ 1388.411937] ? watchdog_timer_fn+0x1d2/0x240 [ 1388.411939] ? __pfx_watchdog_timer_fn+0x10/0x10 [ 1388.411941] ? __hrtimer_run_queues+0x10e/0x290 [ 1388.411945] ? 
clockevents_program_event+0xae/0x130 [ 1388.411947] ? hrtimer_interrupt+0x105/0x240 [ 1388.411949] ? __sysvec_apic_timer_interrupt+0x54/0x150 [ 1388.411952] ? sysvec_apic_timer_interrupt+0x7f/0x90 [ 1388.411955] [ 1388.411955] [ 1388.411956] ? asm_sysvec_apic_timer_interrupt+0x1f/0x30 [ 1388.411958] ? xp_free+0x1a/0x50 [ 1388.411960] i40e_xsk_clean_rx_ring+0x5d/0x100 [i40e] [ 1388.411968] i40e_clean_rx_ring+0x14c/0x170 [i40e] [ 1388.411977] i40e_queue_pair_disable+0xda/0x260 [i40e] [ 1388.411986] i40e_xsk_pool_setup+0x192/0x1d0 [i40e] [ 1388.411993] i40e_reconfig_rss_queues+0x1f0/0x1450 [i40e] [ 1388.412002] xp_disable_drv_zc+0x73/0xf0 [ 1388.412004] ? mutex_lock+0x17/0x50 [ 1388.412007] xp_release_deferred+0x2b/0xc0 [ 1388.412010] process_one_work+0x178/0x350 [ 1388.412011] ? __pfx_worker_thread+0x10/0x10 [ 1388.412012] worker_thread+0x2f7/0x420 [ 1388.412014] ? __pfx_worker_thread+0x10/0x10 [ 1388.412015] kthread+0xf8/0x130 [ 1388.412017] ? __pfx_kthread+0x10/0x10 [ 1388.412019] ret_from_fork+0x3d/0x60 [ 1388.412021] ? __pfx_kthread+0x10/0x10 [ 1388.412023] ret_from_fork_asm+0x1b/0x30 [ 1388.412026] It comes from picking wrong ring entries when cleaning xsk buffers during pool detach. Remove the count_mask logic and use the boundary check when updating next_to_process (which used to be a next_to_clean). 
Fixes: c8a8ca3408dc ("i40e: remove unnecessary memory writes of the next to clean pointer") Reported-by: Tushar Vyavahare Tested-by: Tushar Vyavahare Signed-off-by: Maciej Fijalkowski Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231018163908.40841-1-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 37f41c8a682f..7d991e4d9b89 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -437,12 +437,12 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 next_to_process = rx_ring->next_to_process; u16 next_to_clean = rx_ring->next_to_clean; - u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; struct xdp_buff *first = NULL; + u32 count = rx_ring->count; struct bpf_prog *xdp_prog; + u32 entries_to_alloc; bool failure = false; - u16 cleaned_count; if (next_to_process != next_to_clean) first = *i40e_rx_bi(rx_ring, next_to_clean); @@ -475,7 +475,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) qword); bi = *i40e_rx_bi(rx_ring, next_to_process); xsk_buff_free(bi); - next_to_process = (next_to_process + 1) & count_mask; + if (++next_to_process == count) + next_to_process = 0; continue; } @@ -493,7 +494,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) else if (i40e_add_xsk_frag(rx_ring, first, bi, size)) break; - next_to_process = (next_to_process + 1) & count_mask; + if (++next_to_process == count) + next_to_process = 0; if (i40e_is_non_eop(rx_ring, rx_desc)) continue; @@ -513,10 +515,10 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) rx_ring->next_to_clean = next_to_clean; rx_ring->next_to_process = next_to_process; - 
cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask; - if (cleaned_count >= I40E_RX_BUFFER_WRITE) - failure |= !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); + entries_to_alloc = I40E_DESC_UNUSED(rx_ring); + if (entries_to_alloc >= I40E_RX_BUFFER_WRITE) + failure |= !i40e_alloc_rx_buffers_zc(rx_ring, entries_to_alloc); i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); @@ -752,14 +754,16 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) { - u16 count_mask = rx_ring->count - 1; u16 ntc = rx_ring->next_to_clean; u16 ntu = rx_ring->next_to_use; - for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { + while (ntc != ntu) { struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, ntc); xsk_buff_free(rx_bi); + ntc++; + if (ntc >= rx_ring->count) + ntc = 0; } } -- cgit v1.2.3 From c1c0ce31b2420d5c173228a2132a492ede03d81f Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 18 Oct 2023 21:34:34 +0200 Subject: r8169: fix the KCSAN reported data-race in rtl_tx() while reading tp->cur_tx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KCSAN reported the following data-race: ================================================================== BUG: KCSAN: data-race in rtl8169_poll [r8169] / rtl8169_start_xmit [r8169] write (marked) to 0xffff888102474b74 of 4 bytes by task 5358 on cpu 29: rtl8169_start_xmit (drivers/net/ethernet/realtek/r8169_main.c:4254) r8169 dev_hard_start_xmit (./include/linux/netdevice.h:4889 ./include/linux/netdevice.h:4903 net/core/dev.c:3544 net/core/dev.c:3560) sch_direct_xmit (net/sched/sch_generic.c:342) __dev_queue_xmit (net/core/dev.c:3817 net/core/dev.c:4306) ip_finish_output2 (./include/linux/netdevice.h:3082 ./include/net/neighbour.h:526 ./include/net/neighbour.h:540 net/ipv4/ip_output.c:233) __ip_finish_output (net/ipv4/ip_output.c:311 
net/ipv4/ip_output.c:293) ip_finish_output (net/ipv4/ip_output.c:328) ip_output (net/ipv4/ip_output.c:435) ip_send_skb (./include/net/dst.h:458 net/ipv4/ip_output.c:127 net/ipv4/ip_output.c:1486) udp_send_skb (net/ipv4/udp.c:963) udp_sendmsg (net/ipv4/udp.c:1246) inet_sendmsg (net/ipv4/af_inet.c:840 (discriminator 4)) sock_sendmsg (net/socket.c:730 net/socket.c:753) __sys_sendto (net/socket.c:2177) __x64_sys_sendto (net/socket.c:2185) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) read to 0xffff888102474b74 of 4 bytes by interrupt on cpu 21: rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4397 drivers/net/ethernet/realtek/r8169_main.c:4581) r8169 __napi_poll (net/core/dev.c:6527) net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) __do_softirq (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632) irq_exit_rcu (kernel/softirq.c:647) common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14)) asm_common_interrupt (./arch/x86/include/asm/idtentry.h:636) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:291) cpuidle_enter (drivers/cpuidle/cpuidle.c:390) call_cpuidle (kernel/sched/idle.c:135) do_idle (kernel/sched/idle.c:219 kernel/sched/idle.c:282) cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:433) value changed: 0x002f4815 -> 0x002f4816 Reported by Kernel Concurrency Sanitizer on: CPU: 21 PID: 0 Comm: swapper/21 Tainted: G L 6.6.0-rc2-kcsan-00143-gb5cbe7c00aa0 #41 Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 ================================================================== The write side of drivers/net/ethernet/realtek/r8169_main.c is: ================== 4251 /* rtl_tx needs to see descriptor changes before updated tp->cur_tx */ 4252 smp_wmb(); 4253 → 4254 
WRITE_ONCE(tp->cur_tx, tp->cur_tx + frags + 1); 4255 4256 stop_queue = !netif_subqueue_maybe_stop(dev, 0, rtl_tx_slots_avail(tp), 4257 R8169_TX_STOP_THRS, 4258 R8169_TX_START_THRS); The read side is the function rtl_tx(): 4355 static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, 4356 int budget) 4357 { 4358 unsigned int dirty_tx, bytes_compl = 0, pkts_compl = 0; 4359 struct sk_buff *skb; 4360 4361 dirty_tx = tp->dirty_tx; 4362 4363 while (READ_ONCE(tp->cur_tx) != dirty_tx) { 4364 unsigned int entry = dirty_tx % NUM_TX_DESC; 4365 u32 status; 4366 4367 status = le32_to_cpu(tp->TxDescArray[entry].opts1); 4368 if (status & DescOwn) 4369 break; 4370 4371 skb = tp->tx_skb[entry].skb; 4372 rtl8169_unmap_tx_skb(tp, entry); 4373 4374 if (skb) { 4375 pkts_compl++; 4376 bytes_compl += skb->len; 4377 napi_consume_skb(skb, budget); 4378 } 4379 dirty_tx++; 4380 } 4381 4382 if (tp->dirty_tx != dirty_tx) { 4383 dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl); 4384 WRITE_ONCE(tp->dirty_tx, dirty_tx); 4385 4386 netif_subqueue_completed_wake(dev, 0, pkts_compl, bytes_compl, 4387 rtl_tx_slots_avail(tp), 4388 R8169_TX_START_THRS); 4389 /* 4390 * 8168 hack: TxPoll requests are lost when the Tx packets are 4391 * too close. Let's kick an extra TxPoll request when a burst 4392 * of start_xmit activity is detected (if it is not detected, 4393 * it is slow enough). -- FR 4394 * If skb is NULL then we come here again once a tx irq is 4395 * triggered after the last fragment is marked transmitted. 
4396 */ → 4397 if (tp->cur_tx != dirty_tx && skb) 4398 rtl8169_doorbell(tp); 4399 } 4400 } Obviously from the code, an earlier detected data-race for tp->cur_tx was fixed in the line 4363: 4363 while (READ_ONCE(tp->cur_tx) != dirty_tx) { but the same solution is required for protecting the other access to tp->cur_tx: → 4397 if (READ_ONCE(tp->cur_tx) != dirty_tx && skb) 4398 rtl8169_doorbell(tp); The write in the line 4254 is protected with WRITE_ONCE(), but the read in the line 4397 might have suffered read tearing under some compiler optimisations. The fix eliminated the KCSAN data-race report for this bug. It is yet to be evaluated what happens if tp->cur_tx changes between the test in line 4363 and line 4397. This test should certainly not be cached by the compiler in some register for such a long time, while asynchronous writes to tp->cur_tx might have occurred in line 4254 in the meantime. Fixes: 94d8a98e6235c ("r8169: reduce number of workaround doorbell rings") Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Marco Elver Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/lkml/dc7fc8fa-4ea4-e9a9-30a6-7c83e6b53188@alu.unizg.hr/ Signed-off-by: Mirsad Goran Todorovac Acked-by: Marco Elver Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 6351a2dc13bc..281aaa851847 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4394,7 +4394,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, * If skb is NULL then we come here again once a tx irq is * triggered after the last fragment is marked transmitted. 
*/ - if (tp->cur_tx != dirty_tx && skb) + if (READ_ONCE(tp->cur_tx) != dirty_tx && skb) rtl8169_doorbell(tp); } } -- cgit v1.2.3 From dcf75a0f6bc136de94e88178ae5f51b7f879abc9 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 18 Oct 2023 21:34:36 +0200 Subject: r8169: fix the KCSAN reported data-race in rtl_tx while reading TxDescArray[entry].opts1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KCSAN reported the following data-race: ================================================================== BUG: KCSAN: data-race in rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4368 drivers/net/ethernet/realtek/r8169_main.c:4581) r8169 race at unknown origin, with read to 0xffff888140d37570 of 4 bytes by interrupt on cpu 21: rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4368 drivers/net/ethernet/realtek/r8169_main.c:4581) r8169 __napi_poll (net/core/dev.c:6527) net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) __do_softirq (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632) irq_exit_rcu (kernel/softirq.c:647) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1074 (discriminator 14)) asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:645) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:291) cpuidle_enter (drivers/cpuidle/cpuidle.c:390) call_cpuidle (kernel/sched/idle.c:135) do_idle (kernel/sched/idle.c:219 kernel/sched/idle.c:282) cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:433) value changed: 0xb0000042 -> 0x00000000 Reported by Kernel Concurrency Sanitizer on: CPU: 21 PID: 0 Comm: swapper/21 Tainted: G L 6.6.0-rc2-kcsan-00143-gb5cbe7c00aa0 #41 Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 ================================================================== 
The read side is in drivers/net/ethernet/realtek/r8169_main.c ========================================= 4355 static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, 4356 int budget) 4357 { 4358 unsigned int dirty_tx, bytes_compl = 0, pkts_compl = 0; 4359 struct sk_buff *skb; 4360 4361 dirty_tx = tp->dirty_tx; 4362 4363 while (READ_ONCE(tp->cur_tx) != dirty_tx) { 4364 unsigned int entry = dirty_tx % NUM_TX_DESC; 4365 u32 status; 4366 → 4367 status = le32_to_cpu(tp->TxDescArray[entry].opts1); 4368 if (status & DescOwn) 4369 break; 4370 4371 skb = tp->tx_skb[entry].skb; 4372 rtl8169_unmap_tx_skb(tp, entry); 4373 4374 if (skb) { 4375 pkts_compl++; 4376 bytes_compl += skb->len; 4377 napi_consume_skb(skb, budget); 4378 } 4379 dirty_tx++; 4380 } 4381 4382 if (tp->dirty_tx != dirty_tx) { 4383 dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl); 4384 WRITE_ONCE(tp->dirty_tx, dirty_tx); 4385 4386 netif_subqueue_completed_wake(dev, 0, pkts_compl, bytes_compl, 4387 rtl_tx_slots_avail(tp), 4388 R8169_TX_START_THRS); 4389 /* 4390 * 8168 hack: TxPoll requests are lost when the Tx packets are 4391 * too close. Let's kick an extra TxPoll request when a burst 4392 * of start_xmit activity is detected (if it is not detected, 4393 * it is slow enough). -- FR 4394 * If skb is NULL then we come here again once a tx irq is 4395 * triggered after the last fragment is marked transmitted. 4396 */ 4397 if (READ_ONCE(tp->cur_tx) != dirty_tx && skb) 4398 rtl8169_doorbell(tp); 4399 } 4400 } tp->TxDescArray[entry].opts1 is reported to have a data-race and READ_ONCE() fixes this KCSAN warning. 4366 → 4367 status = le32_to_cpu(READ_ONCE(tp->TxDescArray[entry].opts1)); 4368 if (status & DescOwn) 4369 break; 4370 Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Marco Elver Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/lkml/dc7fc8fa-4ea4-e9a9-30a6-7c83e6b53188@alu.unizg.hr/ Signed-off-by: Mirsad Goran Todorovac Acked-by: Marco Elver Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 281aaa851847..7e14a1d958c8 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4364,7 +4364,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, unsigned int entry = dirty_tx % NUM_TX_DESC; u32 status; - status = le32_to_cpu(tp->TxDescArray[entry].opts1); + status = le32_to_cpu(READ_ONCE(tp->TxDescArray[entry].opts1)); if (status & DescOwn) break; -- cgit v1.2.3 From f97eee484e71890131f9c563c5cc6d5a69e4308d Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 18 Oct 2023 21:34:38 +0200 Subject: r8169: fix the KCSAN reported data race in rtl_rx while reading desc->opts1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KCSAN reported the following data-race bug: ================================================================== BUG: KCSAN: data-race in rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4430 drivers/net/ethernet/realtek/r8169_main.c:4583) r8169 race at unknown origin, with read to 0xffff888117e43510 of 4 bytes by interrupt on cpu 21: rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4430 drivers/net/ethernet/realtek/r8169_main.c:4583) r8169 __napi_poll (net/core/dev.c:6527) net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) __do_softirq (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632) irq_exit_rcu (kernel/softirq.c:647) sysvec_apic_timer_interrupt 
(arch/x86/kernel/apic/apic.c:1074 (discriminator 14)) asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:645) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:291) cpuidle_enter (drivers/cpuidle/cpuidle.c:390) call_cpuidle (kernel/sched/idle.c:135) do_idle (kernel/sched/idle.c:219 kernel/sched/idle.c:282) cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:433) value changed: 0x80003fff -> 0x3402805f Reported by Kernel Concurrency Sanitizer on: CPU: 21 PID: 0 Comm: swapper/21 Tainted: G L 6.6.0-rc2-kcsan-00143-gb5cbe7c00aa0 #41 Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 ================================================================== drivers/net/ethernet/realtek/r8169_main.c: ========================================== 4429 → 4430 status = le32_to_cpu(desc->opts1); 4431 if (status & DescOwn) 4432 break; 4433 4434 /* This barrier is needed to keep us from reading 4435 * any other fields out of the Rx descriptor until 4436 * we know the status of DescOwn 4437 */ 4438 dma_rmb(); 4439 4440 if (unlikely(status & RxRES)) { 4441 if (net_ratelimit()) 4442 netdev_warn(dev, "Rx ERROR. status = %08x\n", Marco Elver explained that dma_rmb() does not prevent the compiler from tearing the access to desc->opts1, which can be written to concurrently. READ_ONCE() should prevent that from happening: 4429 → 4430 status = le32_to_cpu(READ_ONCE(desc->opts1)); 4431 if (status & DescOwn) 4432 break; 4433 As a consequence of this fix, this KCSAN warning was eliminated. Fixes: 6202806e7c03a ("r8169: drop member opts1_mask from struct rtl8169_private") Suggested-by: Marco Elver Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/lkml/dc7fc8fa-4ea4-e9a9-30a6-7c83e6b53188@alu.unizg.hr/ Signed-off-by: Mirsad Goran Todorovac Acked-by: Marco Elver Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7e14a1d958c8..361b90007148 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4427,7 +4427,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget dma_addr_t addr; u32 status; - status = le32_to_cpu(desc->opts1); + status = le32_to_cpu(READ_ONCE(desc->opts1)); if (status & DescOwn) break; -- cgit v1.2.3 From 7db3111043885c146e795c199d39c3f9042d97c0 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 19 Oct 2023 09:13:46 +0200 Subject: iavf: initialize waitqueues before starting watchdog_task It is not safe to initialize the waitqueues after queueing the watchdog_task. It will be using them. The chance of this causing a real problem is very small, because there will be some sleeping before any of the waitqueues get used. I got a crash only after inserting an artificial sleep in iavf_probe. Queue the watchdog_task as the last step in iavf_probe. Add a comment to prevent repeating the mistake. Fixes: fe2647ab0c99 ("i40evf: prevent VF close returning before state transitions to DOWN") Signed-off-by: Michal Schmidt Reviewed-by: Paul Menzel Reviewed-by: Przemek Kitszel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 6a2e6d64bc3a..5b5c0525aa13 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4982,8 +4982,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->finish_config, iavf_finish_config); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); @@ -4994,6 +4992,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(5 * (pdev->devfn & 0x07))); + /* Initialization goes on in the work. Do not add more of it below. */ return 0; err_ioremap: -- cgit v1.2.3 From 665e7d83c5386f9abdc67b2e4b6e6d9579aadfcb Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 19 Oct 2023 18:37:20 +0200 Subject: i40e: Fix I40E_FLAG_VF_VLAN_PRUNING value Commit c87c938f62d8f1 ("i40e: Add VF VLAN pruning") added new PF flag I40E_FLAG_VF_VLAN_PRUNING but its value collides with existing I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED flag. Move the affected flag at the end of the flags and fix its value. 
Reproducer: [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 link-down-on-close on [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 vf-vlan-pruning on [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 link-down-on-close off [ 6323.142585] i40e 0000:02:00.0: Setting link-down-on-close not supported on this port (because total-port-shutdown is enabled) netlink error: Operation not supported [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 vf-vlan-pruning off [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 link-down-on-close off The link-down-on-close flag cannot be modified after setting vf-vlan-pruning because vf-vlan-pruning shares the same bit with total-port-shutdown flag that prevents any modification of link-down-on-close flag. Fixes: c87c938f62d8 ("i40e: Add VF VLAN pruning") Cc: Mateusz Palczewski Cc: Simon Horman Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 6e310a539467..55bb0b5310d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -580,7 +580,6 @@ struct i40e_pf { #define I40E_FLAG_DISABLE_FW_LLDP BIT(24) #define I40E_FLAG_RS_FEC BIT(25) #define I40E_FLAG_BASE_R_FEC BIT(26) -#define I40E_FLAG_VF_VLAN_PRUNING BIT(27) /* TOTAL_PORT_SHUTDOWN * Allows to physically disable the link on the NIC's port. 
* If enabled, (after link down request from the OS) @@ -603,6 +602,7 @@ struct i40e_pf { * in abilities field of i40e_aq_set_phy_config structure */ #define I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED BIT(27) +#define I40E_FLAG_VF_VLAN_PRUNING BIT(28) struct i40e_client_instance *cinst; bool stat_offsets_loaded; -- cgit v1.2.3 From fb71ba0ed8be9534493c80ba00142a64d9972a72 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 20 Oct 2023 17:31:56 +0800 Subject: treewide: Spelling fix in comment reques -> request Fixes: 09dde54c6a69 ("PS3: gelic: Add wireless support for PS3") Signed-off-by: Kunwu Chan Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index dc14a66583ff..44488c153ea2 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -1217,7 +1217,7 @@ static int gelic_wl_set_encodeext(struct net_device *netdev, key_index = wl->current_key; if (!enc->length && (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY)) { - /* reques to change default key index */ + /* request to change default key index */ pr_debug("%s: request to change default key to %d\n", __func__, key_index); wl->current_key = key_index; -- cgit v1.2.3 From 8c0b48e01daba5ca58f939a8425855d3f4f2ed14 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 19 Oct 2023 13:40:35 -0700 Subject: igb: Fix potential memory leak in igb_add_ethtool_nfc_entry Add check for return of igb_update_ethtool_nfc_entry so that in case of any potential errors the memory allocated for input will be freed. 
Fixes: 0e71def25281 ("igb: add support of RX network flow classification") Reviewed-by: Wojciech Drewek Signed-off-by: Mateusz Palczewski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 319ed601eaa1..4ee849985e2b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2978,11 +2978,15 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, if (err) goto err_out_w_lock; - igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + err = igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + if (err) + goto err_out_input_filter; spin_unlock(&adapter->nfc_lock); return 0; +err_out_input_filter: + igb_erase_filter(adapter, input); err_out_w_lock: spin_unlock(&adapter->nfc_lock); err_out: -- cgit v1.2.3 From 72bf4f1767f0386970dc04726dc5bc2e3991dc19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Oct 2023 11:24:57 +0000 Subject: net: do not leave an empty skb in write queue Under memory stress conditions, tcp_sendmsg_locked() might call sk_stream_wait_memory(), thus releasing the socket lock. If a fresh skb has been allocated prior to this, we should not leave it in the write queue otherwise tcp_write_xmit() could panic. This apparently does not happen often, but a future change in __sk_mem_raise_allocated() that Shakeel and others are considering would increase chances of being hurt. Under discussion is to remove this controversial part: /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. 
*/ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ if (memcg_charge && !charged) { mem_cgroup_charge_skmem(sk->sk_memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; } Fixes: fdfc5c8594c2 ("tcp: remove empty skb from write queue in error cases") Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20231019112457.1190114-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d3456cf840de..3d3a24f79573 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -927,10 +927,11 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -/* In some cases, both sendmsg() could have added an skb to the write queue, - * but failed adding payload on it. We need to remove it to consume less +/* In some cases, sendmsg() could have added an skb to the write queue, + * but failed adding payload on it. We need to remove it to consume less * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger - * epoll() users. + * epoll() users. Another reason is that tcp_write_xmit() does not like + * finding an empty skb in the write queue. */ void tcp_remove_empty_skb(struct sock *sk) { @@ -1289,6 +1290,7 @@ new_segment: wait_for_space: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + tcp_remove_empty_skb(sk); if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); -- cgit v1.2.3 From a9beb7e81bcb876615e1fbb3c07f3f9dba69831f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Oct 2023 12:21:04 +0000 Subject: neighbour: fix various data-races 1) tbl->gc_thresh1, tbl->gc_thresh2, tbl->gc_thresh3 and tbl->gc_interval can be written from sysfs. 
2) tbl->last_flush is read locklessly from neigh_alloc() 3) tbl->proxy_queue.qlen is read locklessly from neightbl_fill_info() 4) neightbl_fill_info() reads cpu stats that can be changed concurrently. Fixes: c7fb64db001f ("[NETLINK]: Neighbour table configuration and statistics via rtnetlink") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20231019122104.1448310-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 67 +++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9c09f091cbff..df81c1f0a570 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -251,7 +251,8 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) static int neigh_forced_gc(struct neigh_table *tbl) { - int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2; + int max_clean = atomic_read(&tbl->gc_entries) - + READ_ONCE(tbl->gc_thresh2); unsigned long tref = jiffies - 5 * HZ; struct neighbour *n, *tmp; int shrunk = 0; @@ -280,7 +281,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) } } - tbl->last_flush = jiffies; + WRITE_ONCE(tbl->last_flush, jiffies); write_unlock_bh(&tbl->lock); @@ -464,17 +465,17 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, { struct neighbour *n = NULL; unsigned long now = jiffies; - int entries; + int entries, gc_thresh3; if (exempt_from_gc) goto do_alloc; entries = atomic_inc_return(&tbl->gc_entries) - 1; - if (entries >= tbl->gc_thresh3 || - (entries >= tbl->gc_thresh2 && - time_after(now, tbl->last_flush + 5 * HZ))) { - if (!neigh_forced_gc(tbl) && - entries >= tbl->gc_thresh3) { + gc_thresh3 = READ_ONCE(tbl->gc_thresh3); + if (entries >= gc_thresh3 || + (entries >= READ_ONCE(tbl->gc_thresh2) && + time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) { + if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) { net_info_ratelimited("%s: neighbor table 
overflow!\n", tbl->id); NEIGH_CACHE_STAT_INC(tbl, table_fulls); @@ -955,13 +956,14 @@ static void neigh_periodic_work(struct work_struct *work) if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; - tbl->last_rand = jiffies; + + WRITE_ONCE(tbl->last_rand, jiffies); list_for_each_entry(p, &tbl->parms_list, list) p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1)) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { @@ -2167,15 +2169,16 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) || - nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) || - nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || - nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || - nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) + nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval), + NDTA_PAD) || + nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) || + nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) || + nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3))) goto nla_put_failure; { unsigned long now = jiffies; - long flush_delta = now - tbl->last_flush; - long rand_delta = now - tbl->last_rand; + long flush_delta = now - READ_ONCE(tbl->last_flush); + long rand_delta = now - READ_ONCE(tbl->last_rand); struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, @@ -2183,7 +2186,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_entries = atomic_read(&tbl->entries), .ndtc_last_flush = jiffies_to_msecs(flush_delta), .ndtc_last_rand = jiffies_to_msecs(rand_delta), - .ndtc_proxy_qlen = tbl->proxy_queue.qlen, + .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen), }; rcu_read_lock(); @@ -2206,17 +2209,17 @@ static int 
neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, struct neigh_statistics *st; st = per_cpu_ptr(tbl->stats, cpu); - ndst.ndts_allocs += st->allocs; - ndst.ndts_destroys += st->destroys; - ndst.ndts_hash_grows += st->hash_grows; - ndst.ndts_res_failed += st->res_failed; - ndst.ndts_lookups += st->lookups; - ndst.ndts_hits += st->hits; - ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast; - ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; - ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; - ndst.ndts_forced_gc_runs += st->forced_gc_runs; - ndst.ndts_table_fulls += st->table_fulls; + ndst.ndts_allocs += READ_ONCE(st->allocs); + ndst.ndts_destroys += READ_ONCE(st->destroys); + ndst.ndts_hash_grows += READ_ONCE(st->hash_grows); + ndst.ndts_res_failed += READ_ONCE(st->res_failed); + ndst.ndts_lookups += READ_ONCE(st->lookups); + ndst.ndts_hits += READ_ONCE(st->hits); + ndst.ndts_rcv_probes_mcast += READ_ONCE(st->rcv_probes_mcast); + ndst.ndts_rcv_probes_ucast += READ_ONCE(st->rcv_probes_ucast); + ndst.ndts_periodic_gc_runs += READ_ONCE(st->periodic_gc_runs); + ndst.ndts_forced_gc_runs += READ_ONCE(st->forced_gc_runs); + ndst.ndts_table_fulls += READ_ONCE(st->table_fulls); } if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, @@ -2445,16 +2448,16 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout_tbl_lock; if (tb[NDTA_THRESH1]) - tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); + WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1])); if (tb[NDTA_THRESH2]) - tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); + WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2])); if (tb[NDTA_THRESH3]) - tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); + WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3])); if (tb[NDTA_GC_INTERVAL]) - tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); + WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL])); err = 0; -- cgit v1.2.3 From 
e7684d29efdf37304c62bb337ea55b3428ca118e Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Thu, 19 Oct 2023 13:36:41 -0700 Subject: igc: Fix ambiguity in the ethtool advertising The 'ethtool_convert_link_mode_to_legacy_u32' method does not allow us to advertise 2500M speed support and TP (twisted pair) properly. Convert to 'ethtool_link_ksettings_test_link_mode' to advertise supported speed and eliminate ambiguity. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Suggested-by: Dima Ruinskiy Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231019203641.3661960-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 35 ++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 7ab6dd58e400..dd8a9d27a167 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1817,7 +1817,7 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, struct igc_adapter *adapter = netdev_priv(netdev); struct net_device *dev = adapter->netdev; struct igc_hw *hw = &adapter->hw; - u32 advertising; + u16 advertised = 0; /* When adapter in resetting mode, autoneg/speed/duplex * cannot be changed @@ -1842,18 +1842,33 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) usleep_range(1000, 2000); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); - /* Converting to legacy u32 drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT. - * We have to check this and convert it to ADVERTISE_2500_FULL - * (aka ETHTOOL_LINK_MODE_2500baseX_Full_BIT) explicitly. 
- */ - if (ethtool_link_ksettings_test_link_mode(cmd, advertising, 2500baseT_Full)) - advertising |= ADVERTISE_2500_FULL; + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 2500baseT_Full)) + advertised |= ADVERTISE_2500_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 1000baseT_Full)) + advertised |= ADVERTISE_1000_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100baseT_Full)) + advertised |= ADVERTISE_100_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100baseT_Half)) + advertised |= ADVERTISE_100_HALF; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10baseT_Full)) + advertised |= ADVERTISE_10_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10baseT_Half)) + advertised |= ADVERTISE_10_HALF; if (cmd->base.autoneg == AUTONEG_ENABLE) { hw->mac.autoneg = 1; - hw->phy.autoneg_advertised = advertising; + hw->phy.autoneg_advertised = advertised; if (adapter->fc_autoneg) hw->fc.requested_mode = igc_fc_default; } else { -- cgit v1.2.3 From 068d8b75c1aee153193522211ace6c13c21cd16b Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Thu, 19 Oct 2023 13:38:52 -0700 Subject: i40e: sync next_to_clean and next_to_process for programming status desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a programming status desc is encountered on the rx_ring, next_to_process is bumped along with cleaned_count but next_to_clean is not. This causes I40E_DESC_UNUSED() macro to misbehave resulting in overwriting whole ring with new buffers. Update next_to_clean to point to next_to_process on seeing a programming status desc if not in the middle of handling a multi-frag packet. Also, bump cleaned_count only for such case as otherwise next_to_clean buffer may be returned to hardware on reaching clean_threshold. 
Fixes: e9031f2da1ae ("i40e: introduce next_to_process to i40e_ring") Suggested-by: Maciej Fijalkowski Reported-by: hq.dev+kernel@msdfc.xyz Reported by: Solomon Peachy Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217678 Tested-by: hq.dev+kernel@msdfc.xyz Tested by: Indrek Järve Signed-off-by: Tirthendu Sarkar Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Jacob Keller Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20231019203852.3663665-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0b3a27f118fb..50c70a8e470a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2544,7 +2544,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, rx_buffer = i40e_rx_bi(rx_ring, ntp); i40e_inc_ntp(rx_ring); i40e_reuse_rx_page(rx_ring, rx_buffer); - cleaned_count++; + /* Update ntc and bump cleaned count if not in the + * middle of mb packet. + */ + if (rx_ring->next_to_clean == ntp) { + rx_ring->next_to_clean = + rx_ring->next_to_process; + cleaned_count++; + } continue; } -- cgit v1.2.3 From d2ca43f30611ac41bccfa8352c53f5432f0ecbb0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 19 Oct 2023 13:23:37 -0500 Subject: net: xgene: Fix unused xgene_enet_of_match warning for !CONFIG_OF Commit b0377116decd ("net: ethernet: Use device_get_match_data()") dropped the unconditional use of xgene_enet_of_match resulting in this warning: drivers/net/ethernet/apm/xgene/xgene_enet_main.c:2004:34: warning: unused variable 'xgene_enet_of_match' [-Wunused-const-variable] The fix is to drop of_match_ptr() which is not necessary because DT is always used for this driver (well, it could in theory support ACPI only, but CONFIG_OF is always enabled for arm64). 
Fixes: b0377116decd ("net: ethernet: Use device_get_match_data()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310170627.2Kvf6ZHY-lkp@intel.com/ Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 4d4140b7c450..f3305c434c95 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2170,7 +2170,7 @@ static void xgene_enet_shutdown(struct platform_device *pdev) static struct platform_driver xgene_enet_driver = { .driver = { .name = "xgene-enet", - .of_match_table = of_match_ptr(xgene_enet_of_match), + .of_match_table = xgene_enet_of_match, .acpi_match_table = ACPI_PTR(xgene_enet_acpi_match), }, .probe = xgene_enet_probe, -- cgit v1.2.3 From 95201f36f395df34321fcddbce12103e8bbe4970 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Fri, 20 Oct 2023 11:25:35 +0800 Subject: net: stmmac: update MAC capabilities when tx queues are updated Upon boot up, the driver will configure the MAC capabilities based on the maximum number of tx and rx queues. When the user changes the tx queues to single queue, the MAC should be capable of supporting Half Duplex, but the driver does not update the MAC capabilities when it is configured so. Using the stmmac_reinit_queues() to check the number of tx queues and set the MAC capabilities accordingly. Fixes: 0366f7e06a6b ("net: stmmac: add ethtool support for get/set channels") Cc: # 5.17+ Signed-off-by: Michael Sit Wei Hong Signed-off-by: Gan, Yi Fang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ed1a5a31a491..5801f4d50f95 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1197,6 +1197,17 @@ static int stmmac_init_phy(struct net_device *dev) return ret; } +static void stmmac_set_half_duplex(struct stmmac_priv *priv) +{ + /* Half-Duplex can only work with single tx queue */ + if (priv->plat->tx_queues_to_use > 1) + priv->phylink_config.mac_capabilities &= + ~(MAC_10HD | MAC_100HD | MAC_1000HD); + else + priv->phylink_config.mac_capabilities |= + (MAC_10HD | MAC_100HD | MAC_1000HD); +} + static int stmmac_phy_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data; @@ -1228,10 +1239,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) MAC_10FD | MAC_100FD | MAC_1000FD; - /* Half-Duplex can only work with single queue */ - if (priv->plat->tx_queues_to_use <= 1) - priv->phylink_config.mac_capabilities |= MAC_10HD | MAC_100HD | - MAC_1000HD; + stmmac_set_half_duplex(priv); /* Get the MAC specific capabilities */ stmmac_mac_phylink_get_caps(priv); @@ -7172,6 +7180,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->rss.table[i] = ethtool_rxfh_indir_default(i, rx_cnt); + stmmac_set_half_duplex(priv); stmmac_napi_add(dev); if (netif_running(dev)) -- cgit v1.2.3 From 965f9b8c0c1b37fa2a0e3ef56e40d5666d4cbb5c Mon Sep 17 00:00:00 2001 From: Dell Jin Date: Fri, 20 Oct 2023 09:20:53 +0300 Subject: net: ethernet: adi: adin1110: Fix uninitialized variable The spi_transfer struct has to have all its fields initialized to 0 in this case, since not all of them are set before starting the transfer. Otherwise, spi_sync_transfer() will sometimes return an error.
Fixes: a526a3cc9c8d ("net: ethernet: adi: adin1110: Fix SPI transfers") Signed-off-by: Dell Jin Signed-off-by: Ciprian Regus Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/adin1110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index ca66b747b7c5..d7c274af6d4d 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -294,7 +294,7 @@ static int adin1110_read_fifo(struct adin1110_port_priv *port_priv) { struct adin1110_priv *priv = port_priv->priv; u32 header_len = ADIN1110_RD_HEADER_LEN; - struct spi_transfer t; + struct spi_transfer t = {0}; u32 frame_size_no_fcs; struct sk_buff *rxb; u32 frame_size; -- cgit v1.2.3 From ca082f019d8fbb983f03080487946da714154bae Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Oct 2023 20:03:53 +0200 Subject: net: ieee802154: adf7242: Fix some potential buffer overflow in adf7242_stats_show() strncat() usage in adf7242_debugfs_init() is wrong. The size given to strncat() is the maximum number of bytes that can be written, excluding the trailing NULL. Here, the size that is passed, DNAME_INLINE_LEN, does not take into account the size of "adf7242-" that is already in the array. In order to fix it, use snprintf() instead. Fixes: 7302b9d90117 ("ieee802154/adf7242: Driver for ADF7242 MAC IEEE802154") Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/ieee802154/adf7242.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index a03490ba2e5b..cc7ddc40020f 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1162,9 +1162,10 @@ static int adf7242_stats_show(struct seq_file *file, void *offset) static void adf7242_debugfs_init(struct adf7242_local *lp) { - char debugfs_dir_name[DNAME_INLINE_LEN + 1] = "adf7242-"; + char debugfs_dir_name[DNAME_INLINE_LEN + 1]; - strncat(debugfs_dir_name, dev_name(&lp->spi->dev), DNAME_INLINE_LEN); + snprintf(debugfs_dir_name, sizeof(debugfs_dir_name), + "adf7242-%s", dev_name(&lp->spi->dev)); lp->debugfs_root = debugfs_create_dir(debugfs_dir_name, NULL); -- cgit v1.2.3 From 9f771493da935299c6393ad3563b581255d01a37 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 20 Oct 2023 17:27:59 +0800 Subject: net: chelsio: cxgb4: add an error code check in t4_load_phy_fw t4_set_params_timeout() can return -EINVAL if failed, add check for this. Signed-off-by: Su Hui Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8d719f82854a..76de55306c4d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3816,6 +3816,8 @@ int t4_load_phy_fw(struct adapter *adap, int win, FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_PHYFW_DOWNLOAD)); ret = t4_set_params_timeout(adap, adap->mbox, adap->pf, 0, 1, &param, &val, 30000); + if (ret) + return ret; /* If we have version number support, then check to see that the new * firmware got loaded properly.
-- cgit v1.2.3 From 51a32e828109b4a209efde44505baa356b37a4ce Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sat, 21 Oct 2023 02:03:44 +0900 Subject: net: usb: smsc95xx: Fix uninit-value access in smsc95xx_read_reg syzbot reported the following uninit-value access issue [1]: smsc95xx 1-1:0.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x00000030: -32 smsc95xx 1-1:0.0 (unnamed net_device) (uninitialized): Error reading E2P_CMD ===================================================== BUG: KMSAN: uninit-value in smsc95xx_reset+0x409/0x25f0 drivers/net/usb/smsc95xx.c:896 smsc95xx_reset+0x409/0x25f0 drivers/net/usb/smsc95xx.c:896 smsc95xx_bind+0x9bc/0x22e0 drivers/net/usb/smsc95xx.c:1131 usbnet_probe+0x100b/0x4060 drivers/net/usb/usbnet.c:1750 usb_probe_interface+0xc75/0x1210 drivers/usb/core/driver.c:396 really_probe+0x506/0xf40 drivers/base/dd.c:658 __driver_probe_device+0x2a7/0x5d0 drivers/base/dd.c:800 driver_probe_device+0x72/0x7b0 drivers/base/dd.c:830 __device_attach_driver+0x55a/0x8f0 drivers/base/dd.c:958 bus_for_each_drv+0x3ff/0x620 drivers/base/bus.c:457 __device_attach+0x3bd/0x640 drivers/base/dd.c:1030 device_initial_probe+0x32/0x40 drivers/base/dd.c:1079 bus_probe_device+0x3d8/0x5a0 drivers/base/bus.c:532 device_add+0x16ae/0x1f20 drivers/base/core.c:3622 usb_set_configuration+0x31c9/0x38c0 drivers/usb/core/message.c:2207 usb_generic_driver_probe+0x109/0x2a0 drivers/usb/core/generic.c:238 usb_probe_device+0x290/0x4a0 drivers/usb/core/driver.c:293 really_probe+0x506/0xf40 drivers/base/dd.c:658 __driver_probe_device+0x2a7/0x5d0 drivers/base/dd.c:800 driver_probe_device+0x72/0x7b0 drivers/base/dd.c:830 __device_attach_driver+0x55a/0x8f0 drivers/base/dd.c:958 bus_for_each_drv+0x3ff/0x620 drivers/base/bus.c:457 __device_attach+0x3bd/0x640 drivers/base/dd.c:1030 device_initial_probe+0x32/0x40 drivers/base/dd.c:1079 bus_probe_device+0x3d8/0x5a0 drivers/base/bus.c:532 device_add+0x16ae/0x1f20 drivers/base/core.c:3622 
usb_new_device+0x15f6/0x22f0 drivers/usb/core/hub.c:2589 hub_port_connect drivers/usb/core/hub.c:5440 [inline] hub_port_connect_change drivers/usb/core/hub.c:5580 [inline] port_event drivers/usb/core/hub.c:5740 [inline] hub_event+0x53bc/0x7290 drivers/usb/core/hub.c:5822 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x104e/0x1e70 kernel/workqueue.c:2703 worker_thread+0xf45/0x1490 kernel/workqueue.c:2784 kthread+0x3e8/0x540 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 Local variable buf.i225 created at: smsc95xx_read_reg drivers/net/usb/smsc95xx.c:90 [inline] smsc95xx_reset+0x203/0x25f0 drivers/net/usb/smsc95xx.c:892 smsc95xx_bind+0x9bc/0x22e0 drivers/net/usb/smsc95xx.c:1131 CPU: 1 PID: 773 Comm: kworker/1:2 Not tainted 6.6.0-rc1-syzkaller-00125-ge42bebf6db29 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 Workqueue: usb_hub_wq hub_event ===================================================== Similar to e9c65989920f ("net: usb: smsc75xx: Fix uninit-value access in __smsc75xx_read_reg"), this issue is caused because usbnet_read_cmd() reads less bytes than requested (zero byte in the reproducer). In this case, 'buf' is not properly filled. This patch fixes the issue by returning -ENODATA if usbnet_read_cmd() reads less bytes than requested. sysbot reported similar uninit-value access issue [2]. The root cause is the same as mentioned above, and this patch addresses it as well. Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") Reported-and-tested-by: syzbot+c74c24b43c9ae534f0e0@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+2c97a98a5ba9ea9c23bd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c74c24b43c9ae534f0e0 [1] Closes: https://syzkaller.appspot.com/bug?extid=2c97a98a5ba9ea9c23bd [2] Signed-off-by: Shigeru Yoshida Signed-off-by: David S. 
Miller --- drivers/net/usb/smsc95xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 17da42fe605c..a530f20ee257 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -95,7 +95,9 @@ static int __must_check smsc95xx_read_reg(struct usbnet *dev, u32 index, ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, &buf, 4); - if (ret < 0) { + if (ret < 4) { + ret = ret < 0 ? ret : -ENODATA; + if (ret != -ENODEV) netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n", index, ret); -- cgit v1.2.3 From a5feba71ec9c14a54c3babdc732c5b6866d8ee43 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:52 -0700 Subject: r8152: Increase USB control msg timeout to 5000ms as per spec According to the comment next to USB_CTRL_GET_TIMEOUT and USB_CTRL_SET_TIMEOUT, although sending/receiving control messages is usually quite fast, the spec allows them to take up to 5 seconds. Let's increase the timeout in the Realtek driver from 500ms to 5000ms (using the #defines) to account for this. This is not just a theoretical change. The need for the longer timeout was seen in testing. Specifically, if you drop a sc7180-trogdor based Chromebook into the kdb debugger and then "go" again after sitting in the debugger for a while, the next USB control message takes a long time. Out of ~40 tests the slowest USB control message was 4.5 seconds. While dropping into kdb is not exactly an end-user scenario, the above is similar to what could happen due to a temporary interrupt storm, what could happen if there was a host controller (HW or SW) issue, or what could happen if the Realtek device got into a confused state and needed time to recover. This change is fairly critical since the r8152 driver in Linux doesn't expect register reads/writes (which are backed by USB control messages) to fail.
Fixes: ac718b69301c ("net/usb: new driver for RTL8152") Suggested-by: Hayes Wang Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0c13d9950cd8..482957beae66 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1212,7 +1212,7 @@ int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in, RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - value, index, tmp, size, 500); + value, index, tmp, size, USB_CTRL_GET_TIMEOUT); if (ret < 0) memset(data, 0xff, size); else @@ -1235,7 +1235,7 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out, RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, - value, index, tmp, size, 500); + value, index, tmp, size, USB_CTRL_SET_TIMEOUT); kfree(tmp); @@ -9494,7 +9494,8 @@ static u8 __rtl_get_hw_ver(struct usb_device *udev) ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); + PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), + USB_CTRL_GET_TIMEOUT); if (ret > 0) ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK; -- cgit v1.2.3 From 5dd17689526971c5ae12bc8398f34bd68cd0499e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:53 -0700 Subject: r8152: Run the unload routine if we have errors during probe The rtl8152_probe() function lacks a call to the chip-specific unload() routine when it sees an error in probe. Add it in to match the cleanup code in rtl8152_disconnect(). Fixes: ac718b69301c ("net/usb: new driver for RTL8152") Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 482957beae66..201c688e3e3f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9783,6 +9783,8 @@ static int rtl8152_probe(struct usb_interface *intf, out1: tasklet_kill(&tp->tx_tl); + if (tp->rtl_ops.unload) + tp->rtl_ops.unload(tp); usb_set_intfdata(intf, NULL); out: free_netdev(netdev); -- cgit v1.2.3 From bb8adff9123e492598162ac1baad01a53891aef6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:54 -0700 Subject: r8152: Cancel hw_phy_work if we have an error in probe The error handling in rtl8152_probe() is missing a call to cancel the hw_phy_work. Add it in to match what's in the cleanup code in rtl8152_disconnect(). Fixes: a028a9e003f2 ("r8152: move the settings of PHY to a work queue") Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 201c688e3e3f..d10b0886b652 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9783,6 +9783,7 @@ static int rtl8152_probe(struct usb_interface *intf, out1: tasklet_kill(&tp->tx_tl); + cancel_delayed_work_sync(&tp->hw_phy_work); if (tp->rtl_ops.unload) tp->rtl_ops.unload(tp); usb_set_intfdata(intf, NULL); -- cgit v1.2.3 From b8d35024d4059ca550cba11ac9ab23a6c238d929 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:55 -0700 Subject: r8152: Release firmware if we have an error in probe The error handling in rtl8152_probe() is missing a call to release firmware. Add it in to match what's in the cleanup code in rtl8152_disconnect(). Fixes: 9370f2d05a2a ("r8152: support request_firmware for RTL8153") Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d10b0886b652..656fe90734fc 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9786,6 +9786,7 @@ out1: cancel_delayed_work_sync(&tp->hw_phy_work); if (tp->rtl_ops.unload) tp->rtl_ops.unload(tp); + rtl8152_release_firmware(tp); usb_set_intfdata(intf, NULL); out: free_netdev(netdev); -- cgit v1.2.3 From dc90ba37a8c37042407fa6970b9830890cfe6047 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:56 -0700 Subject: r8152: Check for unplug in rtl_phy_patch_request() If the adapter is unplugged while we're looping in rtl_phy_patch_request() we could end up looping for 10 seconds (2 ms * 5000 loops). Add code similar to what's done in other places in the driver to check for unplug and bail. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 656fe90734fc..9888bc43e903 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4046,6 +4046,9 @@ static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) for (i = 0; wait && i < 5000; i++) { u32 ocp_data; + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; + usleep_range(1000, 2000); ocp_data = ocp_reg_read(tp, OCP_PHY_PATCH_STAT); if ((ocp_data & PATCH_READY) ^ check) -- cgit v1.2.3 From bc65cc42af737a5a35f83842408ef2c6c79ba025 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:57 -0700 Subject: r8152: Check for unplug in r8153b_ups_en() / r8153c_ups_en() If the adapter is unplugged while we're looping in r8153b_ups_en() / r8153c_ups_en() we could end up looping for 10 seconds (20 ms * 500 loops). Add code similar to what's done in other places in the driver to check for unplug and bail. 
Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 9888bc43e903..982f9ca03e7a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3663,6 +3663,8 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; @@ -3703,6 +3705,8 @@ static void r8153c_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; -- cgit v1.2.3 From 715f67f33af45ce2cc3a5b1ef133cc8c8e7787b0 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:58 -0700 Subject: r8152: Rename RTL8152_UNPLUG to RTL8152_INACCESSIBLE Whenever the RTL8152_UNPLUG is set that just tells the driver that all accesses will fail and we should just immediately bail. A future patch will use this same concept at a time when the driver hasn't actually been unplugged but is about to be reset. Rename the flag in preparation for the future patch. This is a no-op change and just a search and replace. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 96 ++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 982f9ca03e7a..65232848b31d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -764,7 +764,7 @@ enum rtl_register_content { /* rtl8152 flags */ enum rtl8152_flags { - RTL8152_UNPLUG = 0, + RTL8152_INACCESSIBLE = 0, RTL8152_SET_RX_MODE, WORK_ENABLE, RTL8152_LINK_CHG, @@ -1245,7 +1245,7 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) static void rtl_set_unplug(struct r8152 *tp) { if (tp->udev->state == USB_STATE_NOTATTACHED) { - set_bit(RTL8152_UNPLUG, &tp->flags); + set_bit(RTL8152_INACCESSIBLE, &tp->flags); smp_mb__after_atomic(); } } @@ -1256,7 +1256,7 @@ static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, u16 limit = 64; int ret = 0; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; /* both size and indix must be 4 bytes align */ @@ -1300,7 +1300,7 @@ static int generic_ocp_write(struct r8152 *tp, u16 index, u16 byteen, u16 byteen_start, byteen_end, byen; u16 limit = 512; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; /* both size and indix must be 4 bytes align */ @@ -1537,7 +1537,7 @@ static int read_mii_word(struct net_device *netdev, int phy_id, int reg) struct r8152 *tp = netdev_priv(netdev); int ret; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; if (phy_id != R8152_PHY_ID) @@ -1553,7 +1553,7 @@ void write_mii_word(struct net_device *netdev, int phy_id, int reg, int val) { struct r8152 *tp = netdev_priv(netdev); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (phy_id != R8152_PHY_ID) @@ -1758,7 +1758,7 @@ static void read_bulk_callback(struct urb 
*urb) if (!tp) return; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (!test_bit(WORK_ENABLE, &tp->flags)) @@ -1850,7 +1850,7 @@ static void write_bulk_callback(struct urb *urb) if (!test_bit(WORK_ENABLE, &tp->flags)) return; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (!skb_queue_empty(&tp->tx_queue)) @@ -1871,7 +1871,7 @@ static void intr_callback(struct urb *urb) if (!test_bit(WORK_ENABLE, &tp->flags)) return; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; switch (status) { @@ -2615,7 +2615,7 @@ static void bottom_half(struct tasklet_struct *t) { struct r8152 *tp = from_tasklet(tp, t, tx_tl); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (!test_bit(WORK_ENABLE, &tp->flags)) @@ -2658,7 +2658,7 @@ int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags) int ret; /* The rx would be stopped, so skip submitting */ - if (test_bit(RTL8152_UNPLUG, &tp->flags) || + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags) || !test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev)) return 0; @@ -3058,7 +3058,7 @@ static int rtl_enable(struct r8152 *tp) static int rtl8152_enable(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; set_tx_qlen(tp); @@ -3145,7 +3145,7 @@ static int rtl8153_enable(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; set_tx_qlen(tp); @@ -3177,7 +3177,7 @@ static void rtl_disable(struct r8152 *tp) u32 ocp_data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -3631,7 +3631,7 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } 
msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; } @@ -3663,7 +3663,7 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) @@ -3705,7 +3705,7 @@ static void r8153c_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) @@ -4050,8 +4050,8 @@ static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) for (i = 0; wait && i < 5000; i++) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) - break; + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; usleep_range(1000, 2000); ocp_data = ocp_reg_read(tp, OCP_PHY_PATCH_STAT); @@ -6009,7 +6009,7 @@ static int rtl8156_enable(struct r8152 *tp) u32 ocp_data; u16 speed; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; r8156_fc_parameter(tp); @@ -6067,7 +6067,7 @@ static int rtl8156b_enable(struct r8152 *tp) u32 ocp_data; u16 speed; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; set_tx_qlen(tp); @@ -6253,7 +6253,7 @@ out: static void rtl8152_up(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8152_aldps_en(tp, false); @@ -6263,7 +6263,7 @@ static void rtl8152_up(struct r8152 *tp) static void rtl8152_down(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6278,7 +6278,7 @@ static void rtl8153_up(struct r8152 *tp) { u32 ocp_data; - if 
(test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153_u1u2en(tp, false); @@ -6318,7 +6318,7 @@ static void rtl8153_down(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6339,7 +6339,7 @@ static void rtl8153b_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -6363,7 +6363,7 @@ static void rtl8153b_down(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6400,7 +6400,7 @@ static void rtl8153c_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -6481,7 +6481,7 @@ static void rtl8156_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -6554,7 +6554,7 @@ static void rtl8156_down(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6692,7 +6692,7 @@ static void rtl_work_func_t(struct work_struct *work) /* If the device is unplugged or !netif_running(), the workqueue * doesn't need to wake the device, and could return directly. 
*/ - if (test_bit(RTL8152_UNPLUG, &tp->flags) || !netif_running(tp->netdev)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags) || !netif_running(tp->netdev)) return; if (usb_autopm_get_interface(tp->intf) < 0) @@ -6731,7 +6731,7 @@ static void rtl_hw_phy_work_func_t(struct work_struct *work) { struct r8152 *tp = container_of(work, struct r8152, hw_phy_work.work); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (usb_autopm_get_interface(tp->intf) < 0) @@ -6858,7 +6858,7 @@ static int rtl8152_close(struct net_device *netdev) netif_stop_queue(netdev); res = usb_autopm_get_interface(tp->intf); - if (res < 0 || test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (res < 0 || test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); rtl_stop_rx(tp); } else { @@ -6891,7 +6891,7 @@ static void r8152b_init(struct r8152 *tp) u32 ocp_data; u16 data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; data = r8152_mdio_read(tp, MII_BMCR); @@ -6935,7 +6935,7 @@ static void r8153_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153_u1u2en(tp, false); @@ -6946,7 +6946,7 @@ static void r8153_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; } @@ -7075,7 +7075,7 @@ static void r8153b_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -7086,7 +7086,7 @@ static void r8153b_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; } @@ -7157,7 +7157,7 @@ static void r8153c_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, 
&tp->flags)) return; r8153b_u1u2en(tp, false); @@ -7177,7 +7177,7 @@ static void r8153c_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; } @@ -8006,7 +8006,7 @@ static void r8156_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP); @@ -8027,7 +8027,7 @@ static void r8156_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; } @@ -8102,7 +8102,7 @@ static void r8156b_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP); @@ -8136,7 +8136,7 @@ static void r8156b_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; } @@ -9165,7 +9165,7 @@ static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) struct mii_ioctl_data *data = if_mii(rq); int res; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; res = usb_autopm_get_interface(tp->intf); @@ -9267,7 +9267,7 @@ static const struct net_device_ops rtl8152_netdev_ops = { static void rtl8152_unload(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (tp->version != RTL_VER_01) @@ -9276,7 +9276,7 @@ static void rtl8152_unload(struct r8152 *tp) static void rtl8153_unload(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153_power_cut_en(tp, false); @@ -9284,7 +9284,7 @@ static void rtl8153_unload(struct r8152 *tp) static void rtl8153b_unload(struct 
r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_power_cut_en(tp, false); -- cgit v1.2.3 From d9962b0d42029bcb40fe3c38bce06d1870fa4df4 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:59 -0700 Subject: r8152: Block future register access if register access fails Even though the functions to read/write registers can fail, most of the places in the r8152 driver that read/write register values don't check error codes. The lack of error code checking is problematic in at least two ways. The first problem is that the r8152 driver often uses code patterns similar to this: x = read_register() x = x | SOME_BIT; write_register(x); ...with the above pattern, if the read_register() fails and returns garbage then we'll end up trying to write modified garbage back to the Realtek adapter. If the write_register() succeeds that's bad. Note that as of commit f53a7ad18959 ("r8152: Set memory to all 0xFFs on failed reg reads") the "garbage" returned by read_register() will at least be consistent garbage, but it is still garbage. It turns out that this problem is very serious. Writing garbage to some of the hardware registers on the Ethernet adapter can put the adapter in such a bad state that it needs to be power cycled (fully unplugged and plugged in again) before it can enumerate again. The second problem is that the r8152 driver generally has functions that are long sequences of register writes. Assuming everything will be OK if a random register write fails in the middle isn't a great assumption. One might wonder if the above two problems are real. You could ask if we would really have a successful write after a failed read. It turns out that the answer appears to be "yes, this can happen". In fact, we've seen at least two distinct failure modes where this happens. On a sc7180-trogdor Chromebook if you drop into kdb for a while and then resume, you can see: 1. 
We get a "Tx timeout" 2. The "Tx timeout" queues up a USB reset. 3. In rtl8152_pre_reset() we try to reinit the hardware. 4. The first several (2-9) register accesses fail with a timeout, then things recover. The above test case was actually fixed by the patch ("r8152: Increase USB control msg timeout to 5000ms as per spec") but at least shows that we really can see successful calls after failed ones. On a different (AMD) based Chromebook with a particular adapter, we found that during reboot tests we'd also sometimes get a transitory failure. In this case we saw -EPIPE being returned sometimes. Retrying worked, but retrying is not always safe for all register accesses since reading/writing some registers might have side effects (like registers that clear on read). Let's fully lock out all register access if a register access fails. When we do this, we'll try to queue up a USB reset and try to unlock register access after the reset. This is slightly trickier than it sounds since the r8152 driver has an optimized reset sequence that only works reliably after probe happens. In order to handle this, we avoid the optimized reset if probe didn't finish. Instead, we simply retry the probe routine in this case. When locking out access, we'll use the existing infrastructure that the driver was using when it detected we were unplugged. This keeps us from getting stuck in delay loops in some parts of the driver. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 207 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 176 insertions(+), 31 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 65232848b31d..afb20c0ed688 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -773,6 +773,9 @@ enum rtl8152_flags { SCHEDULE_TASKLET, GREEN_ETHERNET, RX_EPROTO, + IN_PRE_RESET, + PROBED_WITH_NO_ERRORS, + PROBE_SHOULD_RETRY, }; #define DEVICE_ID_LENOVO_USB_C_TRAVEL_HUB 0x721e @@ -953,6 +956,8 @@ struct r8152 { u8 version; u8 duplex; u8 autoneg; + + unsigned int reg_access_reset_count; }; /** @@ -1200,6 +1205,96 @@ static unsigned int agg_buf_sz = 16384; #define RTL_LIMITED_TSO_SIZE (size_to_mtu(agg_buf_sz) - sizeof(struct tx_desc)) +/* If register access fails then we block access and issue a reset. If this + * happens too many times in a row without a successful access then we stop + * trying to reset and just leave access blocked. + */ +#define REGISTER_ACCESS_MAX_RESETS 3 + +static void rtl_set_inaccessible(struct r8152 *tp) +{ + set_bit(RTL8152_INACCESSIBLE, &tp->flags); + smp_mb__after_atomic(); +} + +static void rtl_set_accessible(struct r8152 *tp) +{ + clear_bit(RTL8152_INACCESSIBLE, &tp->flags); + smp_mb__after_atomic(); +} + +static +int r8152_control_msg(struct r8152 *tp, unsigned int pipe, __u8 request, + __u8 requesttype, __u16 value, __u16 index, void *data, + __u16 size, const char *msg_tag) +{ + struct usb_device *udev = tp->udev; + int ret; + + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; + + ret = usb_control_msg(udev, pipe, request, requesttype, + value, index, data, size, + USB_CTRL_GET_TIMEOUT); + + /* No need to issue a reset to report an error if the USB device got + * unplugged; just return immediately. 
+ */ + if (ret == -ENODEV) + return ret; + + /* If the write was successful then we're done */ + if (ret >= 0) { + tp->reg_access_reset_count = 0; + return ret; + } + + dev_err(&udev->dev, + "Failed to %s %d bytes at %#06x/%#06x (%d)\n", + msg_tag, size, value, index, ret); + + /* Block all future register access until we reset. Much of the code + * in the driver doesn't check for errors. Notably, many parts of the + * driver do a read/modify/write of a register value without + * confirming that the read succeeded. Writing back modified garbage + * like this can fully wedge the adapter, requiring a power cycle. + */ + rtl_set_inaccessible(tp); + + /* If probe hasn't yet finished, then we'll request a retry of the + * whole probe routine if we get any control transfer errors. We + * never have to clear this bit since we free/reallocate the whole "tp" + * structure if we retry probe. + */ + if (!test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) { + set_bit(PROBE_SHOULD_RETRY, &tp->flags); + return ret; + } + + /* Failing to access registers in pre-reset is not surprising since we + * wouldn't be resetting if things were behaving normally. The register + * access we do in pre-reset isn't truly mandatory--we're just reusing + * the disable() function and trying to be nice by powering the + * adapter down before resetting it. Thus, if we're in pre-reset, + * we'll return right away and not try to queue up yet another reset. + * We know the post-reset is already coming. 
+ */ + if (test_bit(IN_PRE_RESET, &tp->flags)) + return ret; + + if (tp->reg_access_reset_count < REGISTER_ACCESS_MAX_RESETS) { + usb_queue_reset_device(tp->intf); + tp->reg_access_reset_count++; + } else if (tp->reg_access_reset_count == REGISTER_ACCESS_MAX_RESETS) { + dev_err(&udev->dev, + "Tried to reset %d times; giving up.\n", + REGISTER_ACCESS_MAX_RESETS); + } + + return ret; +} + static int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) { @@ -1210,9 +1305,10 @@ int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) if (!tmp) return -ENOMEM; - ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in, - RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - value, index, tmp, size, USB_CTRL_GET_TIMEOUT); + ret = r8152_control_msg(tp, tp->pipe_ctrl_in, + RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, + value, index, tmp, size, "read"); + if (ret < 0) memset(data, 0xff, size); else @@ -1233,9 +1329,9 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) if (!tmp) return -ENOMEM; - ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out, - RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, - value, index, tmp, size, USB_CTRL_SET_TIMEOUT); + ret = r8152_control_msg(tp, tp->pipe_ctrl_out, + RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, + value, index, tmp, size, "write"); kfree(tmp); @@ -1244,10 +1340,8 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) static void rtl_set_unplug(struct r8152 *tp) { - if (tp->udev->state == USB_STATE_NOTATTACHED) { - set_bit(RTL8152_INACCESSIBLE, &tp->flags); - smp_mb__after_atomic(); - } + if (tp->udev->state == USB_STATE_NOTATTACHED) + rtl_set_inaccessible(tp); } static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, @@ -8262,7 +8356,7 @@ static int rtl8152_pre_reset(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev; - if (!tp) + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; netdev 
= tp->netdev; @@ -8277,7 +8371,9 @@ static int rtl8152_pre_reset(struct usb_interface *intf) napi_disable(&tp->napi); if (netif_carrier_ok(netdev)) { mutex_lock(&tp->control); + set_bit(IN_PRE_RESET, &tp->flags); tp->rtl_ops.disable(tp); + clear_bit(IN_PRE_RESET, &tp->flags); mutex_unlock(&tp->control); } @@ -8290,9 +8386,11 @@ static int rtl8152_post_reset(struct usb_interface *intf) struct net_device *netdev; struct sockaddr sa; - if (!tp) + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; + rtl_set_accessible(tp); + /* reset the MAC address in case of policy change */ if (determine_ethernet_addr(tp, &sa) >= 0) { rtnl_lock(); @@ -9494,17 +9592,29 @@ static u8 __rtl_get_hw_ver(struct usb_device *udev) __le32 *tmp; u8 version; int ret; + int i; tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) return 0; - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), - USB_CTRL_GET_TIMEOUT); - if (ret > 0) - ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK; + /* Retry up to 3 times in case there is a transitory error. We do this + * since retrying a read of the version is always safe and this + * function doesn't take advantage of r8152_control_msg(). 
+ */ + for (i = 0; i < 3; i++) { + ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), + RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, + PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), + USB_CTRL_GET_TIMEOUT); + if (ret > 0) { + ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK; + break; + } + } + + if (i != 0 && ret > 0) + dev_warn(&udev->dev, "Needed %d retries to read version\n", i); kfree(tmp); @@ -9603,25 +9713,14 @@ static bool rtl8152_supports_lenovo_macpassthru(struct usb_device *udev) return 0; } -static int rtl8152_probe(struct usb_interface *intf, - const struct usb_device_id *id) +static int rtl8152_probe_once(struct usb_interface *intf, + const struct usb_device_id *id, u8 version) { struct usb_device *udev = interface_to_usbdev(intf); struct r8152 *tp; struct net_device *netdev; - u8 version; int ret; - if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) - return -ENODEV; - - if (!rtl_check_vendor_ok(intf)) - return -ENODEV; - - version = rtl8152_get_version(intf); - if (version == RTL_VER_UNKNOWN) - return -ENODEV; - usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { @@ -9784,10 +9883,20 @@ static int rtl8152_probe(struct usb_interface *intf, else device_set_wakeup_enable(&udev->dev, false); + /* If we saw a control transfer error while probing then we may + * want to try probe() again. Consider this an error. 
+ */ + if (test_bit(PROBE_SHOULD_RETRY, &tp->flags)) + goto out2; + + set_bit(PROBED_WITH_NO_ERRORS, &tp->flags); netif_info(tp, probe, netdev, "%s\n", DRIVER_VERSION); return 0; +out2: + unregister_netdev(netdev); + out1: tasklet_kill(&tp->tx_tl); cancel_delayed_work_sync(&tp->hw_phy_work); @@ -9796,10 +9905,46 @@ out1: rtl8152_release_firmware(tp); usb_set_intfdata(intf, NULL); out: + if (test_bit(PROBE_SHOULD_RETRY, &tp->flags)) + ret = -EAGAIN; + free_netdev(netdev); return ret; } +#define RTL8152_PROBE_TRIES 3 + +static int rtl8152_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + u8 version; + int ret; + int i; + + if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + return -ENODEV; + + if (!rtl_check_vendor_ok(intf)) + return -ENODEV; + + version = rtl8152_get_version(intf); + if (version == RTL_VER_UNKNOWN) + return -ENODEV; + + for (i = 0; i < RTL8152_PROBE_TRIES; i++) { + ret = rtl8152_probe_once(intf, id, version); + if (ret != -EAGAIN) + break; + } + if (ret == -EAGAIN) { + dev_err(&intf->dev, + "r8152 failed probe after %d tries; giving up\n", i); + return -ENODEV; + } + + return ret; +} + static void rtl8152_disconnect(struct usb_interface *intf) { struct r8152 *tp = usb_get_intfdata(intf); -- cgit v1.2.3 From d2a0fc372aca561556e765d0a9ec365c7c12f0ad Mon Sep 17 00:00:00 2001 From: Fred Chen Date: Sat, 21 Oct 2023 08:19:47 +0800 Subject: tcp: fix wrong RTO timeout when received SACK reneging This commit fixes a wrong RTO timeout when SACK reneging is received. When an ACK arrives pointing to a SACK reneging, tcp_check_sack_reneging() will rearm the RTO timer for max(1/2*srtt, 10ms) into the future. But since commit 62d9f1a6945b ("tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN") was merged, tcp_set_xmit_timer() has been moved after tcp_fastretrans_alert() (which does the SACK reneging check), so the RTO timeout will be overwritten by tcp_set_xmit_timer() with icsk_rto instead of 1/2*srtt. 
Here is a packetdrill script to check this bug: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 // simulate srtt to 100ms +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +.1 < . 1:1(0) ack 1 win 1024 +0 accept(3, ..., ...) = 4 +0 write(4, ..., 10000) = 10000 +0 > P. 1:10001(10000) ack 1 // inject sack +.1 < . 1:1(0) ack 1 win 257 +0 > . 1:1001(1000) ack 1 // inject sack reneging +.1 < . 1:1(0) ack 1001 win 257 // we expect rto fired in 1/2*srtt (50ms) +.05 > . 1001:2001(1000) ack 1 This fix removes FLAG_SET_XMIT_TIMER from ack_flag when tcp_check_sack_reneging() sets the RTO timer to 1/2*srtt, to avoid it being overwritten later. Fixes: 62d9f1a6945b ("tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN") Signed-off-by: Fred Chen Reviewed-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8afb0950a697..804821d6bd4d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2207,16 +2207,17 @@ void tcp_enter_loss(struct sock *sk) * restore sanity to the SACK scoreboard. If the apparent reneging * persists until this RTO then we'll clear the SACK scoreboard. */ -static bool tcp_check_sack_reneging(struct sock *sk, int flag) +static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) { - if (flag & FLAG_SACK_RENEGING && - flag & FLAG_SND_UNA_ADVANCED) { + if (*ack_flag & FLAG_SACK_RENEGING && + *ack_flag & FLAG_SND_UNA_ADVANCED) { struct tcp_sock *tp = tcp_sk(sk); unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); + *ack_flag &= ~FLAG_SET_XMIT_TIMER; return true; } return false; @@ -2986,7 +2987,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. 
In all the states check for reneging SACKs. */ - if (tcp_check_sack_reneging(sk, flag)) + if (tcp_check_sack_reneging(sk, ack_flag)) return; /* C. Check consistency of the current state. */ -- cgit v1.2.3 From 13454e6e0df2ff37853596d546438ac84ca6a413 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 23 Oct 2023 14:37:58 +0800 Subject: isdn: mISDN: hfcsusb: Spelling fix in comment protocoll -> protocol Signed-off-by: Kunwu Chan Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcsusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 1efd17979f24..b82b89888a5e 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -678,7 +678,7 @@ ph_state(struct dchannel *dch) } /* - * disable/enable BChannel for desired protocoll + * disable/enable BChannel for desired protocol */ static int hfcsusb_setup_bch(struct bchannel *bch, int protocol) -- cgit v1.2.3 From 3e3929ef889e650dd585dc0f4f7f18240688811a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 21 Oct 2023 08:48:27 -0700 Subject: wifi: cfg80211: pass correct pointer to rdev_inform_bss() Confusing struct member names here resulted in passing the wrong pointer, causing crashes. Pass the correct one. 
Fixes: eb142608e2c4 ("wifi: cfg80211: use a struct for inform_single_bss data") Signed-off-by: Ben Greear Link: https://lore.kernel.org/r/20231021154827.1142734-1-greearb@candelatech.com [rewrite commit message, add fixes] Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 939deecf0bbe..8210a6090ac1 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2125,7 +2125,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, if (!res) goto drop; - rdev_inform_bss(rdev, &res->pub, ies, data->drv_data); + rdev_inform_bss(rdev, &res->pub, ies, drv_data->drv_data); if (data->bss_source == BSS_SOURCE_MBSSID) { /* this is a nontransmitting bss, we need to add it to -- cgit v1.2.3 From c434b2be2d80d236bb090fdb493d4bd5ed589238 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Oct 2023 11:42:51 +0200 Subject: wifi: cfg80211: fix assoc response warning on failed links The warning here shouldn't be done before we even set the bss field (or should've used the input data). Move the assignment before the warning to fix it. We noticed this now because of Wen's bugfix, where the bug fixed there had previously hidden this other bug. 
Fixes: 53ad07e9823b ("wifi: cfg80211: support reporting failed links") Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3e2c398abddc..55a1d3633853 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -43,10 +43,11 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) { cr.links[link_id].status = data->links[link_id].status; + cr.links[link_id].bss = data->links[link_id].bss; + WARN_ON_ONCE(cr.links[link_id].status != WLAN_STATUS_SUCCESS && (!cr.ap_mld_addr || !cr.links[link_id].bss)); - cr.links[link_id].bss = data->links[link_id].bss; if (!cr.links[link_id].bss) continue; cr.links[link_id].bssid = data->links[link_id].bss->bssid; -- cgit v1.2.3 From 91535613b6090fc968c601d11d4e2f16b333713c Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 16 Oct 2023 14:52:48 +0300 Subject: wifi: mac80211: don't drop all unprotected public action frames Not all public action frames have a protected variant. When MFP is enabled drop only public action frames that have a dual protected variant. 
Fixes: 76a3059cf124 ("wifi: mac80211: drop some unprotected action frames") Signed-off-by: Avraham Stern Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231016145213.2973e3c8d3bb.I6198b8d3b04cf4a97b06660d346caec3032f232a@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 29 +++++++++++++++++++++++++++++ net/mac80211/rx.c | 3 +-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bd2f6e19c357..b24fb80782c5 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4355,6 +4355,35 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } +/** + * ieee80211_is_protected_dual_of_public_action - check if skb contains a + * protected dual of public action management frame + * @skb: the skb containing the frame, length will be checked + * + * Return: true if the skb contains a protected dual of public action + * management frame, false otherwise. 
+ */ +static inline bool +ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) +{ + u8 action; + + if (!ieee80211_is_public_action((void *)skb->data, skb->len) || + skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + return false; + + action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + + return action != WLAN_PUB_ACTION_20_40_BSS_COEX && + action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && + action != WLAN_PUB_ACTION_MSMT_PILOT && + action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && + action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && + action != WLAN_PUB_ACTION_FTM_REQUEST && + action != WLAN_PUB_ACTION_FTM_RESPONSE && + action != WLAN_PUB_ACTION_FILS_DISCOVERY; +} + /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e751cda5eef6..8f6b6f56b65b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2468,8 +2468,7 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) /* drop unicast public action frames when using MPF */ if (is_unicast_ether_addr(mgmt->da) && - ieee80211_is_public_action((void *)rx->skb->data, - rx->skb->len)) + ieee80211_is_protected_dual_of_public_action(rx->skb)) return -EACCES; } -- cgit v1.2.3 From 7798b59409c345d4a6034a4326bceb9f7e2e8b58 Mon Sep 17 00:00:00 2001 From: Moritz Wanzenböck Date: Thu, 19 Oct 2023 14:58:47 +0200 Subject: net/handshake: fix file ref count in handshake_nl_accept_doit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If req->hr_proto->hp_accept() fail, we call fput() twice: Once in the error path, but also a second time because sock->file is at that point already associated with the file descriptor. Once the task exits, as it would probably do after receiving an error reading from netlink, the fd is closed, calling fput() a second time. To fix, we move installing the file after the error path for the hp_accept() call. 
In the case of errors we simply put the unused fd. In case of success we can use fd_install() to link the sock->file to the reserved fd. Fixes: 7ea9c1ec66bc ("net/handshake: Fix handshake_dup() ref counting") Signed-off-by: Moritz Wanzenböck Reviewed-by: Chuck Lever Link: https://lore.kernel.org/r/20231019125847.276443-1-moritz.wanzenboeck@linbit.com Signed-off-by: Jakub Kicinski --- net/handshake/netlink.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index d0bc1dd8e65a..80c7302692c7 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -87,29 +87,6 @@ struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, } EXPORT_SYMBOL(handshake_genl_put); -/* - * dup() a kernel socket for use as a user space file descriptor - * in the current process. The kernel socket must have an - * instatiated struct file. - * - * Implicit argument: "current()" - */ -static int handshake_dup(struct socket *sock) -{ - struct file *file; - int newfd; - - file = get_file(sock->file); - newfd = get_unused_fd_flags(O_CLOEXEC); - if (newfd < 0) { - fput(file); - return newfd; - } - - fd_install(newfd, file); - return newfd; -} - int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); @@ -133,17 +110,20 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) goto out_status; sock = req->hr_sk->sk_socket; - fd = handshake_dup(sock); + fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { err = fd; goto out_complete; } + err = req->hr_proto->hp_accept(req, info, fd); if (err) { - fput(sock->file); + put_unused_fd(fd); goto out_complete; } + fd_install(fd, get_file(sock->file)); + trace_handshake_cmd_accept(net, req, req->hr_sk, fd); return 0; -- cgit v1.2.3 From d788c9338342a3146d115281922901c1e3e1cbff Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 20 Oct 2023 15:01:49 +0100 Subject: 
sfc: cleanup and reduce netlink error messages Reduce the length of netlink error messages as they are likely to be truncated anyway. Additionally, reword netlink error messages so they are more consistent with previous messages. Fixes: 9dbc8d2b9a02 ("sfc: add decrement ipv6 hop limit by offloading set hop limit actions") Fixes: 3c9561c0a5b9 ("sfc: support TC decap rules matching on enc_ip_tos") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310202136.4u7bv0hp-lkp@intel.com/ Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20231020140149.30490-1-pieter.jansen-van-vuuren@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 834f000ba1c4..30ebef88248d 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -629,14 +629,14 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, } if (child_ip_tos_mask != old->child_ip_tos_mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x", + "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x", child_ip_tos_mask, old->child_ip_tos_mask); return -EEXIST; } if (child_udp_sport_mask != old->child_udp_sport_mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x", + "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x", child_udp_sport_mask, old->child_udp_sport_mask); return -EEXIST; @@ -1081,7 +1081,7 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that we do not decrement ttl twice */ if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { - 
NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); + NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported"); return -EOPNOTSUPP; } act->do_ttl_dec = 1; @@ -1106,7 +1106,7 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that we do not decrement hoplimit twice */ if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); + NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported"); return -EOPNOTSUPP; } act->do_ttl_dec = 1; @@ -1120,7 +1120,7 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, } NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: ttl add action type %x %x %x/%x", + "ttl add action type %x %x %x/%x is not supported", fa->mangle.htype, fa->mangle.offset, fa->mangle.val, fa->mangle.mask); return -EOPNOTSUPP; @@ -1164,7 +1164,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, case 0: if (fa->mangle.mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) of eth.dst32 mangle", + "mask (%#x) of eth.dst32 mangle is not supported", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1184,7 +1184,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, mung->dst_mac_16 = 1; } else { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b", + "mask (%#x) of eth+4 mangle is not high or low 16b", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1192,7 +1192,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, case 8: if (fa->mangle.mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) of eth.src32 mangle", + "mask (%#x) of eth.src32 mangle is not supported", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1201,7 +1201,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, mung->src_mac_32 = 1; return efx_tc_complete_mac_mangle(efx, act, mung, 
extack); default: - NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x", + NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported", fa->mangle.offset, fa->mangle.val, fa->mangle.mask); return -EOPNOTSUPP; } @@ -1217,7 +1217,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that pedit applies to ttl only */ if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl", + "mask (%#x) out of range, only support mangle action on ipv4.ttl", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1227,7 +1227,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->mask.ip_ttl != U8_MAX) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)", + "only support mangle ttl when we have an exact match, current mask (%#x)", match->mask.ip_ttl); return -EOPNOTSUPP; } @@ -1237,7 +1237,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->value.ip_ttl == 0) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: we cannot decrement ttl past 0"); + "decrement ttl past 0 is not supported"); return -EOPNOTSUPP; } @@ -1245,7 +1245,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: multiple dec ttl"); + "multiple dec ttl is not supported"); return -EOPNOTSUPP; } @@ -1259,7 +1259,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, fallthrough; default: NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle on the ttl field (offset is %u)", + "only support mangle on the ttl field (offset is %u)", fa->mangle.offset); return -EOPNOTSUPP; } @@ -1275,7 +1275,7 @@ static int efx_tc_mangle(struct efx_nic 
*efx, struct efx_tc_action_set *act, /* check that pedit applies to ttl only */ if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit", + "mask (%#x) out of range, only support mangle action on ipv6.hop_limit", fa->mangle.mask); return -EOPNOTSUPP; @@ -1286,7 +1286,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->mask.ip_ttl != U8_MAX) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)", + "only support hop_limit when we have an exact match, current mask (%#x)", match->mask.ip_ttl); return -EOPNOTSUPP; } @@ -1296,7 +1296,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->value.ip_ttl == 0) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: we cannot decrement hop_limit past 0"); + "decrementing hop_limit past 0 is not supported"); return -EOPNOTSUPP; } @@ -1304,7 +1304,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: multiple dec ttl"); + "multiple dec ttl is not supported"); return -EOPNOTSUPP; } @@ -1318,7 +1318,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, fallthrough; default: NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle on the hop_limit field"); + "only support mangle on the hop_limit field"); return -EOPNOTSUPP; } default: -- cgit v1.2.3 From 9644bc49705723bf7c69aa9bf542bb5161b91dba Mon Sep 17 00:00:00 2001 From: Anjali Kulkarni Date: Fri, 20 Oct 2023 16:40:58 -0700 Subject: Fix NULL pointer dereference in cn_filter() Check that sk_user_data is not NULL, else return from cn_filter(). 
Could not reproduce this issue, but Oliver Sang verified it has fixed the "Closes" problem below. Fixes: 2aa1f7a1f47c ("connector/cn_proc: Add filtering to fix some bugs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202309201456.84c19e27-oliver.sang@intel.com/ Signed-off-by: Anjali Kulkarni Link: https://lore.kernel.org/r/20231020234058.2232347-1-anjali.k.kulkarni@oracle.com Signed-off-by: Paolo Abeni --- drivers/connector/cn_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 05d562e9c8b1..44b19e696176 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -54,7 +54,7 @@ static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data) enum proc_cn_mcast_op mc_op; uintptr_t val; - if (!dsk || !data) + if (!dsk || !dsk->sk_user_data || !data) return 0; ptr = (__u32 *)data; -- cgit v1.2.3 From adc8df12d91a2b8350b0cd4c7fec3e8546c9d1f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 22 Oct 2023 22:25:17 +0200 Subject: gtp: uapi: fix GTPA_MAX Subtract one from __GTPA_MAX, otherwise GTPA_MAX is off by 2. 
Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Paolo Abeni --- include/uapi/linux/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 2f61298a7b77..3dcdb9e33cba 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -33,6 +33,6 @@ enum gtp_attrs { GTPA_PAD, __GTPA_MAX, }; -#define GTPA_MAX (__GTPA_MAX + 1) +#define GTPA_MAX (__GTPA_MAX - 1) #endif /* _UAPI_LINUX_GTP_H_ */ -- cgit v1.2.3 From 4530e5b8e2dad63dcad2206232dd86e4b1489b6c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 22 Oct 2023 22:25:18 +0200 Subject: gtp: fix fragmentation needed check with gso Call skb_gso_validate_network_len() to check if packet is over PMTU. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Paolo Abeni --- drivers/net/gtp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 144ec626230d..b22596b18ee8 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -872,8 +872,9 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, skb_dst_update_pmtu_no_confirm(skb, mtu); - if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && - mtu < ntohs(iph->tot_len)) { + if (iph->frag_off & htons(IP_DF) && + ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)))) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); -- cgit v1.2.3 From 77a8c982ff0d4c3a14022c6fe9e3dbfb327552ec Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 23 Oct 2023 14:27:14 -0700 Subject: i40e: Fix wrong check for I40E_TXR_FLAGS_WB_ON_ITR The I40E_TXR_FLAGS_WB_ON_ITR is i40e_ring flag and not i40e_pf one. 
Fixes: 8e0764b4d6be42 ("i40e/i40evf: Add support for writeback on ITR feature for X722") Signed-off-by: Ivan Vecera Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231023212714.178032-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 50c70a8e470a..b047c587629b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2854,7 +2854,7 @@ tx_only: return budget; } - if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) + if (q_vector->tx.ring[0].flags & I40E_TXR_FLAGS_WB_ON_ITR) q_vector->arm_wb_state = false; /* Exit the polling mode, but don't re-enable interrupts if stack might -- cgit v1.2.3 From 735795f68b37e9bb49f642407a0d49b1631ea1c7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Oct 2023 21:09:47 +0200 Subject: netfilter: flowtable: GC pushes back packets to classic path Since 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple"), flowtable GC pushes flows with IPS_SEEN_REPLY back to the classic path in every run, i.e. every second. This is because of a new check for NF_FLOW_HW_ESTABLISHED which is specific to sched/act_ct. In Netfilter's flowtable case, NF_FLOW_HW_ESTABLISHED never gets set, and IPS_SEEN_REPLY is unreliable: users decide when to offload the flow, so this bit might be set at a later stage. Fix it by adding a custom .gc handler that sched/act_ct can use to deal with its NF_FLOW_HW_ESTABLISHED bit. 
Fixes: 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple") Reported-by: Vladimir Smelhaus Reviewed-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + net/netfilter/nf_flow_table_core.c | 14 +++++++------- net/sched/act_ct.c | 7 +++++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d466e1a3b0b1..fe1507c1db82 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -53,6 +53,7 @@ struct nf_flowtable_type { struct list_head list; int family; int (*init)(struct nf_flowtable *ft); + bool (*gc)(const struct flow_offload *flow); int (*setup)(struct nf_flowtable *ft, struct net_device *dev, enum flow_block_command cmd); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1d34d700bd09..920a5a29ae1d 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -316,12 +316,6 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_refresh); -static bool nf_flow_is_outdated(const struct flow_offload *flow) -{ - return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && - !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); -} - static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; @@ -407,12 +401,18 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } +static bool nf_flow_custom_gc(struct nf_flowtable *flow_table, + const struct flow_offload *flow) +{ + return flow_table->type->gc && flow_table->type->gc(flow); +} + static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - nf_flow_is_outdated(flow)) + nf_flow_custom_gc(flow_table, flow)) 
flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 7c652d14528b..0d44da4e8c8e 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -278,7 +278,14 @@ err_nat: return err; } +static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) +{ + return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); +} + static struct nf_flowtable_type flowtable_ct = { + .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, .owner = THIS_MODULE, }; -- cgit v1.2.3 From a63b6622120cd03a304796dbccb80655b3a21798 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Oct 2023 21:58:57 +0200 Subject: net/sched: act_ct: additional checks for outdated flows Current nf_flow_is_outdated() implementation considers any flow table flow whose state diverged from its underlying CT connection status for teardown, which can be problematic in the following cases: - Flow has never been offloaded to hardware in the first place either because flow table has hardware offload disabled (flag NF_FLOWTABLE_HW_OFFLOAD is not set) or because it is still pending on 'add' workqueue to be offloaded for the first time. The former is incorrect, the latter generates excessive deletions and additions of flows. - Flow is already pending to be updated on the workqueue. Tearing down such flows will also generate excessive removals from the flow table, especially on a highly loaded system where the latency to re-offload a flow via 'add' workqueue can be quite high. When considering a flow for teardown as outdated verify that it is both offloaded to hardware and doesn't have any pending updates. 
Fixes: 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple") Reviewed-by: Paul Blakey Signed-off-by: Vlad Buslov Signed-off-by: Pablo Neira Ayuso --- net/sched/act_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 0d44da4e8c8e..fb52d6f9aff9 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -281,6 +281,8 @@ err_nat: static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) { return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + test_bit(IPS_HW_OFFLOAD_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_PENDING, &flow->flags) && !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); } -- cgit v1.2.3 From 197f9fba9663e765f8f3ae3b2375c6cc32b2e2b3 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 25 Oct 2023 02:14:34 -0400 Subject: net: ipv4: fix typo in comments The word "advertize" should be replaced by "advertise". Signed-off-by: Deming Wang Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d18f0f092fe7..4ccfc104f13a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -786,7 +786,7 @@ int esp_input_done2(struct sk_buff *skb, int err) /* * 1) if the NAT-T peer's IP or port changed then - * advertize the change to the keying daemon. + * advertise the change to the keying daemon. * This is an inbound SA, so just compare * SRC ports. */ -- cgit v1.2.3 From 1711435e3e67e079d6a2bce54d96d1af21c7ef2c Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 25 Oct 2023 02:16:56 -0400 Subject: net: ipv6: fix typo in comments The word "advertize" should be replaced by "advertise". Signed-off-by: Deming Wang Signed-off-by: David S. 
Miller --- net/ipv6/esp6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e023d29e919c..2cc1a45742d8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -833,7 +833,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) /* * 1) if the NAT-T peer's IP or port changed then - * advertize the change to the keying daemon. + * advertise the change to the keying daemon. * This is an inbound SA, so just compare * SRC ports. */ -- cgit v1.2.3 From 53b08c4985158430fd6d035fb49443bada535210 Mon Sep 17 00:00:00 2001 From: Alexandru Matei Date: Tue, 24 Oct 2023 22:17:42 +0300 Subject: vsock/virtio: initialize the_virtio_vsock before using VQs Once VQs are filled with empty buffers and we kick the host, it can send connection requests. If the_virtio_vsock is not initialized before, replies are silently dropped and do not reach the host. virtio_transport_send_pkt() can queue packets once the_virtio_vsock is set, but they won't be processed until vsock->tx_run is set to true. We queue vsock->send_pkt_work when initialization finishes to send those packets queued earlier. 
Fixes: 0deab087b16a ("vsock/virtio: use RCU to avoid use-after-free on the_virtio_vsock") Signed-off-by: Alexandru Matei Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231024191742.14259-1-alexandru.matei@uipath.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e95df847176b..b80bf681327b 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -555,6 +555,11 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) virtio_device_ready(vdev); + return 0; +} + +static void virtio_vsock_vqs_start(struct virtio_vsock *vsock) +{ mutex_lock(&vsock->tx_lock); vsock->tx_run = true; mutex_unlock(&vsock->tx_lock); @@ -569,7 +574,16 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - return 0; + /* virtio_transport_send_pkt() can queue packets once + * the_virtio_vsock is set, but they won't be processed until + * vsock->tx_run is set to true. We queue vsock->send_pkt_work + * when initialization finishes to send those packets queued + * earlier. + * We don't need to queue the other workers (rx, event) because + * as long as we don't fill the queues with empty buffers, the + * host can't send us any notification. 
+ */ + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); } static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) @@ -664,6 +678,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) goto out; rcu_assign_pointer(the_virtio_vsock, vsock); + virtio_vsock_vqs_start(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -736,6 +751,7 @@ static int virtio_vsock_restore(struct virtio_device *vdev) goto out; rcu_assign_pointer(the_virtio_vsock, vsock); + virtio_vsock_vqs_start(vsock); out: mutex_unlock(&the_virtio_vsock_mutex); -- cgit v1.2.3 From 53798666648af3aa0dd512c2380576627237a800 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 25 Oct 2023 11:32:13 -0700 Subject: iavf: in iavf_down, disable queues when removing the driver In iavf_down, we're skipping the scheduling of certain operations if the driver is being removed. However, the IAVF_FLAG_AQ_DISABLE_QUEUES request must not be skipped in this case, because iavf_close waits for the transition to the __IAVF_DOWN state, which happens in iavf_virtchnl_completion after the queues are released. Without this fix, "rmmod iavf" takes half a second per interface that's up and prints the "Device resources not yet released" warning. 
Fixes: c8de44b577eb ("iavf: do not process adminq tasks when __IAVF_IN_REMOVE_TASK is set") Signed-off-by: Michal Schmidt Reviewed-by: Wojciech Drewek Tested-by: Rafal Romanowski Tested-by: Jacob Keller Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231025183213.874283-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 5b5c0525aa13..b3434dbc90d6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1437,9 +1437,9 @@ void iavf_down(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; if (!list_empty(&adapter->adv_rss_list_head)) adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG; - adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; } + adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } -- cgit v1.2.3