From 32d53c0aa3a7b727243473949bad2a830b908edc Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 23 Mar 2022 13:43:52 +0100 Subject: ice: fix 'scheduling while atomic' on aux critical err interrupt There's a kernel BUG splat on processing aux critical error interrupts in ice_misc_intr(): [ 2100.917085] BUG: scheduling while atomic: swapper/15/0/0x00010000 ... [ 2101.060770] Call Trace: [ 2101.063229] [ 2101.065252] dump_stack+0x41/0x60 [ 2101.068587] __schedule_bug.cold.100+0x4c/0x58 [ 2101.073060] __schedule+0x6a4/0x830 [ 2101.076570] schedule+0x35/0xa0 [ 2101.079727] schedule_preempt_disabled+0xa/0x10 [ 2101.084284] __mutex_lock.isra.7+0x310/0x420 [ 2101.088580] ? ice_misc_intr+0x201/0x2e0 [ice] [ 2101.093078] ice_send_event_to_aux+0x25/0x70 [ice] [ 2101.097921] ice_misc_intr+0x220/0x2e0 [ice] [ 2101.102232] __handle_irq_event_percpu+0x40/0x180 [ 2101.106965] handle_irq_event_percpu+0x30/0x80 [ 2101.111434] handle_irq_event+0x36/0x53 [ 2101.115292] handle_edge_irq+0x82/0x190 [ 2101.119148] handle_irq+0x1c/0x30 [ 2101.122480] do_IRQ+0x49/0xd0 [ 2101.125465] common_interrupt+0xf/0xf [ 2101.129146] ... As Andrew correctly mentioned previously[0], the following call ladder happens: ice_misc_intr() <- hardirq ice_send_event_to_aux() device_lock() mutex_lock() might_sleep() might_resched() <- oops Add a new PF state bit which indicates that an aux critical error occurred and serve it in ice_service_task() in process context. The new ice_pf::oicr_err_reg is read-write in both hardirq and process contexts, but only 3 bits of non-critical data probably aren't worth explicit synchronizing (and they're even in the same byte [31:24]). [0] https://lore.kernel.org/all/YeSRUVmrdmlUXHDn@lunn.ch Fixes: 348048e724a0e ("ice: Implement iidc operations") Signed-off-by: Alexander Lobakin Tested-by: Michal Kubiak Acked-by: Tony Nguyen Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index bea1d1e39fa2..2ca887076dd4 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -290,6 +290,7 @@ enum ice_pf_state { ICE_LINK_DEFAULT_OVERRIDE_PENDING, ICE_PHY_INIT_COMPLETE, ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ + ICE_AUX_ERR_PENDING, ICE_STATE_NBITS /* must be last */ }; @@ -559,6 +560,7 @@ struct ice_pf { wait_queue_head_t reset_wait_queue; u32 hw_csum_rx_error; + u32 oicr_err_reg; u16 oicr_idx; /* Other interrupt cause MSIX vector index */ u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ u16 max_pf_txqs; /* Total Tx queues PF wide */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b7e8744b0c0a..296f9d5f7408 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2255,6 +2255,19 @@ static void ice_service_task(struct work_struct *work) return; } + if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { + struct iidc_event *event; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event) { + set_bit(IIDC_EVENT_CRIT_ERR, event->type); + /* report the entire OICR value to AUX driver */ + swap(event->reg, pf->oicr_err_reg); + ice_send_event_to_aux(pf, event); + kfree(event); + } + } + if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) { /* Plug aux device per request */ ice_plug_aux_dev(pf); @@ -3041,17 +3054,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) if (oicr & ICE_AUX_CRIT_ERR) { - struct iidc_event *event; - + pf->oicr_err_reg |= oicr; + set_bit(ICE_AUX_ERR_PENDING, pf->state); ena_mask &= ~ICE_AUX_CRIT_ERR; - event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (event) { - set_bit(IIDC_EVENT_CRIT_ERR, event->type); - /* report the entire OICR value to AUX driver */ - event->reg = oicr; - ice_send_event_to_aux(pf, event); - kfree(event); - } } /* Report any remaining unexpected interrupts */ -- cgit v1.2.3 From 5a3156932da06f09953764de113419f254086faf Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 23 Mar 2022 13:43:53 +0100 Subject: ice: don't allow to run ice_send_event_to_aux() in atomic ctx ice_send_event_to_aux() eventually descends to mutex_lock() (-> might_sched()), so it must not be called under non-task context. However, at least two fixes have happened already for the bug splats occurred due to this function being called from atomic context. To make the emergency landings softer, bail out early when executed in non-task context emitting a warn splat only once. This way we trade some events being potentially lost for system stability and avoid any related hangs and crashes. Fixes: 348048e724a0e ("ice: Implement iidc operations") Signed-off-by: Alexander Lobakin Tested-by: Michal Kubiak Reviewed-by: Maciej Fijalkowski Acked-by: Tony Nguyen Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_idc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b..5559230eff8b 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -34,6 +34,9 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) { struct iidc_auxiliary_drv *iadrv; + if (WARN_ON_ONCE(!in_task())) + return; + if (!pf->adev) return; -- cgit v1.2.3