From 55e049910e08caff21c8535c6d6672dc4665f36d Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 7 Sep 2020 05:14:41 -0700 Subject: scsi: qedf: Add schedule_hw_err_handler callback for fan failure On fan failure, disable the PCI function; on ramrod failure, initiate recovery. Link: https://lore.kernel.org/r/20200907121443.5150-7-jhasan@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 4 ++++ drivers/scsi/qedf/qedf_main.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'drivers/scsi/qedf') diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index 15d6cbef7459..0e2cbb164eeb 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -389,6 +389,7 @@ struct qedf_ctx { mempool_t *io_mempool; struct workqueue_struct *dpc_wq; struct delayed_work recovery_work; + struct delayed_work board_disable_work; struct delayed_work grcdump_work; struct delayed_work stag_work; @@ -541,6 +542,9 @@ extern void qedf_get_generic_tlv_data(void *dev, struct qed_generic_tlvs *data); extern void qedf_wq_grcdump(struct work_struct *work); void qedf_stag_change_work(struct work_struct *work); void qedf_ctx_soft_reset(struct fc_lport *lport); +extern void qedf_board_disable_work(struct work_struct *work); +extern void qedf_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type); #define FCOE_WORD_TO_BYTE 4 #define QEDF_MAX_TASK_NUM 0xFFFF diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 073de50dfbe8..ed595c83be3d 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -105,6 +105,12 @@ module_param_named(dp_level, qedf_dp_level, uint, S_IRUGO); MODULE_PARM_DESC(dp_level, " printk verbosity control passed to qed module " "during probe (0-3: 0 more verbose)."); +static bool qedf_enable_recovery = true; +module_param_named(enable_recovery, qedf_enable_recovery, 
bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(enable_recovery, "Enable/disable recovery on driver/firmware " + "interface level errors 0 = Disabled, 1 = Enabled (Default: 1)."); + struct workqueue_struct *qedf_io_wq; static struct fcoe_percpu_s qedf_global; @@ -690,6 +696,7 @@ static struct qed_fcoe_cb_ops qedf_cb_ops = { .dcbx_aen = qedf_dcbx_handler, .get_generic_tlv_data = qedf_get_generic_tlv_data, .get_protocol_tlv_data = qedf_get_protocol_tlv_data, + .schedule_hw_err_handler = qedf_schedule_hw_err_handler, } }; @@ -3799,6 +3806,44 @@ void qedf_wq_grcdump(struct work_struct *work) qedf_capture_grc_dump(qedf); } +void qedf_schedule_hw_err_handler(void *dev, enum qed_hw_err_type err_type) +{ /* Hardware-error callback (wired up as .schedule_hw_err_handler in qedf_cb_ops): logs the event, then acts on err_type unless recovery is already in progress. dev is used as the struct qedf_ctx pointer. */ + struct qedf_ctx *qedf = dev; + + QEDF_ERR(&(qedf->dbg_ctx), + "Hardware error handler scheduled, event=%d.\n", + err_type); + /* While QEDF_IN_RECOVERY is set in qedf->flags, only log and return. */ + if (test_bit(QEDF_IN_RECOVERY, &qedf->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), + "Already in recovery, not scheduling board disable work.\n"); + return; + } + + switch (err_type) { + case QED_HW_ERR_FAN_FAIL: + schedule_delayed_work(&qedf->board_disable_work, 0); /* Queued with a delay of 0. NOTE(review): the work handler and its INIT_DELAYED_WORK setup are not visible in this patch - confirm they exist. */ + break; + case QED_HW_ERR_MFW_RESP_FAIL: + case QED_HW_ERR_HW_ATTN: + case QED_HW_ERR_DMAE_FAIL: + case QED_HW_ERR_FW_ASSERT: + /* Prevent HW attentions from being reasserted */ + qed_ops->common->attn_clr_enable(qedf->cdev, true); + break; + case QED_HW_ERR_RAMROD_FAIL: + /* Prevent HW attentions from being reasserted */ + qed_ops->common->attn_clr_enable(qedf->cdev, true); + /* recovery_process() is invoked only when the enable_recovery module parameter is set. */ + if (qedf_enable_recovery) + qed_ops->common->recovery_process(qedf->cdev); + + break; + default: /* Any other err_type: no action. */ + break; + } +} + /* * Protocol TLV handler */ -- cgit v1.2.3