From 649a125a88da64a66b0836cb7998bb433bbf1bf5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 18 Mar 2024 08:49:24 -0700 Subject: drm/xe: Always check force_wake_get return code A force_wake_get failure means that the HW might not be awake for the access we're doing; this can lead to an immediate error or it can be a more subtle problem (e.g. a register read might return an incorrect value that is still valid, leading the driver to make a wrong choice instead of flagging an error). We avoid an error from the force_wake function because callers might handle or tolerate the error, but this only works if all callers are checking the error code. The majority already do, but a few are not. These are mainly falling into 3 categories, which are each handled differently: 1) error capture: in this case we want to continue the capture, but we log an info message in dmesg to notify the user that the capture might have incorrect data. 2) ioctl: in this case we return a -EIO error to userspace 3) unabortable actions: these are scenarios where we can't simply abort and retry and so it's better to just try it anyway because there is a chance the HW is awake even with the failure. In this case we throw a warning so we know there was a forcewake problem if something fails down the line. v2: use gt_WARN_ON where appropriate Signed-off-by: Daniele Ceraolo Spurio Cc: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240318154924.3453513-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 9 +++++++-- drivers/gpu/drm/xe/xe_gsc.c | 2 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 5 +++-- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 4 +++- drivers/gpu/drm/xe/xe_query.c | 3 ++- 7 files changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 0fcd30680323..7d3aa6bd3524 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -13,6 +13,7 @@ #include "xe_exec_queue.h" #include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" @@ -64,7 +65,9 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) { struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); xe_vm_snapshot_capture_delayed(ss->vm); xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); @@ -180,7 +183,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, } } - xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 09f6e7899921..8339b0b49dfb 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -326,7 +326,7 @@ static void gsc_work(struct work_struct *work) spin_unlock_irq(&gsc->lock); xe_pm_runtime_get(xe); - xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); if (actions & GSC_ACTION_ER_COMPLETE) { ret = gsc_er_complete(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a3c4ffba679d..25b4111097bc 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,7 +247,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_wait(gt, seqno); } else if (xe_device_uc_enabled(xe)) { - xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index fa11cad8386c..c558d978462a 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -240,10 +240,11 @@ static void guc_write_params(struct xe_guc *guc) static void guc_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_gt *gt = guc_to_gt(guc); - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_uc_fini_hw(&guc_to_gt(guc)->uc); - xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index eb174547c8f2..786acaf71931 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -927,7 +927,7 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) return; } - xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 4a2b8e6b81b8..a71be57f0fc2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -833,7 +833,9 @@ static void simple_error_capture(struct xe_exec_queue *q) } } - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL)) + xe_gt_info(guc_to_gt(guc), + "failed to get forcewake for error capture"); xe_guc_ct_print(&guc->ct, &p, true); guc_exec_queue_print(q, &p); for_each_hw_engine(hwe, guc_to_gt(guc), id) { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e80321b34918..fcd8680d2ccc 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -148,7 +148,8 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)) + return -EIO; __read_timestamps(gt, RING_TIMESTAMP(hwe->mmio_base), -- cgit v1.2.3