summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com> 2023-07-27 02:26:50 +0300
committer: Rodrigo Vivi <rodrigo.vivi@intel.com> 2023-12-21 19:39:15 +0300
commit: 8f3013e0b22206b27f37dcf1b96ce68df3393040 (patch)
tree: 6cb7413d278ff1d402a05a6b93c9ebede120148c
parent: 4f027e304a6c7ae77150965d10b8a1edee0398a2 (diff)
download: linux-8f3013e0b22206b27f37dcf1b96ce68df3393040.tar.xz
drm/xe: Introduce fault injection for gt reset
To trigger gt reset failure:

 echo 100 > /sys/kernel/debug/dri/<cardX>/fail_gt_reset/probability
 echo 2 > /sys/kernel/debug/dri/<cardX>/fail_gt_reset/times

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.c  10
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c  8
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h  14
3 files changed, 31 insertions, 1 deletions
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 491506a1e12e..2de8a0b9da18 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -5,6 +5,7 @@
#include "xe_debugfs.h"
+#include <linux/fault-inject.h>
#include <linux/string_helpers.h>
#include <drm/drm_debugfs.h>
@@ -20,6 +21,10 @@
#include "xe_vm.h"
#endif
+#ifdef CONFIG_FAULT_INJECTION
+DECLARE_FAULT_ATTR(gt_reset_failure);
+#endif
+
static struct xe_device *node_to_xe(struct drm_info_node *node)
{
return to_xe_device(node->minor->dev);
@@ -135,4 +140,9 @@ void xe_debugfs_register(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_debugfs_register(gt);
+
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+#endif
+
}
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 82b987404070..28bf577c7bf2 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -524,6 +524,11 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_info(gt, "reset started\n");
+ if (xe_fault_inject_gt_reset()) {
+ err = -ECANCELED;
+ goto err_fail;
+ }
+
xe_gt_sanitize(gt);
xe_device_mem_access_get(gt_to_xe(gt));
@@ -562,6 +567,7 @@ err_out:
err_msg:
XE_WARN_ON(xe_uc_start(&gt->uc));
xe_device_mem_access_put(gt_to_xe(gt));
+err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
/* Notify userspace about gt reset failure */
@@ -583,7 +589,7 @@ void xe_gt_reset_async(struct xe_gt *gt)
xe_gt_info(gt, "trying reset\n");
/* Don't do a reset while one is already in flight */
- if (xe_uc_reset_prepare(&gt->uc))
+ if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
return;
xe_gt_info(gt, "reset queued\n");
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 7298653a73de..caded203a8a0 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -7,6 +7,7 @@
#define _XE_GT_H_
#include <drm/drm_util.h>
+#include <linux/fault-inject.h>
#include "xe_device_types.h"
#include "xe_hw_engine.h"
@@ -16,6 +17,19 @@
for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
xe_hw_engine_is_valid((hwe__)))
+#ifdef CONFIG_FAULT_INJECTION /* fault-injection build: consult the shared attribute */
+extern struct fault_attr gt_reset_failure; /* defined in xe_debugfs.c */
+static inline bool xe_fault_inject_gt_reset(void) /* should a GT reset failure be injected now? */
+{
+ return should_fail(&gt_reset_failure, 1); /* decision delegated to the fault-injection core */
+}
+#else /* !CONFIG_FAULT_INJECTION */
+static inline bool xe_fault_inject_gt_reset(void) /* stub: injection compiled out */
+{
+ return false; /* never inject a failure */
+}
+#endif /* CONFIG_FAULT_INJECTION */
+
struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);