summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_hw_engine.c
diff options
context:
space:
mode:
author Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> 2024-03-04 17:56:34 +0300
committer Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> 2024-03-15 00:47:13 +0300
commit cc244ce531d4ef013d0d87e11141bb94d4235828 (patch)
tree bcaca4e43fd91566dc3c07db6e46a966475f2e0b /drivers/gpu/drm/xe/xe_hw_engine.c
parent 2c5b70f74d61438a071a19370e63c234d2bd8938 (diff)
download linux-cc244ce531d4ef013d0d87e11141bb94d4235828.tar.xz
drm/xe/gsc: Handle GSCCS ER interrupt
Starting on Xe2, the GSCCS engine reset is a 2-step process. When the driver or the GuC hits the GDRST register, the CS is immediately reset and a success is reported, but the GSC shim continues its reset in the background. While the shim reset is ongoing, the CS is able to accept new context submission, but any commands that require the shim will be stalled until the reset is completed. This means that we can keep submitting to the GSCCS as long as we make sure that the preemption timeout is big enough to cover any delay introduced by the reset; since the GSC preempt timeout is not tunable at runtime, we only need to check that the value set in kconfig is big enough (and increase it if it isn't).

When the shim reset completes, a specific CS interrupt is triggered, in response to which we need to check the GSCI_TIMER_STATUS register to see if the reset was successful or not. Note that the GSCI_TIMER_STATUS register is not power save/restored, so it gets reset on MC6 entry. However, a reset failure stops MC6, so in that scenario we're always guaranteed to find the correct value.

Since we can't check the register within interrupt context, the existing GSC worker has been updated to handle it.

The expected action to take on ER failure is to trigger a driver FLR, but we still don't support that, so for now we just print an error. A comment has been added to the code to keep track of the FLR requirement.

v2: Add a check for the initial timeout value (Alan)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304145634.820684-1-daniele.ceraolospurio@intel.com
Diffstat (limited to 'drivers/gpu/drm/xe/xe_hw_engine.c')
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine.c | 35
1 files changed, 34 insertions, 1 deletions
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b5e83ea172f3..2c5615130a38 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -14,8 +14,10 @@
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
+#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
+#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
@@ -463,6 +465,32 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+
+ /*
+ * The GSC engine can accept submissions while the GSC shim is
+ * being reset, during which time the submission is stalled. In
+ * the worst case, the shim reset can take up to the maximum GSC
+ * command execution time (250ms), so the request start can be
+ * delayed by that much; the request itself can take that long
+ * without being preemptible, which means worst case it can
+ * theoretically take up to 500ms for a preemption to go through
+ * on the GSC engine. Adding to that an extra 100ms as a safety
+ * margin, we get a minimum recommended timeout of 600ms.
+ * The preempt_timeout value can't be tuned for OTHER_CLASS
+ * because the class is reserved for kernel usage, so we just
+ * need to make sure that the starting value is above that
+ * threshold; since our default value (640ms) is greater than
+ * 600ms, the only way we can go below is via a kconfig setting.
+ * If that happens, log it in dmesg and update the value.
+ */
+ if (hwe->class == XE_ENGINE_CLASS_OTHER) {
+ const u32 min_preempt_timeout = 600 * 1000;
+ if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
+ hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
+ xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
+ }
+ }
+
/* Record default props */
hwe->eclass->defaults = hwe->eclass->sched_props;
}
@@ -509,8 +537,13 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
}
}
- if (xe_device_uc_enabled(xe))
+ if (xe_device_uc_enabled(xe)) {
+ /* GSCCS has a special interrupt for reset */
+ if (hwe->class == XE_ENGINE_CLASS_OTHER)
+ hwe->irq_handler = xe_gsc_hwe_irq_handler;
+
xe_hw_engine_enable_ring(hwe);
+ }
/* We reserve the highest BCS instance for USM */
if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)