summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c106
1 files changed, 60 insertions, 46 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index c10977cb06b9..53f3ed3244d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -27,6 +27,7 @@
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
+#include "intel_guc_print.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
@@ -1443,8 +1444,7 @@ static void guc_init_engine_stats(struct intel_guc *guc)
int ret = guc_action_enable_usage_stats(guc);
if (ret)
- drm_err(&gt->i915->drm,
- "Failed to enable usage stats: %d!\n", ret);
+ guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
}
}
@@ -1621,7 +1621,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
intel_engine_stop_cs(engine);
/*
- * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+ * Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
intel_engine_wait_for_pending_mi_fw(engine);
@@ -1891,7 +1891,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
if (guc->submission_initialized)
return 0;
- if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) {
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) {
ret = guc_lrc_desc_pool_create_v69(guc);
if (ret)
return ret;
@@ -2331,7 +2331,7 @@ static int register_context(struct intel_context *ce, bool loop)
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
+ if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
ret = register_context_v70(guc, ce, loop);
else
ret = register_context_v69(guc, ce, loop);
@@ -2343,7 +2343,7 @@ static int register_context(struct intel_context *ce, bool loop)
set_context_registered(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
+ if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
guc_context_policy_init_v70(ce, loop);
}
@@ -2535,6 +2535,7 @@ static void prepare_context_registration_info_v69(struct intel_context *ce)
i915_gem_object_is_lmem(ce->ring->vma->obj));
desc = __get_lrc_desc_v69(guc, ctx_id);
+ GEM_BUG_ON(!desc);
desc->engine_class = engine_class_to_guc_class(engine->class);
desc->engine_submit_mask = engine->logical_mask;
desc->hw_context_desc = ce->lrc.lrca;
@@ -2957,7 +2958,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
u16 guc_id,
u32 preemption_timeout)
{
- if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
+ if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
struct context_policy policy;
__guc_context_policy_start_klv(&policy, guc_id);
@@ -3284,7 +3285,7 @@ static int guc_context_alloc(struct intel_context *ce)
static void __guc_context_set_prio(struct intel_guc *guc,
struct intel_context *ce)
{
- if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
+ if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
struct context_policy policy;
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
@@ -3585,8 +3586,7 @@ static int guc_request_alloc(struct i915_request *rq)
intel_context_sched_disable_unpin(ce);
else if (intel_context_is_closed(ce))
if (wait_for(context_close_done(ce), 1500))
- drm_warn(&guc_to_gt(guc)->i915->drm,
- "timed out waiting on context sched close before realloc\n");
+ guc_warn(guc, "timed out waiting on context sched close before realloc\n");
/*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
@@ -4203,8 +4203,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
/* Wa_14014475959:dg2 */
- if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
- engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+ if (engine->class == COMPUTE_CLASS)
+ if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+ IS_DG2(engine->i915))
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
@@ -4347,11 +4349,14 @@ static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
ret = intel_guc_send(guc, (u32 *)&policy->h2g,
__guc_scheduling_policy_action_size(policy));
- if (ret < 0)
+ if (ret < 0) {
+ guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n",
+ ERR_PTR(ret));
return ret;
+ }
if (ret != policy->count) {
- drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
+ guc_warn(guc, "global scheduler policy processed %d of %d KLVs!",
ret, policy->count);
if (ret > policy->count)
return -EPROTO;
@@ -4365,9 +4370,9 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
struct scheduling_policy policy;
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
- int ret = 0;
+ int ret;
- if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
return 0;
__guc_scheduling_policy_start_klv(&policy);
@@ -4383,10 +4388,6 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
yield, ARRAY_SIZE(yield));
ret = __guc_action_set_scheduling_policies(guc, &policy);
- if (ret)
- i915_probe_error(gt->i915,
- "Failed to configure global scheduling policies: %pe!\n",
- ERR_PTR(ret));
}
return ret;
@@ -4485,21 +4486,18 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
struct intel_context *ce;
if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
- drm_err(&guc_to_gt(guc)->i915->drm,
- "Invalid ctx_id %u\n", ctx_id);
+ guc_err(guc, "Invalid ctx_id %u\n", ctx_id);
return NULL;
}
ce = __get_context(guc, ctx_id);
if (unlikely(!ce)) {
- drm_err(&guc_to_gt(guc)->i915->drm,
- "Context is NULL, ctx_id %u\n", ctx_id);
+ guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id);
return NULL;
}
if (unlikely(intel_context_is_child(ce))) {
- drm_err(&guc_to_gt(guc)->i915->drm,
- "Context is child, ctx_id %u\n", ctx_id);
+ guc_err(guc, "Context is child, ctx_id %u\n", ctx_id);
return NULL;
}
@@ -4514,7 +4512,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
u32 ctx_id;
if (unlikely(len < 1)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
+ guc_err(guc, "Invalid length %u\n", len);
return -EPROTO;
}
ctx_id = msg[0];
@@ -4566,7 +4564,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
u32 ctx_id;
if (unlikely(len < 2)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
+ guc_err(guc, "Invalid length %u\n", len);
return -EPROTO;
}
ctx_id = msg[0];
@@ -4578,8 +4576,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
if (unlikely(context_destroyed(ce) ||
(!context_pending_enable(ce) &&
!context_pending_disable(ce)))) {
- drm_err(&guc_to_gt(guc)->i915->drm,
- "Bad context sched_state 0x%x, ctx_id %u\n",
+ guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n",
ce->guc_state.sched_state, ctx_id);
return -EPROTO;
}
@@ -4663,12 +4660,15 @@ static void guc_handle_context_reset(struct intel_guc *guc,
{
trace_intel_context_reset(ce);
+ drm_dbg(&guc_to_gt(guc)->i915->drm, "Got GuC reset of 0x%04X, exiting = %d, banned = %d\n",
+ ce->guc_id.id, test_bit(CONTEXT_EXITING, &ce->flags),
+ test_bit(CONTEXT_BANNED, &ce->flags));
+
if (likely(intel_context_is_schedulable(ce))) {
capture_error_state(guc, ce);
guc_context_replay(ce);
} else {
- drm_info(&guc_to_gt(guc)->i915->drm,
- "Ignoring context reset notification of exiting context 0x%04X on %s",
+ guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
ce->guc_id.id, ce->engine->name);
}
}
@@ -4681,7 +4681,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
int ctx_id;
if (unlikely(len != 1)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ guc_err(guc, "Invalid length %u", len);
return -EPROTO;
}
@@ -4714,13 +4714,13 @@ int intel_guc_error_capture_process_msg(struct intel_guc *guc,
u32 status;
if (unlikely(len != 1)) {
- drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ guc_dbg(guc, "Invalid length %u", len);
return -EPROTO;
}
status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
- drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
+ guc_warn(guc, "No space for error capture");
intel_guc_capture_process(guc);
@@ -4752,24 +4752,36 @@ static void reset_fail_worker_func(struct work_struct *w)
guc->submission_state.reset_fail_mask = 0;
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
- if (likely(reset_fail_mask))
+ if (likely(reset_fail_mask)) {
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * GuC is toast at this point - it dead loops after sending the failed
+ * reset notification. So need to manually determine the guilty context.
+ * Note that it should be reliable to do this here because the GuC is
+ * toast and will not be scheduling behind the KMD's back.
+ */
+ for_each_engine_masked(engine, gt, reset_fail_mask, id)
+ intel_guc_find_hung_context(engine);
+
intel_gt_handle_error(gt, reset_fail_mask,
I915_ERROR_CAPTURE,
- "GuC failed to reset engine mask=0x%x\n",
+ "GuC failed to reset engine mask=0x%x",
reset_fail_mask);
+ }
}
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_engine_cs *engine;
- struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
unsigned long flags;
if (unlikely(len != 3)) {
- drm_err(&gt->i915->drm, "Invalid length %u", len);
+ guc_err(guc, "Invalid length %u", len);
return -EPROTO;
}
@@ -4779,8 +4791,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
engine = intel_guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
- drm_err(&gt->i915->drm,
- "Invalid engine %d:%d", guc_class, instance);
+ guc_err(guc, "Invalid engine %d:%d", guc_class, instance);
return -EPROTO;
}
@@ -4788,7 +4799,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
* This is an unexpected failure of a hardware feature. So, log a real
* error message not just the informational that comes with the reset.
*/
- drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
+ guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X",
guc_class, instance, engine->name, reason);
spin_lock_irqsave(&guc->submission_state.lock, flags);
@@ -4917,6 +4928,9 @@ void intel_guc_submission_print_info(struct intel_guc *guc,
if (!sched_engine)
return;
+ drm_printf(p, "GuC Submission API Version: %d.%d.%d\n",
+ guc->submission_version.major, guc->submission_version.minor,
+ guc->submission_version.patch);
drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
atomic_read(&guc->outstanding_submission_g2h));
drm_printf(p, "GuC tasklet count: %u\n",
@@ -5348,8 +5362,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
GEM_BUG_ON(!is_power_of_2(sibling->mask));
if (sibling->mask & ve->base.mask) {
- DRM_DEBUG("duplicate %s entry in load balancer\n",
- sibling->name);
+ guc_dbg(guc, "duplicate %s entry in load balancer\n",
+ sibling->name);
err = -EINVAL;
goto err_put;
}
@@ -5358,8 +5372,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
ve->base.logical_mask |= sibling->logical_mask;
if (n != 0 && ve->base.class != sibling->class) {
- DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
- sibling->class, ve->base.class);
+ guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
+ sibling->class, ve->base.class);
err = -EINVAL;
goto err_put;
} else if (n == 0) {