diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 106 |
1 files changed, 60 insertions, 46 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c10977cb06b9..53f3ed3244d5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -27,6 +27,7 @@ #include "intel_guc_ads.h" #include "intel_guc_capture.h" +#include "intel_guc_print.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -1443,8 +1444,7 @@ static void guc_init_engine_stats(struct intel_guc *guc) int ret = guc_action_enable_usage_stats(guc); if (ret) - drm_err(>->i915->drm, - "Failed to enable usage stats: %d!\n", ret); + guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); } } @@ -1621,7 +1621,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) intel_engine_stop_cs(engine); /* - * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ intel_engine_wait_for_pending_mi_fw(engine); @@ -1891,7 +1891,7 @@ int intel_guc_submission_init(struct intel_guc *guc) if (guc->submission_initialized) return 0; - if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { ret = guc_lrc_desc_pool_create_v69(guc); if (ret) return ret; @@ -2331,7 +2331,7 @@ static int register_context(struct intel_context *ce, bool loop) GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) ret = register_context_v70(guc, ce, loop); else ret = register_context_v69(guc, ce, loop); @@ -2343,7 +2343,7 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) guc_context_policy_init_v70(ce, loop); } @@ -2535,6 +2535,7 @@ static void prepare_context_registration_info_v69(struct intel_context *ce) i915_gem_object_is_lmem(ce->ring->vma->obj)); desc = __get_lrc_desc_v69(guc, ctx_id); + GEM_BUG_ON(!desc); desc->engine_class = engine_class_to_guc_class(engine->class); desc->engine_submit_mask = engine->logical_mask; desc->hw_context_desc = ce->lrc.lrca; @@ -2957,7 +2958,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, guc_id); @@ -3284,7 +3285,7 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, ce->guc_id.id); @@ -3585,8 +3586,7 @@ static int guc_request_alloc(struct i915_request *rq) intel_context_sched_disable_unpin(ce); else if (intel_context_is_closed(ce)) if (wait_for(context_close_done(ce), 1500)) - drm_warn(&guc_to_gt(guc)->i915->drm, - "timed out waiting on context sched close before realloc\n"); + guc_warn(guc, "timed out waiting on context sched close before realloc\n"); /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the @@ -4203,8 +4203,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_TIMESLICES; /* Wa_14014475959:dg2 */ - if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) - engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + if (engine->class == COMPUTE_CLASS) + if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + IS_DG2(engine->i915)) + engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; /* * TODO: GuC supports timeslicing and semaphores as well, but they're @@ -4347,11 +4349,14 @@ static int __guc_action_set_scheduling_policies(struct intel_guc *guc, ret = intel_guc_send(guc, (u32 *)&policy->h2g, __guc_scheduling_policy_action_size(policy)); - if (ret < 0) + if (ret < 0) { + guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", + ERR_PTR(ret)); return ret; + } if (ret != policy->count) { - drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!", + guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", ret, policy->count); if (ret > policy->count) return -EPROTO; @@ -4365,9 +4370,9 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc) struct scheduling_policy policy; struct intel_gt *gt = guc_to_gt(guc); intel_wakeref_t wakeref; - int ret = 0; + int ret; - if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0)) + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) return 0; __guc_scheduling_policy_start_klv(&policy); @@ -4383,10 +4388,6 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc) yield, ARRAY_SIZE(yield)); ret = __guc_action_set_scheduling_policies(guc, &policy); - if (ret) - i915_probe_error(gt->i915, - "Failed to configure global scheduling policies: %pe!\n", - ERR_PTR(ret)); } return ret; @@ -4485,21 +4486,18 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) struct intel_context *ce; if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { - drm_err(&guc_to_gt(guc)->i915->drm, - "Invalid ctx_id %u\n", ctx_id); + guc_err(guc, "Invalid ctx_id %u\n", ctx_id); return NULL; } ce = __get_context(guc, ctx_id); if (unlikely(!ce)) { - drm_err(&guc_to_gt(guc)->i915->drm, - "Context is NULL, ctx_id %u\n", ctx_id); + guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); return NULL; } if (unlikely(intel_context_is_child(ce))) { - drm_err(&guc_to_gt(guc)->i915->drm, - "Context is child, ctx_id %u\n", ctx_id); + guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); return NULL; } @@ -4514,7 +4512,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, u32 ctx_id; if (unlikely(len < 1)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); + guc_err(guc, "Invalid length %u\n", len); return -EPROTO; } ctx_id = msg[0]; @@ -4566,7 +4564,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, u32 ctx_id; if (unlikely(len < 2)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); + guc_err(guc, "Invalid length %u\n", len); return -EPROTO; } ctx_id = msg[0]; @@ -4578,8 +4576,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, if (unlikely(context_destroyed(ce) || (!context_pending_enable(ce) && !context_pending_disable(ce)))) { - drm_err(&guc_to_gt(guc)->i915->drm, - "Bad context sched_state 0x%x, ctx_id %u\n", + guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", ce->guc_state.sched_state, ctx_id); return -EPROTO; } @@ -4663,12 +4660,15 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); + drm_dbg(&guc_to_gt(guc)->i915->drm, "Got GuC reset of 0x%04X, exiting = %d, banned = %d\n", + ce->guc_id.id, test_bit(CONTEXT_EXITING, &ce->flags), + test_bit(CONTEXT_BANNED, &ce->flags)); + if (likely(intel_context_is_schedulable(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); } else { - drm_info(&guc_to_gt(guc)->i915->drm, - "Ignoring context reset notification of exiting context 0x%04X on %s", + guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s", ce->guc_id.id, ce->engine->name); } } @@ -4681,7 +4681,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, int ctx_id; if (unlikely(len != 1)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + guc_err(guc, "Invalid length %u", len); return -EPROTO; } @@ -4714,13 +4714,13 @@ int intel_guc_error_capture_process_msg(struct intel_guc *guc, u32 status; if (unlikely(len != 1)) { - drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + guc_dbg(guc, "Invalid length %u", len); return -EPROTO; } status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) - drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); + guc_warn(guc, "No space for error capture"); intel_guc_capture_process(guc); @@ -4752,24 +4752,36 @@ static void reset_fail_worker_func(struct work_struct *w) guc->submission_state.reset_fail_mask = 0; spin_unlock_irqrestore(&guc->submission_state.lock, flags); - if (likely(reset_fail_mask)) + if (likely(reset_fail_mask)) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * GuC is toast at this point - it dead loops after sending the failed + * reset notification. So need to manually determine the guilty context. + * Note that it should be reliable to do this here because the GuC is + * toast and will not be scheduling behind the KMD's back. + */ + for_each_engine_masked(engine, gt, reset_fail_mask, id) + intel_guc_find_hung_context(engine); + intel_gt_handle_error(gt, reset_fail_mask, I915_ERROR_CAPTURE, - "GuC failed to reset engine mask=0x%x\n", + "GuC failed to reset engine mask=0x%x", reset_fail_mask); + } } int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { struct intel_engine_cs *engine; - struct intel_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; unsigned long flags; if (unlikely(len != 3)) { - drm_err(>->i915->drm, "Invalid length %u", len); + guc_err(guc, "Invalid length %u", len); return -EPROTO; } @@ -4779,8 +4791,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, engine = intel_guc_lookup_engine(guc, guc_class, instance); if (unlikely(!engine)) { - drm_err(>->i915->drm, - "Invalid engine %d:%d", guc_class, instance); + guc_err(guc, "Invalid engine %d:%d", guc_class, instance); return -EPROTO; } @@ -4788,7 +4799,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, * This is an unexpected failure of a hardware feature. So, log a real * error message not just the informational that comes with the reset. */ - drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", + guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", guc_class, instance, engine->name, reason); spin_lock_irqsave(&guc->submission_state.lock, flags); @@ -4917,6 +4928,9 @@ void intel_guc_submission_print_info(struct intel_guc *guc, if (!sched_engine) return; + drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", + guc->submission_version.major, guc->submission_version.minor, + guc->submission_version.patch); drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", atomic_read(&guc->outstanding_submission_g2h)); drm_printf(p, "GuC tasklet count: %u\n", @@ -5348,8 +5362,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, GEM_BUG_ON(!is_power_of_2(sibling->mask)); if (sibling->mask & ve->base.mask) { - DRM_DEBUG("duplicate %s entry in load balancer\n", - sibling->name); + guc_dbg(guc, "duplicate %s entry in load balancer\n", + sibling->name); err = -EINVAL; goto err_put; } @@ -5358,8 +5372,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, ve->base.logical_mask |= sibling->logical_mask; if (n != 0 && ve->base.class != sibling->class) { - DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", - sibling->class, ve->base.class); + guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); err = -EINVAL; goto err_put; } else if (n == 0) { |