diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_perf.c | 51 |
1 files changed, 39 insertions, 12 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 125b6ca25a75..824a34ec0b83 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1846,8 +1846,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, for (d = 0; d < dword_count; d++) { *cs++ = cmd; *cs++ = i915_mmio_reg_offset(reg) + 4 * d; - *cs++ = intel_gt_scratch_offset(stream->engine->gt, - offset) + 4 * d; + *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d; *cs++ = 0; } @@ -1880,7 +1879,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) MI_PREDICATE_RESULT_2_ENGINE(base) : MI_PREDICATE_RESULT_1(RENDER_RING_BASE); - bo = i915_gem_object_create_internal(i915, 4096); + /* + * gt->scratch was being used to save/restore the GPR registers, but on + * MTL the scratch uses stolen lmem. An MI_SRM to this memory region + * causes an engine hang. Instead allocate an additional page here to + * save/restore GPR registers + */ + bo = i915_gem_object_create_internal(i915, 8192); if (IS_ERR(bo)) { drm_err(&i915->drm, "Failed to allocate NOA wait batchbuffer\n"); @@ -1914,14 +1919,19 @@ retry: goto err_unpin; } + stream->noa_wait = vma; + +#define GPR_SAVE_OFFSET 4096 +#define PREDICATE_SAVE_OFFSET 4160 + /* Save registers. */ for (i = 0; i < N_CS_GPR; i++) cs = save_restore_register( stream, cs, true /* save */, CS_GPR(i), - INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + GPR_SAVE_OFFSET + 8 * i, 2); cs = save_restore_register( stream, cs, true /* save */, mi_predicate_result, - INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + PREDICATE_SAVE_OFFSET, 1); /* First timestamp snapshot location. */ ts0 = cs; @@ -2037,10 +2047,10 @@ retry: for (i = 0; i < N_CS_GPR; i++) cs = save_restore_register( stream, cs, false /* restore */, CS_GPR(i), - INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + GPR_SAVE_OFFSET + 8 * i, 2); cs = save_restore_register( stream, cs, false /* restore */, mi_predicate_result, - INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + PREDICATE_SAVE_OFFSET, 1); /* And return to the ring. */ *cs++ = MI_BATCH_BUFFER_END; @@ -2050,7 +2060,6 @@ retry: i915_gem_object_flush_map(bo); __i915_gem_object_release_map(bo); - stream->noa_wait = vma; goto out_ww; err_unpin: @@ -2263,7 +2272,7 @@ retry: goto err_add_request; err = rq->engine->emit_bb_start(rq, - vma->node.start, 0, + i915_vma_offset(vma), 0, I915_DISPATCH_SECURE); if (err) goto err_add_request; @@ -3131,8 +3140,11 @@ get_sseu_config(struct intel_sseu *out_sseu, */ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) { - /* Wa_18013179988:dg2 */ - if (IS_DG2(i915)) { + /* + * Wa_18013179988:dg2 + * Wa_14015846243:mtl + */ + if (IS_DG2(i915) || IS_METEORLAKE(i915)) { intel_wakeref_t wakeref; u32 reg, shift; @@ -4310,6 +4322,17 @@ static const struct i915_range gen12_oa_mux_regs[] = { {} }; +/* + * Ref: 14010536224: + * 0x20cc is repurposed on MTL, so use a separate array for MTL. + */ +static const struct i915_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ +}; + static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { return reg_in_range_table(addr, gen7_oa_b_counters); @@ -4353,7 +4376,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen12_oa_mux_regs); + if (IS_METEORLAKE(perf->i915)) + return reg_in_range_table(addr, mtl_oa_mux_regs); + else + return reg_in_range_table(addr, gen12_oa_mux_regs); } static u32 mask_reg_value(u32 reg, u32 val) @@ -4750,6 +4776,7 @@ static void oa_init_supported_formats(struct i915_perf *perf) break; case INTEL_DG2: + case INTEL_METEORLAKE: oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); break; |