summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
authorUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>2023-03-24 01:58:59 +0300
committerUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>2023-03-24 18:50:04 +0300
commit1cc064dce4ed0ff111b6d6cb06b3cccf1cba29f5 (patch)
treee98da43fba940793d0e289dbca2131d3b62930bb /drivers/gpu/drm/i915/i915_perf.c
parentc61d04c9eb4354980839cf938488ca703eba0f83 (diff)
downloadlinux-1cc064dce4ed0ff111b6d6cb06b3cccf1cba29f5.tar.xz
drm/i915/perf: Add support for OA media units
MTL introduces additional OA units dedicated to media use cases. Add support for programming these OA units by passing the media engine class and instance parameters. UMD specific changes for GPUvis support: https://patchwork.freedesktop.org/patch/522827/?series=114023 https://patchwork.freedesktop.org/patch/522822/?series=114023 https://patchwork.freedesktop.org/patch/522826/?series=114023 https://patchwork.freedesktop.org/patch/522828/?series=114023 https://patchwork.freedesktop.org/patch/522816/?series=114023 https://patchwork.freedesktop.org/patch/522825/?series=114023 v2: (Ashutosh) - check for IP_VER(12, 70) instead of MTL - remove PERF_GROUP_OAG comment in mtl_oa_base - remove oa_buffer.group - use engine->oa_group->type in engine_supports_oa_format - remove fw_domains and use FORCEWAKE_ALL - remove MPES/MPEC comment - s/xehp/mtl/ in b counter validation function name - remove engine_supports_oa in __oa_engine_group - remove warn_ON from __oam_engine_group - refactor oa_init_groups and oa_init_regs - assign g->type correctly - use enum oa_type definition v3: (Ashutosh) - Drop oa_unit_functional as engine_supports_oa is enough v4: - s/DRM_DEBUG/drm_dbg/ Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-10-umesh.nerlige.ramappa@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c184
1 files changed, 162 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 081d1dd743e0..247dad57ab89 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
@@ -326,6 +327,12 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
+ [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
+ [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+ [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
};
#define SAMPLE_OA_REPORT (1<<0)
@@ -418,11 +425,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
kfree(oa_bo);
}
+static inline const
+struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
+{
+ return &stream->engine->oa_group->regs;
+}
+
static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+ return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
GEN12_OAG_OATAILPTR_MASK;
}
@@ -875,7 +888,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
i915_reg_t oaheadptr;
oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+ __oa_regs(stream)->oa_head_ptr :
+ GEN8_OAHEADPTR;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
@@ -928,7 +942,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return -EIO;
oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+ __oa_regs(stream)->oa_status :
+ GEN8_OASTATUS;
oastatus = intel_uncore_read(uncore, oastatus_reg);
@@ -1637,6 +1652,11 @@ static bool engine_supports_oa(const struct intel_engine_cs *engine)
return engine->oa_group;
}
+static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type)
+{
+ return engine->oa_group && engine->oa_group->type == type;
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
@@ -1788,8 +1808,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
- intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = gtt_offset;
@@ -1801,9 +1821,9 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
* to enable proper functionality of the overflow
* bit."
*/
- intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
- intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
@@ -2563,7 +2583,8 @@ err_add_request:
return err;
}
-static int gen8_configure_context(struct i915_gem_context *ctx,
+static int gen8_configure_context(struct i915_perf_stream *stream,
+ struct i915_gem_context *ctx,
struct flex *flex, unsigned int count)
{
struct i915_gem_engines_iter it;
@@ -2704,7 +2725,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, num_regs);
+ err = gen8_configure_context(stream, ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
@@ -2749,6 +2770,9 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream,
},
};
+ if (stream->engine->class != RENDER_CLASS)
+ return 0;
+
return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
@@ -2878,7 +2902,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
}
- intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
@@ -2888,7 +2912,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
*/
oag_report_ctx_switches(stream));
- intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
@@ -3042,8 +3066,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
static void gen12_oa_enable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format->format;
+ const struct i915_perf_regs *regs;
+ u32 val;
/*
* If we don't want OA reports from the OA buffer, then we don't even
@@ -3054,9 +3078,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream)
gen12_init_oa_buffer(stream);
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
- (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
- GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+ regs = __oa_regs(stream);
+ val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+ intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
}
/**
@@ -3108,9 +3134,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
if (intel_wait_for_register(uncore,
- GEN12_OAG_OACONTROL,
+ __oa_regs(stream)->oa_ctrl,
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
50))
drm_err(&stream->perf->i915->drm,
@@ -4011,6 +4037,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
struct perf_open_properties *props)
{
struct drm_i915_gem_context_param_sseu user_sseu;
+ const struct i915_oa_format *f;
u64 __user *uprop = uprops;
bool config_instance = false;
bool config_class = false;
@@ -4196,6 +4223,15 @@ static int read_properties_unlocked(struct i915_perf *perf,
return -EINVAL;
}
+ i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
+ f = &perf->oa_formats[i];
+ if (!engine_supports_oa_format(props->engine, f->type)) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid OA format %d for class %d\n",
+ f->type, props->engine->class);
+ return -EINVAL;
+ }
+
if (config_sseu) {
ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
if (ret) {
@@ -4376,6 +4412,14 @@ static const struct i915_range gen12_oa_b_counters[] = {
{}
};
+static const struct i915_range mtl_oam_b_counters[] = {
+ { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
+ { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
+ { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
+ { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
+ {}
+};
+
static const struct i915_range xehp_oa_b_counters[] = {
{ .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
{ .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
@@ -4429,6 +4473,8 @@ static const struct i915_range mtl_oa_mux_regs[] = {
{ .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
{ .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
{ .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
+ { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */
+ {}
};
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
@@ -4466,10 +4512,20 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
return reg_in_range_table(addr, gen12_oa_b_counters);
}
+static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ if (HAS_OAM(perf->i915) &&
+ GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
+ return reg_in_range_table(addr, mtl_oam_b_counters);
+
+ return false;
+}
+
static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
return reg_in_range_table(addr, xehp_oa_b_counters) ||
- reg_in_range_table(addr, gen12_oa_b_counters);
+ reg_in_range_table(addr, gen12_oa_b_counters) ||
+ mtl_is_valid_oam_b_counter_addr(perf, addr);
}
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
@@ -4839,12 +4895,86 @@ static u32 num_perf_groups_per_gt(struct intel_gt *gt)
return 1;
}
+static u32 __oam_engine_group(struct intel_engine_cs *engine)
+{
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) {
+ /*
+ * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+ * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+ */
+ drm_WARN_ON(&engine->i915->drm,
+ engine->gt->type != GT_MEDIA);
+
+ return PERF_GROUP_OAM_SAMEDIA_0;
+ }
+
+ return PERF_GROUP_INVALID;
+}
+
static u32 __oa_engine_group(struct intel_engine_cs *engine)
{
- if (engine->class == RENDER_CLASS)
+ switch (engine->class) {
+ case RENDER_CLASS:
return PERF_GROUP_OAG;
- else
+
+ case VIDEO_DECODE_CLASS:
+ case VIDEO_ENHANCEMENT_CLASS:
+ return __oam_engine_group(engine);
+
+ default:
return PERF_GROUP_INVALID;
+ }
+}
+
+static struct i915_perf_regs __oam_regs(u32 base)
+{
+ return (struct i915_perf_regs) {
+ base,
+ GEN12_OAM_HEAD_POINTER(base),
+ GEN12_OAM_TAIL_POINTER(base),
+ GEN12_OAM_BUFFER(base),
+ GEN12_OAM_CONTEXT_CONTROL(base),
+ GEN12_OAM_CONTROL(base),
+ GEN12_OAM_DEBUG(base),
+ GEN12_OAM_STATUS(base),
+ GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static struct i915_perf_regs __oag_regs(void)
+{
+ return (struct i915_perf_regs) {
+ 0,
+ GEN12_OAG_OAHEADPTR,
+ GEN12_OAG_OATAILPTR,
+ GEN12_OAG_OABUFFER,
+ GEN12_OAG_OAGLBCTXCTRL,
+ GEN12_OAG_OACONTROL,
+ GEN12_OAG_OA_DEBUG,
+ GEN12_OAG_OASTATUS,
+ GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+ int i, num_groups = gt->perf.num_perf_groups;
+
+ for (i = 0; i < num_groups; i++) {
+ struct i915_perf_group *g = &gt->perf.group[i];
+
+ /* Fused off engines can result in a group with num_engines == 0 */
+ if (g->num_engines == 0)
+ continue;
+
+ if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) {
+ g->regs = __oag_regs();
+ g->type = TYPE_OAG;
+ } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ g->regs = __oam_regs(mtl_oa_base[i]);
+ g->type = TYPE_OAM;
+ }
+ }
}
static int oa_init_gt(struct intel_gt *gt)
@@ -4871,6 +5001,8 @@ static int oa_init_gt(struct intel_gt *gt)
gt->perf.num_perf_groups = num_groups;
gt->perf.group = g;
+ oa_init_groups(gt);
+
return 0;
}
@@ -4928,9 +5060,15 @@ static void oa_init_supported_formats(struct i915_perf *perf)
break;
case INTEL_DG2:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ break;
+
case INTEL_METEORLAKE:
oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
break;
default:
@@ -5175,8 +5313,10 @@ int i915_perf_ioctl_version(void)
*
* 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
* DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
+ *
+ * 7: Add support for video decode and enhancement classes.
*/
- return 6;
+ return 7;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)