From 348fb0cb0a79bce03f402d689bbe0bf666577531 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 1 Dec 2020 13:17:57 +0000 Subject: drm/i915/pmu: Deprecate I915_PMU_LAST and optimize state tracking Adding any kinds of "last" abi markers is usually a mistake which I repeated when implementing the PMU because it felt convenient at the time. This patch marks I915_PMU_LAST as deprecated and stops the internal implementation using it for sizing the event status bitmask and array. New way of sizing the fields is a bit less elegant, but it omits reserving slots for tracking events we are not interested in, and as such saves some runtime space. Adding sampling events is likely to be a special event and the new plumbing needed will be easily detected in testing. Existing asserts against the bitfield and array sizes are keeping the code safe. First event which gets the new treatment in this new scheme are the interrupts - which neither needs any tracking in i915 pmu nor needs waking up the GPU to read it. v2: * Streamline helper names. (Chris) v3: * Comment which events need tracking. (Chris) Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201201131757.206367-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index fa1f3d62f9a6..6edcb2b6c708 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -178,7 +178,7 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. 
*/ -- cgit v1.2.3 From 8c3b1ba0e7ea9a80b0ee4b4445ea59c806787813 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Dec 2020 15:44:56 +0000 Subject: drm/i915/gt: Track the overall awake/busy time Since we wake the GT up before executing a request, and go to sleep as soon as it is retired, the GT wake time not only represents how long the device is powered up, but also provides a summary, albeit an overestimate, of the device runtime (i.e. the rc0 time to compare against rc6 time). v2: s/busy/awake/ v3: software-gt-awake-time and I915_PMU_SOFTWARE_GT_AWAKE_TIME Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Cc: Matthew Brost Reported-by: kernel test robot Link: https://patchwork.freedesktop.org/patch/msgid/20201215154456.13954-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 5 +++- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 49 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 24 ++++++++++++++++ drivers/gpu/drm/i915/i915_debugfs.c | 5 ++-- drivers/gpu/drm/i915/i915_pmu.c | 6 ++++ include/uapi/drm/i915_drm.h | 1 + 7 files changed, 89 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index 174a24553322..8975717ace06 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" +#include "intel_gt_pm.h" #include "intel_llc.h" #include "intel_rc6.h" #include "intel_rps.h" @@ -558,7 +559,9 @@ static int rps_boost_show(struct seq_file *m, void *data) seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); - seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); + seq_printf(m, "GPU busy? 
%s, %llums\n", + yesno(gt->awake), + ktime_to_ms(intel_gt_get_awake_time(gt))); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 274aa0dd7050..c94e8ac884eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -39,6 +39,28 @@ static void user_forcewake(struct intel_gt *gt, bool suspend) intel_gt_pm_put(gt); } +static void runtime_begin(struct intel_gt *gt) +{ + local_irq_disable(); + write_seqcount_begin(&gt->stats.lock); + gt->stats.start = ktime_get(); + gt->stats.active = true; + write_seqcount_end(&gt->stats.lock); + local_irq_enable(); +} + +static void runtime_end(struct intel_gt *gt) +{ + local_irq_disable(); + write_seqcount_begin(&gt->stats.lock); + gt->stats.active = false; + gt->stats.total = + ktime_add(gt->stats.total, + ktime_sub(ktime_get(), gt->stats.start)); + write_seqcount_end(&gt->stats.lock); + local_irq_enable(); +} + static int __gt_unpark(struct intel_wakeref *wf) { struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); @@ -67,6 +89,7 @@ static int __gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); intel_gt_unpark_requests(gt); + runtime_begin(gt); return 0; } @@ -79,6 +102,7 @@ static int __gt_park(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); + runtime_end(gt); intel_gt_park_requests(gt); i915_vma_parked(gt); @@ -106,6 +130,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops); + seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex); } void intel_gt_pm_init(struct intel_gt *gt) @@ -339,6 +364,30 @@ int intel_gt_runtime_resume(struct intel_gt *gt) return intel_uc_runtime_resume(&gt->uc); } +static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt) +{ + ktime_t total = 
gt->stats.total; + + if (gt->stats.active) + total = ktime_add(total, + ktime_sub(ktime_get(), gt->stats.start)); + + return total; +} + +ktime_t intel_gt_get_awake_time(const struct intel_gt *gt) +{ + unsigned int seq; + ktime_t total; + + do { + seq = read_seqcount_begin(&gt->stats.lock); + total = __intel_gt_get_awake_time(gt); + } while (read_seqcount_retry(&gt->stats.lock, seq)); + + return total; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_gt_pm.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 60f0e2fbe55c..63846a856e7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -58,6 +58,8 @@ int intel_gt_resume(struct intel_gt *gt); void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); +ktime_t intel_gt_get_awake_time(const struct intel_gt *gt); + static inline bool is_mock_gt(const struct intel_gt *gt) { return I915_SELFTEST_ONLY(gt->awake == -ENODEV); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 6d39a4a11bf3..c7bde529feab 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -87,6 +87,30 @@ struct intel_gt { u32 pm_guc_events; + struct { + bool active; + + /** + * @lock: Lock protecting the below fields. + */ + seqcount_mutex_t lock; + + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. 
+ */ + ktime_t start; + } stats; + struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] [MAX_ENGINE_INSTANCE + 1]; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 263074c2c097..f29487ea4528 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1314,9 +1314,10 @@ static int i915_engine_info(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(&i915->runtime_pm); - seq_printf(m, "GT awake? %s [%d]\n", + seq_printf(m, "GT awake? %s [%d], %llums\n", yesno(i915->gt.awake), - atomic_read(&i915->gt.wakeref.count)); + atomic_read(&i915->gt.wakeref.count), + ktime_to_ms(intel_gt_get_awake_time(&i915->gt))); seq_printf(m, "CS timestamp frequency: %u Hz\n", RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 37716a89c682..7af4fabe5594 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -509,6 +509,8 @@ config_status(struct drm_i915_private *i915, u64 config) if (!HAS_RC6(i915)) return -ENODEV; break; + case I915_PMU_SOFTWARE_GT_AWAKE_TIME: + break; default: return -ENOENT; } @@ -616,6 +618,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event) case I915_PMU_RC6_RESIDENCY: val = get_rc6(&i915->gt); break; + case I915_PMU_SOFTWARE_GT_AWAKE_TIME: + val = ktime_to_ns(intel_gt_get_awake_time(&i915->gt)); + break; } } @@ -916,6 +921,7 @@ create_event_attributes(struct i915_pmu *pmu) __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), + __event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"), }; static const struct { enum drm_i915_pmu_engine_sample sample; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6edcb2b6c708..1987e2ea79a3 100644 --- 
a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -177,6 +177,7 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) #define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY -- cgit v1.2.3 From 87199e4c2490ab4ba0483a5ae5690c19b5c3d45b Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 14 Jan 2021 22:13:12 +0200 Subject: drm/framebuffer: Format modifier for Intel Gen 12 render compression with Clear Color Gen12 display can decompress surfaces compressed by render engine with Clear Color, add a new modifier as the driver needs to know the surface was compressed by render engine. V2: Description changes as suggested by Rafael. V3: Mention the Clear Color size of 64 bits in the comments(DK) v4: Fix trailing whitespaces v5: Explain Clear Color in the documentation. v6: Documentation Nitpicks(Nanley) Cc: Ville Syrjala Cc: Dhinakaran Pandiyan Cc: Kalyan Kondapally Cc: Rafael Antognolli Cc: Nanley Chery Signed-off-by: Radhakrishna Sripada Signed-off-by: Imre Deak Acked-by: Daniel Vetter Acked-by: Jani Nikula Acked-by: Nanley Chery Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20210114201314.783648-2-imre.deak@intel.com --- include/uapi/drm/drm_fourcc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 723c8e23ca87..8782a7260c6f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -527,6 +527,25 @@ extern "C" { */ #define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) +/* + * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render + * compression. 
+ * + * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be ignored. The clear color structure is 256 bits. The first 128 bits + * represents Raw Clear Color Red, Green, Blue and Alpha color each represented + * by 32 bits. The raw clear color is consumed by the 3d engine and generates + * the converted clear color of size 64 bits. The first 32 bits store the Lower + * Converted Clear Color value and the next 32 bits store the Higher Converted + * Clear Color value when applicable. The Converted Clear Color values are + * consumed by the DE. The last 64 bits are used to store Color Discard Enable + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line + * corresponds to an area of 4x1 tiles in the main surface. The main surface + * pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit v1.2.3