From ebcaa1ff8b59097805d548fe7a676f194625c033 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 31 Oct 2017 10:23:25 +0000 Subject: drm/i915: Reject unknown syncobj flags We have to reject unknown flags for uAPI considerations, and also because the current implementation limits their i915 storage space to two bits. v2: (Chris Wilson) * Fix fail in ABI check. * Added unknown flags and BUILD_BUG_ON. v3: * Use ARCH_KMALLOC_MINALIGN instead of alignof. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: cf6e7bac6357 ("drm/i915: Add support for drm syncobjs") Cc: Jason Ekstrand Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171031102326.9738-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi')
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 125bde7d9504..ac3c6503ca27 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -839,6 +839,7 @@ struct drm_i915_gem_exec_fence { #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) __u32 flags; };
-- cgit v1.2.3
From 1803fcbca2e444f7972430c4dc1c3e98c6ee1bc9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 10 Nov 2017 14:26:27 +0000 Subject: drm/i915: Define an engine class enum for the uABI We want to be able to report back to userspace details about an engine's class, and in return for userspace to be able to request actions regarding certain classes of engines. To isolate the uABI from any variations between hw generations, we define an abstract class for the engines and internally map onto the hw. v2: Remove MAX from the uABI; keep it internal if we need it, but don't let userspace make the mistake of using it themselves. v3: s/OTHER/INVALID/ The use of OTHER is ill-defined, so remove it from the uABI as any future new type of engine can define a class to suit it. But keep a reserved value for an invalid class, so that we can always unambiguously express when something doesn't belong to the classification.
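A note on the arithmetic in the syncobj patch above: in two's complement, -(I915_EXEC_FENCE_SIGNAL << 1) == -(1 << 2) sets bit 2 and every bit above it, so __I915_EXEC_FENCE_UNKNOWN_FLAGS always covers exactly the flags the kernel does not yet understand, with no update needed when a new flag is added below it. The check that consumes the mask lives in the execbuffer code, outside this include/uapi-only excerpt; a minimal sketch of what it amounts to:

    /* fence is a struct drm_i915_gem_exec_fence copied from userspace */
    if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
            return -EINVAL;

The BUILD_BUG_ON mentioned in the v2/v3 notes additionally asserts at compile time that the known flags fit in the two bits of pointer-alignment slack (hence ARCH_KMALLOC_MINALIGN) that the driver uses as internal storage.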
Signed-off-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Joonas Lahtinen #v2 Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++++- include/uapi/drm/i915_drm.h | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 87778f03393b..bded9c40dbd5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -50,6 +50,8 @@ struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); int (*init_execlists)(struct intel_engine_cs *engine); + + u8 uabi_class; }; static const struct engine_class_info intel_engine_classes[] = { @@ -57,21 +59,25 @@ static const struct engine_class_info intel_engine_classes[] = { .name = "rcs", .init_execlists = logical_render_ring_init, .init_legacy = intel_init_render_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_RENDER, }, [COPY_ENGINE_CLASS] = { .name = "bcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_blt_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_COPY, }, [VIDEO_DECODE_CLASS] = { .name = "vcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO, }, [VIDEO_ENHANCEMENT_CLASS] = { .name = "vecs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_vebox_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, }, }; @@ -213,13 +219,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", class_info->name, info->instance) >= sizeof(engine->name)); - engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->uabi_id = info->uabi_id; + engine->uabi_class = class_info->uabi_class; + engine->context_size = __intel_engine_context_size(dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 69ad875fd011..f3dbfe7ae6e4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 +289,14 @@ struct intel_engine_execlists { struct intel_engine_cs { struct drm_i915_private *i915; char name[INTEL_ENGINE_CS_MAX_NAME]; + enum intel_engine_id id; - unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; + u8 uabi_id; + u8 uabi_class; + u8 class; u8 instance; u32 context_size; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ac3c6503ca27..1f7dfb22a7c2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -86,6 +86,22 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; +/* + * Different engines serve different roles, and there may be more than one + * engine serving each role. enum drm_i915_gem_engine_class provides a + * classification of the role of the engine, which may be used when requesting + * operations to be performed on a certain subset of engines, or for providing + * information about that group. 
+ */ +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use -- cgit v1.2.3 From d2b4b97933f5adacfba42dc3b9200d0e21fbe2c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:33 +0000 Subject: drm/i915: Record the default hw state after reset upon load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a copy of the HW state after a reset upon module loading by executing a context switch from a blank context to the kernel context, thus saving the default hw state over the blank context image. We can then use the default hw state to initialise any future context, ensuring that each starts with the default view of hw state. v2: Unmap our default state from the GTT after stealing it from the context. This should stop us from accidentally overwriting it via the GTT (and frees up some precious GTT space). Testcase: igt/gem_ctx_isolation Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 - drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 115 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 55 ++++----------- drivers/gpu/drm/i915/i915_gem_context.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 17 +++++ drivers/gpu/drm/i915/intel_lrc.c | 38 +++++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 15 +++++ 11 files changed, 224 insertions(+), 73 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2c..42cc61230ca7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) if (IS_ERR(vgpu->shadow_ctx)) return PTR_ERR(vgpu->shadow_ctx); - vgpu->shadow_ctx->engine[RCS].initialised = true; - bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); return 0; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d89321f0468c..533ba096b9a6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void *unused) struct intel_context *ce = &ctx->engine[engine->id]; seq_printf(m, "%s: ", engine->name); - seq_putc(m, ce->initialised ? 
'I' : 'i'); if (ce->state) describe_obj(m, ce->state->obj); if (ce->ring) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1b440f2b90a5..d97fe9c9439a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -406,6 +406,9 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_HAS_CONTEXT_ISOLATION: + value = intel_engines_has_context_isolation(dev_priv); + break; case I915_PARAM_SLICE_MASK: value = INTEL_INFO(dev_priv)->sseu.slice_mask; if (!value) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed335612be25..4bf118304086 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4972,6 +4972,120 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) return true; } +static int __intel_engines_record_defaults(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its defaults values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. + */ + + ctx = i915_gem_context_create_kernel(i915, 0); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ctx; + } + + err = i915_switch_context(rq); + if (engine->init_context) + err = engine->init_context(rq); + + __i915_add_request(rq, true); + if (err) + goto err_active; + } + + err = i915_gem_switch_to_kernel_context(i915); + if (err) + goto err_active; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) + goto err_active; + + assert_kernel_context_is_current(i915); + + for_each_engine(engine, i915, id) { + struct i915_vma *state; + + state = ctx->engine[id].state; + if (!state) + continue; + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. + */ + err = i915_vma_unbind(state); + if (err) + goto err_active; + + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + if (err) + goto err_active; + + engine->default_state = i915_gem_object_get(state->obj); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + unsigned int found = intel_engines_has_context_isolation(i915); + + /* + * Make sure that classes with multiple engine instances all + * share the same basic configuration. + */ + for_each_engine(engine, i915, id) { + unsigned int bit = BIT(engine->uabi_class); + unsigned int expected = engine->default_state ? bit : 0; + + if ((found & bit) != expected) { + DRM_ERROR("mismatching default context state for class %d on engine %s\n", + engine->uabi_class, engine->name); + } + } + } + +out_ctx: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return err; + +err_active: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn-down. 
First try to flush any remaining + * request, ensure we are pointing at the kernel context and + * then remove it. + */ + if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) + goto out_ctx; + + if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + goto out_ctx; + + i915_gem_contexts_lost(i915); + goto out_ctx; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -5038,6 +5152,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = __intel_engines_record_defaults(dev_priv); out_unlock: if (ret == -EIO) { /* Allow engine initialisation to fail by marking the GPU as diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c05c3d7d21a5..2db040695035 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -418,8 +418,8 @@ out: return ctx; } -static struct i915_gem_context * -create_kernel_context(struct drm_i915_private *i915, int prio) +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; @@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ - ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); err = PTR_ERR(ctx); @@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) dev_priv->kernel_context = ctx; /* highest priority; preempting task */ - ctx = create_kernel_context(dev_priv, INT_MAX); + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default preempt context\n"); err = PTR_ERR(ctx); @@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) engine->context_unpin(engine, engine->last_retired_context); engine->last_retired_context = NULL; } - - /* Force the GPU state to be restored on enabling */ - if (!i915_modparams.enable_execlists) { - struct i915_gem_context *ctx; - - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - if (!i915_gem_context_is_default(ctx)) - continue; - - for_each_engine(engine, dev_priv, id) - ctx->engine[engine->id].initialised = false; - - ctx->remap_slice = ALL_L3_SLICES(dev_priv); - } - - for_each_engine(engine, dev_priv, id) { - struct intel_context *kce = - &dev_priv->kernel_context->engine[engine->id]; - - kce->initialised = true; - } - } } void i915_gem_contexts_fini(struct drm_i915_private *i915) @@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, if (to->remap_slice) return false; - if (!to->engine[RCS].initialised) - return false; - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -795,11 +770,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return ret; } - if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) - /* NB: If we inhibit the restore, the context is not allowed to - * die because future work may end up depending on valid address - * space. This means we must enforce that a page table load - * occur when this occurs. */ + if (i915_gem_context_is_kernel(to)) + /* + * The kernel context(s) is treated as pure scratch and is not + * expected to retain any state (as we sacrifice it during + * suspend and on resume it may be corrupted). 
This is ok, + * as nothing actually executes using the kernel context; it + * is purely used for flushing user contexts. + */ hw_flags = MI_RESTORE_INHIBIT; else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings) hw_flags = MI_FORCE_RESTORE; @@ -843,15 +821,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) to->remap_slice &= ~(1<<i); } } - if (!to->engine[RCS].initialised) { - if (engine->init_context) { - ret = engine->init_context(req); - if (ret) - return ret; - } - to->engine[RCS].initialised = true; - } - return 0; }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 44688e22a5c2..4bfb72f8e1cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -157,7 +157,6 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - bool initialised; } engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) {
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6d0b38189a7d..868c07a693b5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -687,6 +687,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (engine->default_state) + i915_gem_object_put(engine->default_state); + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); @@ -1705,6 +1708,20 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int which; + + which = 0; + for_each_engine(engine, i915, id) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} + static void print_request(struct drm_printer *m, struct drm_i915_gem_request *rq, const char *prefix)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1baaab35905c..0c93f27f36ee 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1180,7 +1180,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; u32 *cs; - int ret; GEM_BUG_ON(!ce->pin_count); @@ -1194,14 +1193,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) if (IS_ERR(cs)) return PTR_ERR(cs); - if (!ce->initialised) { - ret = engine->init_context(request); - if (ret) - return ret; - - ce->initialised = true; - } - /* Note that after this point, we have committed to using * this request as it is being used to both track the * state of engine initialisation and liveness of the * @@ -2133,7 +2124,6 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? CTX_CTRL_RS_CTX_ENABLE : 0))); CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); @@ -2210,6 +2200,7 @@ populate_lr_context(struct i915_gem_context *ctx, struct intel_ring *ring) { void *vaddr; + u32 *regs; int ret; ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); @@ -2226,11 +2217,31 @@ populate_lr_context(struct i915_gem_context *ctx, } ctx_obj->mm.dirty = true; + if (engine->default_state) { + /* + * We only want to copy over the template context state; + * skipping over the headers reserved for GuC communication, + * leaving those as zero. + */ + const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) + return PTR_ERR(defaults); + + memcpy(vaddr + start, defaults + start, engine->context_size); + i915_gem_object_unpin_map(engine->default_state); + } + /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ - - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, - ctx, engine, ring); + regs = vaddr + LRC_STATE_PN * PAGE_SIZE; + execlists_init_reg_state(regs, ctx, engine, ring); + if (!engine->default_state) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); i915_gem_object_unpin_map(ctx_obj); @@ -2283,7 +2294,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised |= engine->init_context == NULL; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7e2a671882fb..464dc58af27b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1384,11 +1384,34 @@ alloc_context_vma(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; + int err; obj = i915_gem_object_create(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); + if (engine->default_state) { + void *defaults, *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) { + err = PTR_ERR(defaults); + goto err_map; + } + + memcpy(vaddr, defaults, engine->context_size); + + i915_gem_object_unpin_map(engine->default_state); + i915_gem_object_unpin_map(obj); + } + /* * Try to make the context utilize L3 as well as LLC. * @@ -1410,10 +1433,18 @@ alloc_context_vma(struct intel_engine_cs *engine) } vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); - if (IS_ERR(vma)) - i915_gem_object_put(obj); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } return vma; + +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); } static struct intel_ring * @@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine, ce->state->obj->pin_global++; } - /* The kernel context is only used as a placeholder for flushing the - * active context. It is never used for submitting user rendering and - * as such never requires the golden render context, and so we can skip - * emitting it when we switch to the kernel context. 
This is required - * as during eviction we cannot allocate and pin the renderstate in - * order to initialise the context. - */ - if (i915_gem_context_is_kernel(ctx)) - ce->initialised = true; - i915_gem_context_get(ctx); out:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f3dbfe7ae6e4..337222859166 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -306,6 +306,7 @@ struct intel_engine_cs { struct intel_ring *buffer; struct intel_timeline *timeline; + struct drm_i915_gem_object *default_state; struct intel_render_state *render_state; atomic_t irq_count; @@ -932,6 +933,7 @@ void intel_engines_park(struct drm_i915_private *i915); void intel_engines_unpark(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 1f7dfb22a7c2..6c02ced663f8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -466,6 +466,21 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen supports contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + typedef struct drm_i915_getparam { __s32 param; /*
-- cgit v1.2.3
From dab91783338bd3dd42638f89b5f7e34c57773207 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:44 +0000 Subject: drm/i915: expose command stream timestamp frequency to userspace We used to have this fixed per generation, but starting with CNL userspace cannot tell just off the PCI ID. Let's make this information available. This is particularly useful for performance monitoring where much of the normalization work is done using those timestamps (this includes pipeline statistics in both GL & Vulkan as well as OA reports).
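Before the changelog, a note on consuming this: like I915_PARAM_HAS_CONTEXT_ISOLATION above, the new parameter is a plain getparam query. A minimal userspace sketch (using libdrm's drmIoctl; error handling trimmed, and note the value comes back through an int, which is wide enough for any of the frequencies involved):

    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static int cs_timestamp_hz(int fd)
    {
            int value = 0;
            struct drm_i915_getparam gp = {
                    .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
                    .value = &value,
            };

            if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                    return -1; /* older kernels reject the unknown param */

            return value; /* e.g. 12500000 (12.5 MHz) on gen5-8 parts */
    }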
v2: Use variables for 24MHz/19.2MHz values (Ewelina) Renamed function & coding style (Sagar) v3: Fix frequency read on Broadwell (Sagar) Fix missing divide by 4 on <= gen4 (Sagar) Signed-off-by: Lionel Landwerlin Tested-by: Rafael Antognolli Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-7-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 + drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 21 ++++++ drivers/gpu/drm/i915/intel_device_info.c | 107 +++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 6 ++ 6 files changed, 141 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cb77aeab4ee2..82ff186108f1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3269,6 +3269,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) yesno(dev_priv->gt.awake)); seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); + seq_printf(m, "CS timestamp frequency: %llu Hz\n", + dev_priv->info.cs_timestamp_frequency); p = drm_seq_file_printer(m); for_each_engine(engine, dev_priv, id) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9df7b5d59a94..42813f4247e2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -419,6 +419,9 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_CS_TIMESTAMP_FREQUENCY: + value = INTEL_INFO(dev_priv)->cs_timestamp_frequency; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d511bf948cd6..b538df740ac3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -885,6 +885,8 @@ struct intel_device_info { /* Slice/subslice/EU info */ struct sseu_dev_info sseu; + u64 cs_timestamp_frequency; + struct color_luts { u16 degamma_lut_size; u16 gamma_lut_size; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 31e4543ca942..05e33a41fcc7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1116,9 +1116,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0x0D00) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) + #define RPM_CONFIG1 _MMIO(0x0D04) #define GEN10_GT_NOA_ENABLE (1 << 9) +/* GPM unit config (Gen9+) */ +#define CTC_MODE _MMIO(0xA26C) +#define CTC_SOURCE_PARAMETER_MASK 1 +#define CTC_SOURCE_CRYSTAL_CLOCK 0 +#define CTC_SOURCE_DIVIDE_LOGIC 1 +#define CTC_SHIFT_PARAMETER_SHIFT 1 +#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) + /* RCP unit config (Gen8+) */ #define RCP_CONFIG _MMIO(0x0D08) @@ -8863,6 +8878,12 @@ enum skl_power_gate { #define ILK_TIMESTAMP_HI _MMIO(0x70070) #define IVB_TIMESTAMP_CTR _MMIO(0x44070) +#define GEN9_TIMESTAMP_OVERRIDE _MMIO(0x44074) +#define 
GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) + #define _PIPE_FRMTMSTMP_A 0x70048 #define PIPE_FRMTMSTMP(pipe) \ _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index db03d179fc85..78bf7374fbdd 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -329,6 +329,108 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } +static u64 read_reference_ts_freq(struct drm_i915_private *dev_priv) +{ + u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); + u64 base_freq, frac_freq; + + base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq *= 1000000; + + frac_freq = ((ts_override & + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + if (frac_freq != 0) + frac_freq = 1000000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv) +{ + u64 f12_5_mhz = 12500000; + u64 f19_2_mhz = 19200000; + u64 f24_mhz = 24000000; + + if (INTEL_GEN(dev_priv) <= 4) { + /* PRMs say: + * + * "The value in this register increments once every 16 + * hclks." (through the “Clocking Configuration” + * (“CLKCFG”) MCHBAR register) + */ + return (dev_priv->rawclk_freq * 1000) / 16; + } else if (INTEL_GEN(dev_priv) <= 8) { + /* PRMs say: + * + * "The PCU TSC counts 10ns increments; this timestamp + * reflects bits 38:3 of the TSC (i.e. 80ns granularity, + * rolling over every 1.5 hours). + */ + return f12_5_mhz; + } else if (INTEL_GEN(dev_priv) <= 9) { + u32 ctc_reg = I915_READ(CTC_MODE); + u64 freq = 0; + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(dev_priv); + } else { + freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz : f24_mhz; + + /* Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> + CTC_SHIFT_PARAMETER_SHIFT); + } + + return freq; + } else if (INTEL_GEN(dev_priv) <= 10) { + u32 ctc_reg = I915_READ(CTC_MODE); + u64 freq = 0; + u32 rpm_config_reg = 0; + + /* First figure out the reference frequency. There are 2 ways + * we can compute the frequency, either through the + * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE + * tells us which one we should use. + */ + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(dev_priv); + } else { + u32 crystal_clock; + + rpm_config_reg = I915_READ(RPM_CONFIG0); + crystal_clock = (rpm_config_reg & + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + switch (crystal_clock) { + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + freq = f19_2_mhz; + break; + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + freq = f24_mhz; + break; + } + } + + /* Now figure out how the command stream's timestamp register + * increments from this frequency (it might increment only + * every few clock cycle). 
+ */ + freq >>= 3 - ((rpm_config_reg & + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + + return freq; + } + + DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n"); + return 0; +} + /* * Determine various intel_device_info fields at runtime. * @@ -450,6 +552,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) else if (INTEL_GEN(dev_priv) >= 10) gen10_sseu_info_init(dev_priv); + /* Initialize command stream timestamp frequency */ + info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv); + DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", @@ -465,4 +570,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->sseu.has_subslice_pg ? "y" : "n"); DRM_DEBUG_DRIVER("has EU power gating: %s\n", info->sseu.has_eu_pg ? "y" : "n"); + DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n", + info->cs_timestamp_frequency); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6c02ced663f8..b57985929553 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -481,6 +481,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_CONTEXT_ISOLATION 50 +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 + typedef struct drm_i915_getparam { __s32 param; /* -- cgit v1.2.3 From 373d7080896a3cb3b28ae3a2abdafb7bb87552b1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 14 Nov 2017 16:41:19 -0500 Subject: drm/amdkfd: Add CWSR support This hardware feature allows the GPU to preempt shader execution in the middle of a compute wave, save the state and restore it later to resume execution. Memory for saving the state is allocated per queue in user mode and the address and size passed to the create_queue ioctl. The size depends on the number of waves that can be in flight simultaneously on a given ASIC. 
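To make the userspace contract concrete before the sign-offs: CWSR only adds fields to the existing create_queue ioctl (see the kfd_ioctl.h hunk at the end of this patch). A hedged sketch of the caller's side; the buffer names and how the runtime sizes them are illustrative, not part of this patch:

    struct kfd_ioctl_create_queue_args args = {0};

    /* ... ring buffer address/size, queue type, gpu_id as before ... */

    /* Per-queue save area allocated in user mode; the required sizes are
     * ASIC-specific (they scale with the number of waves in flight), so
     * the runtime derives them from the device properties. */
    args.ctx_save_restore_address = (uintptr_t)cwsr_area; /* to KFD */
    args.ctx_save_restore_size = cwsr_area_size;          /* to KFD */
    args.ctl_stack_size = ctl_stack_size;                 /* to KFD */

    if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args))
            /* no CWSR-capable queue; fall back or fail */;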
Signed-off-by: Shaoyun.liu Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 ++++- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 27 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 31 +++++++- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 87 +++++++++++++++++++++- include/uapi/linux/kfd_ioctl.h | 3 +- 8 files changed, 179 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 505d39156acd..2a4612d8437a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep) return -EPERM; } - process = kfd_create_process(current); + process = kfd_create_process(filep); if (IS_ERR(process)) return PTR_ERR(process); @@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->ctx_save_restore_area_address = args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; + q_properties->ctl_stack_size = args->ctl_stack_size; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -1088,6 +1089,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) KFD_MMAP_EVENTS_MASK) { vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; return kfd_event_mmap(process, vma); + } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == + KFD_MMAP_RESERVED_MEM_MASK) { + vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; + return kfd_reserved_mem_mmap(process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 621a3b53a038..4f05eacca786 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -27,6 +27,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" +#include "cwsr_trap_handler_gfx8.asm" #define MQD_SIZE_ALIGNED 768 @@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, }; struct kfd_deviceid { @@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } +static void kfd_cwsr_init(struct kfd_dev *kfd) +{ + if (cwsr_enable && kfd->device_info->supports_cwsr) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + kfd->cwsr_enabled = true; + } +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources 
*gpu_resources) { @@ -286,6 +300,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_iommu_pasid_error; } + kfd_cwsr_init(kfd); + if (kfd_resume(kfd)) goto kfd_resume_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e202921c150e..5c065024e285 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -173,6 +173,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -846,6 +849,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } dqm->asic_ops.init_sdma_vm(dqm, q, qpd); + + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f744caeaee04..ee8adf654cd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -50,6 +50,10 @@ module_param(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); +int cwsr_enable = 1; +module_param(cwsr_enable, int, 0444); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); + int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); MODULE_PARM_DESC(max_num_of_queues_per_device, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 2ba7cea7b99b..00e1f1a9728b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -89,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_iq_rptr = 1; + if (q->tba_addr) { + m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8); + m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8); + m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8); + m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8); + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + *mqd = m; if (gart_addr) *gart_addr = addr; @@ -167,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = + atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | + mtype << 
CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 47504737ab4a..a66876467995 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -41,6 +41,7 @@ #define KFD_MMAP_DOORBELL_MASK 0x8000000000000 #define KFD_MMAP_EVENTS_MASK 0x4000000000000 +#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000 /* * When working with cp scheduler we should assign the HIQ manually or via @@ -62,6 +63,15 @@ #define KFD_MAX_NUM_OF_PROCESSES 512 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 +/* + * Size of the per-process TBA+TMA buffer: 2 pages + * + * The first page is the TBA used for the CWSR ISA code. The second + * page is used as TMA for daisy chaining a user-mode trap handler. + */ +#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) +#define KFD_CWSR_TMA_OFFSET PAGE_SIZE + /* * Kernel module parameter to specify maximum number of supported queues per * device @@ -78,6 +88,8 @@ extern int max_num_of_queues_per_device; /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; +extern int cwsr_enable; + /* * Kernel module parameter to specify whether to send sigterm to HSA process on * unhandled exception @@ -131,6 +143,7 @@ struct kfd_device_info { size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; + bool supports_cwsr; }; struct kfd_mem_obj { @@ -200,6 +213,11 @@ struct kfd_dev { /* Debug manager */ struct kfd_dbgmgr *dbgmgr; + + /* CWSR */ + bool cwsr_enabled; + const void *cwsr_isa; + unsigned int cwsr_isa_size; }; /* KGD2KFD callbacks */ @@ -332,6 +350,9 @@ struct queue_properties { uint32_t eop_ring_buffer_size; uint64_t ctx_save_restore_area_address; uint32_t ctx_save_restore_area_size; + uint32_t ctl_stack_size; + uint64_t tba_addr; + uint64_t tma_addr; }; /** @@ -439,6 +460,11 @@ struct qcm_process_device { uint32_t num_gws; uint32_t num_oac; uint32_t sh_hidden_private_base; + + /* CWSR memory */ + void *cwsr_kaddr; + uint64_t tba_addr; + uint64_t tma_addr; }; @@ -563,7 +589,7 @@ struct amdkfd_ioctl_desc { void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); -struct kfd_process *kfd_create_process(const struct task_struct *); +struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); @@ -577,6 +603,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma); + /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1bb9b2643d5a..39f4c19aaf61 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -28,6 +28,7 @@ #include #include #include +#include struct mm_struct; @@ -53,6 +54,8 @@ struct kfd_process_release_work { static struct kfd_process *find_process(const struct task_struct *thread); static struct kfd_process *create_process(const struct task_struct *thread); +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); + void
kfd_process_create_wq(void) { @@ -68,9 +71,10 @@ void kfd_process_destroy_wq(void) } } -struct kfd_process *kfd_create_process(const struct task_struct *thread) +struct kfd_process *kfd_create_process(struct file *filep) { struct kfd_process *process; + struct task_struct *thread = current; if (!thread->mm) return ERR_PTR(-EINVAL); @@ -101,6 +105,8 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) up_write(&thread->mm->mmap_sem); + kfd_process_init_cwsr(process, filep); + return process; } @@ -168,6 +174,11 @@ static void kfd_process_wq_release(struct work_struct *work) amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); list_del(&pdd->per_device_list); + + if (pdd->qpd.cwsr_kaddr) + free_pages((unsigned long)pdd->qpd.cwsr_kaddr, + get_order(KFD_CWSR_TBA_TMA_SIZE)); + kfree(pdd); } @@ -260,6 +271,46 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, }; +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) +{ + int err = 0; + unsigned long offset; + struct kfd_process_device *temp, *pdd = NULL; + struct kfd_dev *dev = NULL; + struct qcm_process_device *qpd = NULL; + + mutex_lock(&p->mutex); + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { + dev = pdd->dev; + qpd = &pdd->qpd; + if (!dev->cwsr_enabled || qpd->cwsr_kaddr) + continue; + offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + qpd->tba_addr = (int64_t)vm_mmap(filep, 0, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); + + if (IS_ERR_VALUE(qpd->tba_addr)) { + pr_err("Failure to set tba address. error -%d.\n", + (int)qpd->tba_addr); + err = qpd->tba_addr; + qpd->tba_addr = 0; + qpd->cwsr_kaddr = NULL; + goto out; + } + + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; + pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", + qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); + } +out: + mutex_unlock(&p->mutex); + return err; +} + static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; @@ -535,3 +586,37 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) return p; } + +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma) +{ + struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); + struct kfd_process_device *pdd; + struct qcm_process_device *qpd; + + if (!dev) + return -EINVAL; + if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { + pr_err("Incorrect CWSR mapping size.\n"); + return -EINVAL; + } + + pdd = kfd_get_process_device_data(dev, process); + if (!pdd) + return -EINVAL; + qpd = &pdd->qpd; + + qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(KFD_CWSR_TBA_TMA_SIZE)); + if (!qpd->cwsr_kaddr) { + pr_err("Error allocating per process CWSR buffer.\n"); + return -ENOMEM; + } + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND + | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; + /* Mapping pages to user process */ + return remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(qpd->cwsr_kaddr)), + KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); +} diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e80501368ae..f7563ef2e883 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args { __u64 eop_buffer_address; /* to KFD */ __u64 eop_buffer_size; /* to KFD 
*/ __u64 ctx_save_restore_address; /* to KFD */ - __u64 ctx_save_restore_size; /* to KFD */ + __u32 ctx_save_restore_size; /* to KFD */ + __u32 ctl_stack_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.2.3 From d7b9bd2248d794275b53d34e665f7c5a08c4b396 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 14 Nov 2017 16:41:20 -0500 Subject: drm/amdkfd: Add support for user-mode trap handlers A second-level user mode trap handler can be installed. The CWSR trap handler jumps to the secondary trap handler conditionally for any conditions not handled by it. This can be used e.g. for debugging or catching math exceptions. When CWSR is disabled, the user mode trap handler is installed as first level trap handler. Signed-off-by: Shaoyun.liu Signed-off-by: Jay Cornwall Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 +++++++++++++++++++++- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 22 +++++++++++++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +++ include/uapi/linux/kfd_ioctl.h | 12 ++++++- 4 files changed, 74 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 2a4612d8437a..cc61ec289880 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -432,6 +432,38 @@ out: return err; } +static int kfd_ioctl_set_trap_handler(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_trap_handler_args *args = data; + struct kfd_dev *dev; + int err = 0; + struct kfd_process_device *pdd; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = -ESRCH; + goto out; + } + + if (dev->dqm->ops.set_trap_handler(dev->dqm, + &pdd->qpd, + args->tba_addr, + args->tma_addr)) + err = -EINVAL; + +out: + mutex_unlock(&p->mutex); + + return err; +} + static int kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) { @@ -980,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_set_scratch_backing_va, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, - kfd_ioctl_get_tile_config, 0) + kfd_ioctl_get_tile_config, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, + kfd_ioctl_set_trap_handler, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5c065024e285..8447810c9a1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1116,6 +1116,26 @@ out: return retval; } +static int set_trap_handler(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr) +{ + uint64_t *tma; + + if (dqm->dev->cwsr_enabled) { + /* Jump from CWSR trap handler to user trap */ + tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { + qpd->tba_addr = tba_addr; + qpd->tma_addr = tma_addr; + } + + return 0; +} + static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -1247,6 +1267,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 
dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: @@ -1262,6 +1283,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; break; default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 5b77cb69f732..8752edf9cd9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -123,6 +123,11 @@ struct device_queue_manager_ops { void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + int (*set_trap_handler)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr); + int (*process_termination)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); };
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f7563ef2e883..f4cab5b3ba9a 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -262,6 +262,13 @@ struct kfd_ioctl_get_tile_config_args { */ }; +struct kfd_ioctl_set_trap_handler_args { + uint64_t tba_addr; /* to KFD */ + uint64_t tma_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -322,7 +329,10 @@ struct kfd_ioctl_get_tile_config_args { #define AMDKFD_IOC_GET_TILE_CONFIG \ AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) +#define AMDKFD_IOC_SET_TRAP_HANDLER \ + AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x13 +#define AMDKFD_COMMAND_END 0x14 #endif
-- cgit v1.2.3
From b46a33e271ed81bd765c632b972c49d5b44729c7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Nov 2017 18:18:45 +0000 Subject: drm/i915/pmu: Expose a PMU interface for perf queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Chris Wilson From: Tvrtko Ursulin From: Dmitry Rogozhkin The first goal is to be able to measure GPU (and individual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. Functionality we are exporting to userspace is via the existing perf PMU API and can be exercised via the existing tools. For example: perf stat -a -e i915/rcs0-busy/ -I 1000 Will print the render engine busyness once per second. All the performance counters can be enumerated (perf list) and have their unit of measure correctly reported in sysfs. v1-v2 (Chris Wilson): v2: Use a common timer for the ring sampling. v3: (Tvrtko Ursulin) * Decouple uAPI from i915 engine ids. * Complete uAPI defines. * Refactor some code to helpers for clarity. * Skip sampling disabled engines.
* Expose counters in sysfs. * Pass in fake regs to avoid null ptr deref in perf core. * Convert to class/instance uAPI. * Use shared driver code for rc6 residency, power and frequency. v4: (Dmitry Rogozhkin) * Register PMU with .task_ctx_nr=perf_invalid_context * Expose cpumask for the PMU with the single CPU in the mask * Properly support pmu->stop(): it should call pmu->read() * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE) * Introduce refcounting of event subscriptions. * Make pmu.busy_stats a refcounter to avoid busy stats going away with some deleted event. * Expose cpumask for i915 PMU to avoid multiple events creation of the same type followed by counter aggregation by perf-stat. * Track CPUs getting online/offline to migrate perf context. If (likely) cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be needed to see effect of CPU status tracking. * End result is that only global events are supported and perf stat works correctly. * Deny perf driver level sampling - it is prohibited for uncore PMU. v5: (Tvrtko Ursulin) * Don't hardcode number of engine samplers. * Rewrite event ref-counting for correctness and simplicity. * Store initial counter value when starting already enabled events to correctly report values to all listeners. * Fix RC6 residency readout. * Comments, GPL header. v6: * Add missing entry to v4 changelog. * Fix accounting in CPU hotplug case by copying the approach from arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin) v7: * Log failure message only on failure. * Remove CPU hotplug notification state on unregister. v8: * Fix error unwind on failed registration. * Checkpatch cleanup. v9: * Drop the energy metric, it is available via intel_rapl_perf. (Ville Syrjälä) * Use HAS_RC6(p). (Chris Wilson) * Handle unsupported non-engine events. (Dmitry Rogozhkin) * Rebase for intel_rc6_residency_ns needing caller managed runtime pm. * Drop HAS_RC6 checks from the read callback since creating those events will be rejected at init time already. * Add counter units to sysfs so perf stat output is nicer. * Cleanup the attribute tables for brevity and readability. v10: * Fixed queued accounting. v11: * Move intel_engine_lookup_user to intel_engine_cs.c * Commit update. (Joonas Lahtinen) v12: * More accurate sampling. (Chris Wilson) * Store and report frequency in MHz for better usability from perf stat. * Removed metrics: queued, interrupts, rc6 counters. * Sample engine busyness based on seqno difference only for less MMIO (and forcewake) on all platforms. (Chris Wilson) v13: * Comment spelling, use mul_u32_u32 to work around potential GCC issue and some code alignment changes. (Chris Wilson) v14: * Rebase. v15: * Rebase for RPS refactoring. v16: * Use the dynamic slot in the CPU hotplug state machine so that we are free to setup our state as multi-instance. Previously we were re-using the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as multi-instance, nor owned by our driver to start with. * Register the CPU hotplug handlers after the PMU, otherwise the callback will get called before the PMU is initialized which can end up in perf_pmu_migrate_context with an un-initialized base. * Added workaround for a probable bug in cpuhp core. v17: * Remove workaround for the cpuhp bug. v18: * Rebase for drm_i915_gem_engine_class getting upstream before us. v19: * Rebase.
(trivial) Signed-off-by: Chris Wilson Signed-off-by: Tvrtko Ursulin Signed-off-by: Dmitry Rogozhkin Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: Dmitry Rogozhkin Cc: Peter Zijlstra Reviewed-by: Chris Wilson Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_pmu.c | 688 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_pmu.h | 104 +++++ drivers/gpu/drm/i915/i915_reg.h | 3 + drivers/gpu/drm/i915/intel_engine_cs.c | 33 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 26 ++ include/uapi/drm/i915_drm.h | 39 ++ 9 files changed, 902 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_pmu.c create mode 100644 drivers/gpu/drm/i915/i915_pmu.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c3649ec5b041..42bc8bd4ff06 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -45,6 +45,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8dbcb03b5f54..0793a27e2b95 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -48,6 +48,7 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "i915_pmu.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" @@ -1215,6 +1216,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; i915_gem_shrinker_init(dev_priv); + i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1269,6 +1271,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); + i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 019117144b3b..5bd5ac4cd03e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include <linux/hash.h> #include <linux/intel-iommu.h> #include <linux/kref.h> +#include <linux/perf_event.h> #include <linux/pm_qos.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> @@ -2290,6 +2291,8 @@ struct drm_i915_private { struct i915_gem_context *kernel_context; /* Context only to be used for injecting preemption commands */ struct i915_gem_context *preempt_context; + struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] + [MAX_ENGINE_INSTANCE + 1]; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -2761,6 +2764,8 @@ struct drm_i915_private { int irq; } lpe_audio; + struct i915_pmu pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place.
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index 000000000000..01b5ee67c1bf --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,688 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/perf_event.h> +#include <linux/pm_runtime.h> + +#include "i915_drv.h" +#include "i915_pmu.h" +#include "intel_ringbuffer.h" + +/* Frequency for the sampling timer for events which need it. */ +#define FREQUENCY 200 +#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY) + +#define ENGINE_SAMPLE_MASK \ + (BIT(I915_SAMPLE_BUSY) | \ + BIT(I915_SAMPLE_WAIT) | \ + BIT(I915_SAMPLE_SEMA)) + +#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) + +static cpumask_t i915_pmu_cpumask = CPU_MASK_NONE; + +static u8 engine_config_sample(u64 config) +{ + return config & I915_PMU_SAMPLE_MASK; +} + +static u8 engine_event_sample(struct perf_event *event) +{ + return engine_config_sample(event->attr.config); +} + +static u8 engine_event_class(struct perf_event *event) +{ + return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff; +} + +static u8 engine_event_instance(struct perf_event *event) +{ + return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff; +} + +static bool is_engine_config(u64 config) +{ + return config < __I915_PMU_OTHER(0); +} + +static unsigned int config_enabled_bit(u64 config) +{ + if (is_engine_config(config)) + return engine_config_sample(config); + else + return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); +} + +static u64 config_enabled_mask(u64 config) +{ + return BIT_ULL(config_enabled_bit(config)); +} + +static bool is_engine_event(struct perf_event *event) +{ + return is_engine_config(event->attr.config); +} + +static unsigned int event_enabled_bit(struct perf_event *event) +{ + return config_enabled_bit(event->attr.config); +} + +static bool grab_forcewake(struct drm_i915_private *i915, bool fw) +{ + if (!fw) + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + + return true; +} + +static void +update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val) +{ + /* + * Since we are doing stochastic sampling for these counters, + * average the delta with the previous value for better accuracy.
+ */ + sample->cur += div_u64(mul_u32_u32(sample->prev + val, unit), 2); + sample->prev = val; +} + +static void engines_sample(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool fw = false; + + if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) + return; + + if (!dev_priv->gt.awake) + return; + + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return; + + for_each_engine(engine, dev_priv, id) { + u32 current_seqno = intel_engine_get_seqno(engine); + u32 last_seqno = intel_engine_last_submit(engine); + u32 val; + + val = !i915_seqno_passed(current_seqno, last_seqno); + + update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], + PERIOD, val); + + if (val && (engine->pmu.enable & + (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) { + fw = grab_forcewake(dev_priv, fw); + + val = I915_READ_FW(RING_CTL(engine->mmio_base)); + } else { + val = 0; + } + + update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT], + PERIOD, !!(val & RING_WAIT)); + + update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], + PERIOD, !!(val & RING_WAIT_SEMAPHORE)); + } + + if (fw) + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + intel_runtime_pm_put(dev_priv); +} + +static void frequency_sample(struct drm_i915_private *dev_priv) +{ + if (dev_priv->pmu.enable & + config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + u32 val; + + val = dev_priv->gt_pm.rps.cur_freq; + if (dev_priv->gt.awake && + intel_runtime_pm_get_if_in_use(dev_priv)) { + val = intel_get_cagf(dev_priv, + I915_READ_NOTRACE(GEN6_RPSTAT1)); + intel_runtime_pm_put(dev_priv); + } + + update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], + 1, intel_gpu_freq(dev_priv, val)); + } + + if (dev_priv->pmu.enable & + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1, + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.cur_freq)); + } +} + +static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) +{ + struct drm_i915_private *i915 = + container_of(hrtimer, struct drm_i915_private, pmu.timer); + + if (i915->pmu.enable == 0) + return HRTIMER_NORESTART; + + engines_sample(i915); + frequency_sample(i915); + + hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD)); + return HRTIMER_RESTART; +} + +static void i915_pmu_event_destroy(struct perf_event *event) +{ + WARN_ON(event->parent); +} + +static int engine_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + + if (!intel_engine_lookup_user(i915, engine_event_class(event), + engine_event_instance(event))) + return -ENODEV; + + switch (engine_event_sample(event)) { + case I915_SAMPLE_BUSY: + case I915_SAMPLE_WAIT: + break; + case I915_SAMPLE_SEMA: + if (INTEL_GEN(i915) < 6) + return -ENODEV; + break; + default: + return -ENOENT; + } + + return 0; +} + +static int i915_pmu_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + int cpu, ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (event->cpu < 0) + return -EINVAL; + + cpu = cpumask_any_and(&i915_pmu_cpumask, + topology_sibling_cpumask(event->cpu)); + if (cpu >= nr_cpu_ids) + return -ENODEV; + + if (is_engine_event(event)) { + ret = engine_event_init(event); + } else { + ret = 0; + switch 
(event->attr.config) { + case I915_PMU_ACTUAL_FREQUENCY: + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + /* Requires a mutex for sampling! */ + ret = -ENODEV; + case I915_PMU_REQUESTED_FREQUENCY: + if (INTEL_GEN(i915) < 6) + ret = -ENODEV; + break; + default: + ret = -ENOENT; + break; + } + } + if (ret) + return ret; + + event->cpu = cpu; + if (!event->parent) + event->destroy = i915_pmu_event_destroy; + + return 0; +} + +static u64 __i915_pmu_event_read(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + u64 val = 0; + + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + + if (WARN_ON_ONCE(!engine)) { + /* Do nothing */ + } else { + val = engine->pmu.sample[sample].cur; + } + } else { + switch (event->attr.config) { + case I915_PMU_ACTUAL_FREQUENCY: + val = + div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, + FREQUENCY); + break; + case I915_PMU_REQUESTED_FREQUENCY: + val = + div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, + FREQUENCY); + break; + } + } + + return val; +} + +static void i915_pmu_event_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + +again: + prev = local64_read(&hwc->prev_count); + new = __i915_pmu_event_read(event); + + if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) + goto again; + + local64_add(new - prev, &event->count); +} + +static void i915_pmu_enable(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + unsigned int bit = event_enabled_bit(event); + unsigned long flags; + + spin_lock_irqsave(&i915->pmu.lock, flags); + + /* + * Start the sampling timer when enabling the first event. + */ + if (i915->pmu.enable == 0) + hrtimer_start_range_ns(&i915->pmu.timer, + ns_to_ktime(PERIOD), 0, + HRTIMER_MODE_REL_PINNED); + + /* + * Update the bitmask of enabled events and increment + * the event reference counter. + */ + GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); + i915->pmu.enable |= BIT_ULL(bit); + i915->pmu.enable_count[bit]++; + + /* + * For per-engine events the bitmask and reference counting + * is stored per engine. + */ + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + engine->pmu.enable |= BIT(sample); + + GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + engine->pmu.enable_count[sample]++; + } + + /* + * Store the current counter value so we can report the correct delta + * for all listeners. Even when the event was already enabled and has + * an existing non-zero value. 
+ */ + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); + + spin_unlock_irqrestore(&i915->pmu.lock, flags); +} + +static void i915_pmu_disable(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + unsigned int bit = event_enabled_bit(event); + unsigned long flags; + + spin_lock_irqsave(&i915->pmu.lock, flags); + + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* + * Decrement the reference count and clear the enabled + * bitmask when the last listener on an event goes away. + */ + if (--engine->pmu.enable_count[sample] == 0) + engine->pmu.enable &= ~BIT(sample); + } + + GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); + /* + * Decrement the reference count and clear the enabled + * bitmask when the last listener on an event goes away. + */ + if (--i915->pmu.enable_count[bit] == 0) + i915->pmu.enable &= ~BIT_ULL(bit); + + spin_unlock_irqrestore(&i915->pmu.lock, flags); +} + +static void i915_pmu_event_start(struct perf_event *event, int flags) +{ + i915_pmu_enable(event); + event->hw.state = 0; +} + +static void i915_pmu_event_stop(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_UPDATE) + i915_pmu_event_read(event); + i915_pmu_disable(event); + event->hw.state = PERF_HES_STOPPED; +} + +static int i915_pmu_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + i915_pmu_event_start(event, flags); + + return 0; +} + +static void i915_pmu_event_del(struct perf_event *event, int flags) +{ + i915_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int i915_pmu_event_event_idx(struct perf_event *event) +{ + return 0; +} + +static ssize_t i915_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + return sprintf(buf, "%s\n", (char *)eattr->var); +} + +#define I915_PMU_FORMAT_ATTR(_name, _config) \ + (&((struct dev_ext_attribute[]) { \ + { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \ + .var = (void *)_config, } \ + })[0].attr.attr) + +static struct attribute *i915_pmu_format_attrs[] = { + I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"), + NULL, +}; + +static const struct attribute_group i915_pmu_format_attr_group = { + .name = "format", + .attrs = i915_pmu_format_attrs, +}; + +static ssize_t i915_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var); +} + +#define I915_EVENT_ATTR(_name, _config) \ + (&((struct dev_ext_attribute[]) { \ + { .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \ + .var = (void *)_config, } \ + })[0].attr.attr) + +#define I915_EVENT_STR(_name, _str) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = _str, } \ + })[0].attr.attr) + +#define I915_EVENT(_name, _config, _unit) \ + I915_EVENT_ATTR(_name, _config), \ + I915_EVENT_STR(_name.unit, _unit) + +#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \ + 
I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \ + I915_EVENT_STR(_name.unit, "ns") + +#define I915_ENGINE_EVENTS(_name, _class, _instance) \ + I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ + I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ + I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT) + +static struct attribute *i915_pmu_events_attrs[] = { + I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0), + I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0), + I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0), + I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1), + I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), + + I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"), + I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"), + + NULL, +}; + +static const struct attribute_group i915_pmu_events_attr_group = { + .name = "events", + .attrs = i915_pmu_events_attrs, +}; + +static ssize_t +i915_pmu_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask); +} + +static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL); + +static struct attribute *i915_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group i915_pmu_cpumask_attr_group = { + .attrs = i915_cpumask_attrs, +}; + +static const struct attribute_group *i915_pmu_attr_groups[] = { + &i915_pmu_format_attr_group, + &i915_pmu_events_attr_group, + &i915_pmu_cpumask_attr_group, + NULL +}; + +#ifdef CONFIG_HOTPLUG_CPU +static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + unsigned int target; + + GEM_BUG_ON(!pmu->base.event_init); + + target = cpumask_any_and(&i915_pmu_cpumask, &i915_pmu_cpumask); + /* Select the first online CPU as a designated reader. 
*/ + if (target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &i915_pmu_cpumask); + + return 0; +} + +static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + unsigned int target; + + GEM_BUG_ON(!pmu->base.event_init); + + if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &i915_pmu_cpumask); + perf_pmu_migrate_context(&pmu->base, cpu, target); + } + } + + return 0; +} + +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; +#endif + +static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) +{ +#ifdef CONFIG_HOTPLUG_CPU + enum cpuhp_state slot; + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/x86/intel/i915:online", + i915_pmu_cpu_online, + i915_pmu_cpu_offline); + if (ret < 0) + return ret; + + slot = ret; + ret = cpuhp_state_add_instance(slot, &i915->pmu.node); + if (ret) { + cpuhp_remove_multi_state(slot); + return ret; + } + + cpuhp_slot = slot; +#endif + return 0; +} + +static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) +{ +#ifdef CONFIG_HOTPLUG_CPU + WARN_ON(cpuhp_slot == CPUHP_INVALID); + WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); + cpuhp_remove_multi_state(cpuhp_slot); +#endif +} + +void i915_pmu_register(struct drm_i915_private *i915) +{ + int ret; + + if (INTEL_GEN(i915) <= 2) { + DRM_INFO("PMU not supported for this GPU."); + return; + } + + i915->pmu.base.attr_groups = i915_pmu_attr_groups; + i915->pmu.base.task_ctx_nr = perf_invalid_context; + i915->pmu.base.event_init = i915_pmu_event_init; + i915->pmu.base.add = i915_pmu_event_add; + i915->pmu.base.del = i915_pmu_event_del; + i915->pmu.base.start = i915_pmu_event_start; + i915->pmu.base.stop = i915_pmu_event_stop; + i915->pmu.base.read = i915_pmu_event_read; + i915->pmu.base.event_idx = i915_pmu_event_event_idx; + + spin_lock_init(&i915->pmu.lock); + hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + i915->pmu.timer.function = i915_sample; + + ret = perf_pmu_register(&i915->pmu.base, "i915", -1); + if (ret) + goto err; + + ret = i915_pmu_register_cpuhp_state(i915); + if (ret) + goto err_unreg; + + return; + +err_unreg: + perf_pmu_unregister(&i915->pmu.base); +err: + i915->pmu.base.event_init = NULL; + DRM_NOTE("Failed to register PMU! 
(err=%d)\n", ret); +} + +void i915_pmu_unregister(struct drm_i915_private *i915) +{ + if (!i915->pmu.base.event_init) + return; + + WARN_ON(i915->pmu.enable); + + hrtimer_cancel(&i915->pmu.timer); + + i915_pmu_unregister_cpuhp_state(i915); + + perf_pmu_unregister(&i915->pmu.base); + i915->pmu.base.event_init = NULL; +} diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h new file mode 100644 index 000000000000..1ac8b2e34607 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -0,0 +1,104 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#ifndef __I915_PMU_H__ +#define __I915_PMU_H__ + +enum { + __I915_SAMPLE_FREQ_ACT = 0, + __I915_SAMPLE_FREQ_REQ, + __I915_NUM_PMU_SAMPLERS +}; + +/** + * How many different events we track in the global PMU mask. + * + * It is also used to know to needed number of event reference counters. + */ +#define I915_PMU_MASK_BITS \ + ((1 << I915_PMU_SAMPLE_BITS) + \ + (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) + +struct i915_pmu_sample { + u64 cur; + u32 prev; +}; + +struct i915_pmu { + /** + * @node: List node for CPU hotplug handling. + */ + struct hlist_node node; + /** + * @base: PMU base. + */ + struct pmu base; + /** + * @lock: Lock protecting enable mask and ref count handling. + */ + spinlock_t lock; + /** + * @timer: Timer for internal i915 PMU sampling. + */ + struct hrtimer timer; + /** + * @enable: Bitmask of all currently enabled events. + * + * Bits are derived from uAPI event numbers in a way that low 16 bits + * correspond to engine event _sample_ _type_ (I915_SAMPLE_QUEUED is + * bit 0), and higher bits correspond to other events (for instance + * I915_PMU_ACTUAL_FREQUENCY is bit 16 etc). + * + * In other words, low 16 bits are not per engine but per engine + * sampler type, while the upper bits are directly mapped to other + * event types. + */ + u64 enable; + /** + * @enable_count: Reference counts for the enabled events. + * + * Array indices are mapped in the same way as bits in the @enable field + * and they are used to control sampling on/off when multiple clients + * are using the PMU API. + */ + unsigned int enable_count[I915_PMU_MASK_BITS]; + /** + * @sample: Current and previous (raw) counters for sampling events. + * + * These counters are updated from the i915 PMU sampling timer. + * + * Only global counters are held here, while the per-engine ones are in + * struct intel_engine_cs. 
+ */ + struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; +}; + +#ifdef CONFIG_PERF_EVENTS +void i915_pmu_register(struct drm_i915_private *i915); +void i915_pmu_unregister(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +#endif + +#endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 96c80fa0fcac..09bf043c1c2e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,6 +186,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 +#define MAX_ENGINE_CLASS 4 + +#define MAX_ENGINE_INSTANCE 1 /* PCI config space */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 22c095035539..a5a494210b9e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -205,6 +205,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) + return -EINVAL; + + if (GEM_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) + return -EINVAL; + + if (GEM_WARN_ON(dev_priv->engine_class[info->class][info->instance])) + return -EINVAL; + GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) @@ -234,6 +243,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); + dev_priv->engine_class[info->class][info->instance] = engine; dev_priv->engine[id] = engine; return 0; } @@ -1816,6 +1826,29 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) drm_printf(m, "\n"); } +static u8 user_class_map[] = { + [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, + [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, + [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, +}; + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) +{ + if (class >= ARRAY_SIZE(user_class_map)) + return NULL; + + class = user_class_map[class]; + + GEM_BUG_ON(class > MAX_ENGINE_CLASS); + + if (instance > MAX_ENGINE_INSTANCE) + return NULL; + + return i915->engine_class[class][instance]; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 80cd7812ce02..7ee0f18d4179 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_pmu.h" #include "i915_selftest.h" struct drm_printer; @@ -338,6 +339,28 @@ struct intel_engine_cs { I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; + struct { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to the bit number from @enable. + */ + unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + /** + * @sample: Counter values for sampling events. 
+ * + * Our internal timer stores the current counters in this field. + */ + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + } pmu; + /* * A pool of objects to use as shadow copies of client batch buffers * when the command parser is enabled. Prevents the client from @@ -926,4 +949,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine); void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b57985929553..40e7b438bdaa 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -102,6 +102,45 @@ enum drm_i915_gem_engine_class { I915_ENGINE_CLASS_INVALID = -1 }; +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2, + I915_ENGINE_SAMPLE_MAX /* non-ABI */ +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) + +#define I915_PMU_LAST I915_PMU_REQUESTED_FREQUENCY + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use -- cgit v1.2.3 From 0cd4684d6ea9a4ffec33fc19de4dd667bb90d0a5 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Nov 2017 18:18:50 +0000 Subject: drm/i915/pmu: Add interrupt count metric For clients like intel-gpu-overlay it is easier to read the count via the perf API than having to parse /proc. 
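For illustration, here is roughly what a userspace consumer of the new counter could look like. This is a minimal sketch, not part of the patch: it assumes the uapi header is installed as <drm/i915_drm.h>, resolves the dynamically assigned PMU type id from sysfs, counts on CPU0 as the designated reader, and omits most error handling.

#include <drm/i915_drm.h>
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int i915_pmu_type(void)
{
	/* The dynamically assigned PMU type id is published in sysfs. */
	FILE *file = fopen("/sys/bus/event_source/devices/i915/type", "r");
	int type = -1;

	if (file) {
		if (fscanf(file, "%d", &type) != 1)
			type = -1;
		fclose(file);
	}

	return type;
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = i915_pmu_type();
	attr.config = I915_PMU_INTERRUPTS;

	/* Uncore PMU: pid is -1 and counting happens on one designated CPU. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("i915 interrupts: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}

Equivalently, something like perf stat -a -e i915/interrupts/ -I 1000 should report the same counter once the PMU is registered.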
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-7-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 23 +++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 4 +++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 6a428e7218d2..fef389ebf92c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -277,6 +277,22 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static u64 count_interrupts(struct drm_i915_private *i915) +{ + /* open-coded kstat_irqs() */ + struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq); + u64 sum = 0; + int cpu; + + if (!desc || !desc->kstat_irqs) + return 0; + + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(desc->kstat_irqs, cpu); + + return sum; +} + static void i915_pmu_event_destroy(struct perf_event *event) { WARN_ON(event->parent); @@ -343,6 +359,8 @@ static int i915_pmu_event_init(struct perf_event *event) if (INTEL_GEN(i915) < 6) ret = -ENODEV; break; + case I915_PMU_INTERRUPTS: + break; default: ret = -ENOENT; break; @@ -392,6 +410,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event) div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, FREQUENCY); break; + case I915_PMU_INTERRUPTS: + val = count_interrupts(i915); + break; } } @@ -654,6 +675,8 @@ static struct attribute *i915_pmu_events_attrs[] = { I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"), I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"), + I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), + NULL, }; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 40e7b438bdaa..d840ff083520 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -139,7 +139,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) -#define I915_PMU_LAST I915_PMU_REQUESTED_FREQUENCY +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) + +#define I915_PMU_LAST I915_PMU_INTERRUPTS /* Each region is a minimum of 16k, and there are at most 255 of them. */ -- cgit v1.2.3 From 6060b6aec03c76f9ce0977b70c27429d39d2956e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Nov 2017 18:18:52 +0000 Subject: drm/i915/pmu: Add RC6 residency metrics For clients like intel-gpu-overlay it is easier to read the counters via the perf API than having to parse sysfs. 
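As with the engine busyness counters, no new tooling is needed to consume these; for example (an illustrative invocation, with the event name taken from the sysfs attributes added below):

perf stat -a -e i915/rc6-residency/ -I 1000

The rc6p-residency and rc6pp-residency events are exposed the same way, but per the event_init() check below they can only be opened on platforms with the deeper RC6 states (-ENODEV is returned otherwise).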
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-9-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 31 +++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 6 +++++- 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index fef389ebf92c..1071935bfa67 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -361,6 +361,15 @@ static int i915_pmu_event_init(struct perf_event *event) break; case I915_PMU_INTERRUPTS: break; + case I915_PMU_RC6_RESIDENCY: + if (!HAS_RC6(i915)) + ret = -ENODEV; + break; + case I915_PMU_RC6p_RESIDENCY: + case I915_PMU_RC6pp_RESIDENCY: + if (!HAS_RC6p(i915)) + ret = -ENODEV; + break; default: ret = -ENOENT; break; @@ -413,6 +422,24 @@ static u64 __i915_pmu_event_read(struct perf_event *event) case I915_PMU_INTERRUPTS: val = count_interrupts(i915); break; + case I915_PMU_RC6_RESIDENCY: + intel_runtime_pm_get(i915); + val = intel_rc6_residency_ns(i915, + IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + intel_runtime_pm_put(i915); + break; + case I915_PMU_RC6p_RESIDENCY: + intel_runtime_pm_get(i915); + val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + intel_runtime_pm_put(i915); + break; + case I915_PMU_RC6pp_RESIDENCY: + intel_runtime_pm_get(i915); + val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + intel_runtime_pm_put(i915); + break; } } @@ -677,6 +704,10 @@ static struct attribute *i915_pmu_events_attrs[] = { I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), + I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"), + I915_EVENT(rc6p-residency, I915_PMU_RC6p_RESIDENCY, "ns"), + I915_EVENT(rc6pp-residency, I915_PMU_RC6pp_RESIDENCY, "ns"), + NULL, }; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d840ff083520..915a6e85a855 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -141,7 +141,11 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) -#define I915_PMU_LAST I915_PMU_INTERRUPTS +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(4) +#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(5) + +#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ -- cgit v1.2.3 From b552ae444e454eb3254c958e05b69820c0ef346d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 23 Nov 2017 10:07:01 +0000 Subject: drm/i915/pmu: Drop I915_ENGINE_SAMPLE_MAX from uapi headers We have agreed during the engine classes discussion that fields marked as non-ABI are better left out altogether from uapi headers. v2: Use a local define for maintainability. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171123100701.18430-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + include/uapi/drm/i915_drm.h | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 43473e6d1a4f..d38d059285dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -358,6 +358,7 @@ struct intel_engine_cs { * * Our internal timer stores the current counters in this field.
*/ +#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; /** * @busy_stats: Has enablement of engine stats tracking been diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 915a6e85a855..239e8633edc9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -110,8 +110,7 @@ enum drm_i915_gem_engine_class { enum drm_i915_pmu_engine_sample { I915_SAMPLE_BUSY = 0, I915_SAMPLE_WAIT = 1, - I915_SAMPLE_SEMA = 2, - I915_ENGINE_SAMPLE_MAX /* non-ABI */ + I915_SAMPLE_SEMA = 2 }; #define I915_PMU_SAMPLE_BITS (4) -- cgit v1.2.3 From 3452fa3095e91acbcb1f6290e0d70fa7d3695a3a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 24 Nov 2017 17:13:31 +0000 Subject: drm/i915/pmu: Aggregate all RC6 states into one counter Chris has discovered that RC6, RC6p and RC6pp counters are mutually exclusive, and even that on some SNB SKUs you get RC6p increasing, and on the others RC6. Furthermore RC6p and RC6pp were only present starting from GEN6 until GEN7, not including Haswell. All this combined makes it questionable whether we need to reserve new ABI for these counters. One idea was to just combine them all under the RC6 counter to simplify things for userspace. So that is what this patch does. Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171124171331.17981-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 23 ++++++----------------- include/uapi/drm/i915_drm.h | 6 +----- 2 files changed, 7 insertions(+), 22 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 39310cf13c3a..3357b690ce90 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -359,11 +359,6 @@ static int i915_pmu_event_init(struct perf_event *event) if (!HAS_RC6(i915)) ret = -ENODEV; break; - case I915_PMU_RC6p_RESIDENCY: - case I915_PMU_RC6pp_RESIDENCY: - if (!HAS_RC6p(i915)) - ret = -ENODEV; - break; default: ret = -ENOENT; break; @@ -421,16 +416,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event) IS_VALLEYVIEW(i915) ?
VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6); - intel_runtime_pm_put(i915); - break; - case I915_PMU_RC6p_RESIDENCY: - intel_runtime_pm_get(i915); - val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); - intel_runtime_pm_put(i915); - break; - case I915_PMU_RC6pp_RESIDENCY: - intel_runtime_pm_get(i915); - val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + if (HAS_RC6p(i915)) { + val += intel_rc6_residency_ns(i915, + GEN6_GT_GFX_RC6p); + val += intel_rc6_residency_ns(i915, + GEN6_GT_GFX_RC6pp); + } intel_runtime_pm_put(i915); break; } @@ -708,8 +699,6 @@ static struct attribute *i915_pmu_events_attrs[] = { I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"), - I915_EVENT(rc6p-residency, I915_PMU_RC6p_RESIDENCY, "ns"), - I915_EVENT(rc6pp-residency, I915_PMU_RC6pp_RESIDENCY, "ns"), NULL, }; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 239e8633edc9..536ee4febd74 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -137,14 +137,10 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) - #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) - #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) -#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(4) -#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(5) -#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY +#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ -- cgit v1.2.3 From e20eaa2382e7888a4e06ccb015c476a6fb1fda0c Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 23 Nov 2017 16:26:35 +0800 Subject: vfio: ABI for mdev display dma-buf operation Add VFIO_DEVICE_QUERY_GFX_PLANE ioctl command to let the user query and get a plane and its information. So far, two types of buffers are supported: buffers based on dma-buf and buffers based on region. This ioctl can be invoked with: 1) Either DMABUF or REGION flag. The vendor driver returns a plane_info successfully only when the specific kind of buffer is supported. 2) Flag PROBE. At the same time either DMABUF or REGION must be set, so that the vendor driver returns success only when the specific kind of buffer is supported. Add VFIO_DEVICE_GET_GFX_DMABUF ioctl command to let the user get a specific dma-buf fd of an exposed MDEV buffer provided by dmabuf_id which was returned in VFIO_DEVICE_QUERY_GFX_PLANE ioctl command. The life cycle of an exposed MDEV buffer is handled by userspace and tracked by kernel space. The returned dmabuf_id in struct vfio_device_ query_gfx_plane can be a new id of a new exposed buffer or an old id of a re-exported buffer. The host user can check the value of dmabuf_id to see if it needs to create new resources according to the new exposed buffer or just re-use the existing resource related to the old buffer. v18: - update comments for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) v17: - modify VFIO_DEVICE_GET_GFX_DMABUF interface. (Alex) v16: - add x_hot and y_hot fields. (Gerd) - add comments for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) - rebase to 4.14.0-rc6. v15: - add an ioctl to get a dmabuf for a given dmabuf id. (Gerd) v14: - add PROBE, DMABUF and REGION flags. (Alex) v12: - add drm_format_mod back. (Gerd and Zhenyu) - add region_index. (Gerd) v11: - rename plane_type to drm_plane_type. (Gerd) - move fields of vfio_device_query_gfx_plane to vfio_device_gfx_plane_info. (Gerd) - remove drm_format_mod, start fields. (Daniel) - remove plane_id.
v10: - refine the ABI API VFIO_DEVICE_QUERY_GFX_PLANE. (Alex) (Gerd) v3: - add a field gvt_plane_info in the drm_i915_gem_obj structure to save the decoded plane information to avoid look up while need the plane info. (Gerd) Signed-off-by: Tina Zhang Reviewed-by: Gerd Hoffmann Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Cc: Daniel Vetter Signed-off-by: Zhenyu Wang --- include/uapi/linux/vfio.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ae461050661a..5c1cca2ba04d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -502,6 +502,68 @@ struct vfio_pci_hot_reset { #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) +/** + * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_device_query_gfx_plane) + * + * Set the drm_plane_type and flags, then retrieve the gfx plane info. + * + * flags supported: + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set + * to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no + * support for dma-buf. + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set + * to ask if the mdev supports region. 0 on support, -EINVAL on no + * support for region. + * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set + * with each call to query the plane info. + * - Others are invalid and return -EINVAL. + * + * Note: + * 1. Plane could be disabled by guest. In that case, success will be + * returned with zero-initialized drm_format, size, width and height + * fields. + * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available + * + * Return: 0 on success, -errno on other failure. + */ +struct vfio_device_gfx_plane_info { + __u32 argsz; + __u32 flags; +#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0) +#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1) +#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2) + /* in */ + __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ + __u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page*/ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane*/ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + union { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; +}; + +#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) + +/** + * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32) + * + * Return a new dma-buf file descriptor for an exposed guest framebuffer + * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_ + * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer. 
*/ + +#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15) + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From bc1b1bf6e347af908c9a994803e18e2e22cf84b3 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 17 Oct 2017 14:58:01 +0800 Subject: drm/amdgpu: implement ctx query2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This query will give flag bits to indicate what happened on the given context. Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 37 +++++++++++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 8 +++++++ 2 files changed, 45 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c539fb6a597e..d71dc164b469 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -227,6 +227,40 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +static int amdgpu_ctx_query2(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, uint32_t id, + union drm_amdgpu_ctx_out *out) +{ + struct amdgpu_ctx *ctx; + struct amdgpu_ctx_mgr *mgr; + + if (!fpriv) + return -EINVAL; + + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (!ctx) { + mutex_unlock(&mgr->lock); + return -EINVAL; + } + + out->state.flags = 0x0; + out->state.hangs = 0x0; + + if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET; + + if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; + + if (atomic_read(&ctx->guilty)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + + mutex_unlock(&mgr->lock); + return 0; +} + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -258,6 +292,9 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, case AMDGPU_CTX_OP_QUERY_STATE: r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; + case AMDGPU_CTX_OP_QUERY_STATE2: + r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); + break; default: return -EINVAL; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 919248fb4028..0e23ce3f3113 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -160,6 +160,7 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_ALLOC_CTX 1 #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -170,6 +171,13 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 +/* indicate gpu reset occured after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) +/* indicate vram lost occured after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) + /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 #define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 -- cgit v1.2.3 From 5b565e0e5a9872f8c5a459ce53f8d6a4b19a1a66 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 7 Nov 2017 12:03:31 +0100 Subject: drm/amdgpu: expose the VA above the hole to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let userspace know how much area we have
above the 48bit VA hole on Vega10. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 12 ++++++++++-- include/uapi/drm/amdgpu_drm.h | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 2614269c4d7f..3222e1d4636c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -550,6 +550,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device dev_info = {}; + uint64_t vm_size; dev_info.device_id = dev->pdev->device; dev_info.chip_rev = adev->rev_id; @@ -577,10 +578,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; if (amdgpu_sriov_vf(adev)) dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; + + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = - min(adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE, - AMDGPU_VA_HOLE_START); + min(vm_size, AMDGPU_VA_HOLE_START); + + vm_size -= AMDGPU_VA_RESERVED_SIZE; + if (vm_size > AMDGPU_VA_HOLE_START) { + dev_info.high_va_offset = AMDGPU_VA_HOLE_END; + dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; + } dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0e23ce3f3113..4d21191aaed0 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -877,6 +877,10 @@ struct drm_amdgpu_info_device { __u32 _pad1; /* always on cu bitmap */ __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From 5843f4e02fbe86a59981e35adc6cabebee46fdc0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 1 Nov 2017 15:20:04 +0100 Subject: drm/fourcc: Fix fourcc_mod_code() definition Avoid compiler warnings when the val parameter is an expression.
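To see why the extra parentheses matter, consider what happens when a compound expression is passed as val. The snippet below is an illustrative stand-alone reduction, not part of the patch; the token pasting of the vendor name is dropped to keep it self-contained:

#include <stdio.h>

typedef unsigned long long u64;

#define OLD_fourcc_mod_code(vendor, val) \
	(((u64)(vendor) << 56) | (val & 0x00ffffffffffffffULL))
#define NEW_fourcc_mod_code(vendor, val) \
	(((u64)(vendor) << 56) | ((val) & 0x00ffffffffffffffULL))

int main(void)
{
	u64 hi = 1ULL << 60, lo = 5;

	/*
	 * Since | binds more loosely than &, the old expansion masks only
	 * the right-hand operand of "hi | lo", so stray high bits can leak
	 * into the vendor field; compilers also warn about the
	 * unparenthesized mix of | and &.
	 */
	printf("old: 0x%016llx\n", OLD_fourcc_mod_code(0x03, hi | lo));
	printf("new: 0x%016llx\n", NEW_fourcc_mod_code(0x03, hi | lo));

	return 0;
}

With the old definition this prints 0x1300000000000005 instead of the intended 0x0300000000000005.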
Reviewed-by: Daniel Vetter Signed-off-by: Thierry Reding --- include/uapi/drm/drm_fourcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 3ad838d3f93f..a76ed8f9e383 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -188,7 +188,7 @@ extern "C" { #define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) #define fourcc_mod_code(vendor, val) \ - ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL)) + ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL)) /* * Format Modifier tokens: -- cgit v1.2.3 From 268892cb63a822315921a8dab48ac3e4abf7dd03 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 12 Oct 2017 16:39:20 +0200 Subject: drm/tegra: Sanitize format modifiers The existing format modifier definitions were merged prematurely, and recent work has unveiled that the definitions are suboptimal in several ways: - The format specifiers, except for one, are not Tegra specific, but the names don't reflect that. - The number space is split into two, reserving 32 bits for some "parameter" which most of the modifiers are not going to have. - Symbolic names for the modifiers are not using the standard DRM_FORMAT_MOD_* prefix, which makes them awkward to use. - The vendor prefix NV is somewhat ambiguous. Fortunately, nobody's started using these modifiers, so we can still fix the above issues. Do so by using the standard prefix. Also, remove TEGRA from the name of those modifiers that exist on NVIDIA GPUs as well. In case of the block linear modifiers, make the "parameter" smaller (4 bits, though only 6 values are valid) and don't let that leak into any of the other modifiers. Finally, also use the more canonical NVIDIA instead of the ambiguous NV prefix. 
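Because the parameter now lives in the low four bits of a single modifier value, consumers no longer need the fourcc_mod_tegra_mod()/fourcc_mod_tegra_param() split; comparing against the six valid modifiers is enough. A hypothetical helper, mirroring the switch statement the patch adds to tegra_fb_get_tiling() below (illustrative only, with the relevant uapi macros inlined for self-containment):

#include <stdbool.h>
#include <stdint.h>

#define DRM_FORMAT_MOD_VENDOR_NVIDIA 0x03
#define fourcc_mod_code(vendor, val) \
	((((uint64_t)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | \
	 ((val) & 0x00ffffffffffffffULL))
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
	fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf))

/* Map a block linear modifier back to its block height parameter. */
static bool nvidia_16bx2_block_height(uint64_t modifier, unsigned int *value)
{
	unsigned int v;

	/* Only six of the sixteen encodable parameter values are valid. */
	for (v = 0; v <= 5; v++) {
		if (modifier == DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v)) {
			*value = v; /* the block is 2^v GOBs tall */
			return true;
		}
	}

	return false;
}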
Acked-by: Daniel Vetter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/fb.c | 35 +++++++++++++++++++++++++++++------ include/uapi/drm/drm_fourcc.h | 36 +++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 23 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 80540c1c66dc..406e895d82cc 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -54,17 +54,40 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, struct tegra_fb *fb = to_tegra_fb(framebuffer); uint64_t modifier = fb->base.modifier; - switch (fourcc_mod_tegra_mod(modifier)) { - case NV_FORMAT_MOD_TEGRA_TILED: + switch (modifier) { + case DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED: tiling->mode = TEGRA_BO_TILING_MODE_TILED; tiling->value = 0; break; - case NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(0): + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0): tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; - tiling->value = fourcc_mod_tegra_param(modifier); - if (tiling->value > 5) - return -EINVAL; + tiling->value = 0; + break; + + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1): + tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; + tiling->value = 1; + break; + + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2): + tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; + tiling->value = 2; + break; + + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3): + tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; + tiling->value = 3; + break; + + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4): + tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; + tiling->value = 4; + break; + + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5): + tiling->mode = TEGRA_BO_TILING_MODE_BLOCK; + tiling->value = 5; break; default: diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index a76ed8f9e383..e04613d30a13 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -178,7 +178,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_NONE 0 #define DRM_FORMAT_MOD_VENDOR_INTEL 0x01 #define DRM_FORMAT_MOD_VENDOR_AMD 0x02 -#define DRM_FORMAT_MOD_VENDOR_NV 0x03 +#define DRM_FORMAT_MOD_VENDOR_NVIDIA 0x03 #define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04 #define DRM_FORMAT_MOD_VENDOR_QCOM 0x05 #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06 @@ -338,29 +338,17 @@ extern "C" { */ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) -/* NVIDIA Tegra frame buffer modifiers */ - -/* - * Some modifiers take parameters, for example the number of vertical GOBs in - * a block. Reserve the lower 32 bits for parameters - */ -#define __fourcc_mod_tegra_mode_shift 32 -#define fourcc_mod_tegra_code(val, params) \ - fourcc_mod_code(NV, ((((__u64)val) << __fourcc_mod_tegra_mode_shift) | params)) -#define fourcc_mod_tegra_mod(m) \ - (m & ~((1ULL << __fourcc_mod_tegra_mode_shift) - 1)) -#define fourcc_mod_tegra_param(m) \ - (m & ((1ULL << __fourcc_mod_tegra_mode_shift) - 1)) +/* NVIDIA frame buffer modifiers */ /* * Tegra Tiled Layout, used by Tegra 2, 3 and 4. * * Pixels are arranged in simple tiles of 16 x 16 bytes. */ -#define NV_FORMAT_MOD_TEGRA_TILED fourcc_mod_tegra_code(1, 0) +#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1) /* - * Tegra 16Bx2 Block Linear layout, used by TK1/TX1 + * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later * * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked * vertically by a power of 2 (1 to 32 GOBs) to form a block. 
@@ -380,7 +368,21 @@ extern "C" { * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format * in full detail. */ -#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \ + fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf)) + +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \ + fourcc_mod_code(NVIDIA, 0x10) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \ + fourcc_mod_code(NVIDIA, 0x11) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \ + fourcc_mod_code(NVIDIA, 0x12) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \ + fourcc_mod_code(NVIDIA, 0x13) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \ + fourcc_mod_code(NVIDIA, 0x14) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \ + fourcc_mod_code(NVIDIA, 0x15) /* * Broadcom VC4 "T" format -- cgit v1.2.3 From 8ded59413ccc58fe138ab4bf337d0d0b3131d46b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Dec 2017 16:10:15 +0100 Subject: drm/exynos: ipp: Remove Exynos DRM IPP subsystem The Exynos DRM IPP subsystem is in fact non-functional, frankly speaking dead code. This patch makes clear that the Exynos DRM IPP subsystem is broken and was never really functional. It will be replaced by a completely rewritten API. The Exynos DRM IPP user-space API can be obsoleted for the following reasons: 1. The Exynos DRM IPP user-space API can be optional in Exynos DRM, so userspace should not rely on it always being available and should have a software fallback in case it is not there. 2. The only mode which was initially semi-working was memory-to-memory image processing. The remaining modes (LCD-"writeback" and "output") were never operational due to missing code (both in mainline and even vendor kernels). 3. Exynos DRM IPP mainline user-space API compatibility for memory-to-memory got broken very early by commit 083500baefd5 ("drm: remove DRM_FORMAT_NV12MT"), which removed the support for tiled formats, the main feature which made this API somehow useful on Exynos platforms (the video codec at that time produced only tiled frames, so to implement xvideo or any other video overlay one had to de-tile them for proper display). 4. Broken drivers. Especially once support for IOMMU had been added, it revealed that drivers don't configure DMA operations properly and in many cases operate outside the provided buffers, trashing surrounding memory. 5. Need for external patches. Although the IPP user-space API has been used in some vendor kernels, in such cases additional patches were applied (like reverting the 083500baefd5 patch mentioned above), which means that userspace apps which might use it still won't work with the mainline kernel version. We don't have time machines, so we cannot change it, but the Exynos DRM IPP extension should never have been merged to mainline in that form. The Exynos IPP subsystem and user-space API will be rewritten, so remove the current IPP core code and mark existing drivers as BROKEN.
Signed-off-by: Marek Szyprowski Acked-by: Daniel Stone Acked-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/Kconfig | 11 +- drivers/gpu/drm/exynos/Makefile | 1 - drivers/gpu/drm/exynos/exynos_drm_drv.c | 12 - drivers/gpu/drm/exynos/exynos_drm_drv.h | 2 - drivers/gpu/drm/exynos/exynos_drm_ipp.c | 1806 ------------------------------- drivers/gpu/drm/exynos/exynos_drm_ipp.h | 252 ----- include/uapi/drm/exynos_drm.h | 192 +--- 7 files changed, 4 insertions(+), 2272 deletions(-) delete mode 100644 drivers/gpu/drm/exynos/exynos_drm_ipp.c delete mode 100644 drivers/gpu/drm/exynos/exynos_drm_ipp.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 5a7c9d8abd6b..735ce47688f9 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -95,26 +95,21 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. -config DRM_EXYNOS_IPP - bool "Image Post Processor" - help - Choose this option if you want to use IPP feature for DRM. - config DRM_EXYNOS_FIMC bool "FIMC" - depends on DRM_EXYNOS_IPP && MFD_SYSCON + depends on BROKEN && MFD_SYSCON help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on DRM_EXYNOS_IPP + depends on BROKEN help Choose this option if you want to use Exynos Rotator for DRM. config DRM_EXYNOS_GSC bool "GScaler" - depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index bdf4212dde7b..a51c5459bb13 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,7 +18,6 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o -exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index b96bd5a781b2..a518e9c6d6cc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -29,7 +29,6 @@ #include "exynos_drm_plane.h" #include "exynos_drm_vidi.h" #include "exynos_drm_g2d.h" -#include "exynos_drm_ipp.h" #include "exynos_drm_iommu.h" #define DRIVER_NAME "exynos" @@ -109,14 +108,6 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_PROPERTY, exynos_drm_ipp_get_property, - DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_SET_PROPERTY, exynos_drm_ipp_set_property, - DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_QUEUE_BUF, exynos_drm_ipp_queue_buf, - DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_CMD_CTRL, exynos_drm_ipp_cmd_ctrl, - DRM_AUTH | DRM_RENDER_ALLOW), }; static const struct file_operations exynos_drm_driver_fops = { @@ -256,9 +247,6 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR), }, { 
DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC), - }, { - DRV_PTR(ipp_driver, CONFIG_DRM_EXYNOS_IPP), - DRM_VIRTUAL_DEVICE }, { &exynos_drm_platform_driver, DRM_VIRTUAL_DEVICE diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 589d465a7f88..df2262f70d91 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -188,7 +188,6 @@ struct exynos_drm_g2d_private { struct drm_exynos_file_private { struct exynos_drm_g2d_private *g2d_priv; - struct device *ipp_dev; }; /* @@ -291,6 +290,5 @@ extern struct platform_driver g2d_driver; extern struct platform_driver fimc_driver; extern struct platform_driver rotator_driver; extern struct platform_driver gsc_driver; -extern struct platform_driver ipp_driver; extern struct platform_driver mic_driver; #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c deleted file mode 100644 index 3edda18cc2d2..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ /dev/null @@ -1,1806 +0,0 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * Authors: - * Eunchul Kim - * Jinyoung Jeon - * Sangmin Lee - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#include -#include -#include -#include -#include - -#include -#include -#include "exynos_drm_drv.h" -#include "exynos_drm_gem.h" -#include "exynos_drm_ipp.h" -#include "exynos_drm_iommu.h" - -/* - * IPP stands for Image Post Processing and - * supports image scaler/rotator and input/output DMA operations. - * using FIMC, GSC, Rotator, so on. - * IPP is integration device driver of same attribute h/w - */ - -/* - * TODO - * 1. expand command control id. - * 2. integrate property and config. - * 3. removed send_event id check routine. - * 4. compare send_event id if needed. - * 5. free subdrv_remove notifier callback list if needed. - * 6. need to check subdrv_open about multi-open. - * 7. need to power_on implement power and sysmmu ctrl. - */ - -#define get_ipp_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define ipp_is_m2m_cmd(c) (c == IPP_CMD_M2M) - -/* - * A structure of event. - * - * @base: base of event. - * @event: ipp event. - */ -struct drm_exynos_ipp_send_event { - struct drm_pending_event base; - struct drm_exynos_ipp_event event; -}; - -/* - * A structure of memory node. - * - * @list: list head to memory queue information. - * @ops_id: id of operations. - * @prop_id: id of property. - * @buf_id: id of buffer. - * @buf_info: gem objects and dma address, size. - * @filp: a pointer to drm_file. - */ -struct drm_exynos_ipp_mem_node { - struct list_head list; - enum drm_exynos_ops_id ops_id; - u32 prop_id; - u32 buf_id; - struct drm_exynos_ipp_buf_info buf_info; -}; - -/* - * A structure of ipp context. - * - * @subdrv: prepare initialization using subdrv. - * @ipp_lock: lock for synchronization of access to ipp_idr. - * @prop_lock: lock for synchronization of access to prop_idr. - * @ipp_idr: ipp driver idr. - * @prop_idr: property idr. - * @event_workq: event work queue. - * @cmd_workq: command work queue. 
- */ -struct ipp_context { - struct exynos_drm_subdrv subdrv; - struct mutex ipp_lock; - struct mutex prop_lock; - struct idr ipp_idr; - struct idr prop_idr; - struct workqueue_struct *event_workq; - struct workqueue_struct *cmd_workq; -}; - -static LIST_HEAD(exynos_drm_ippdrv_list); -static DEFINE_MUTEX(exynos_drm_ippdrv_lock); -static BLOCKING_NOTIFIER_HEAD(exynos_drm_ippnb_list); - -int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv) -{ - mutex_lock(&exynos_drm_ippdrv_lock); - list_add_tail(&ippdrv->drv_list, &exynos_drm_ippdrv_list); - mutex_unlock(&exynos_drm_ippdrv_lock); - - return 0; -} - -int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv) -{ - mutex_lock(&exynos_drm_ippdrv_lock); - list_del(&ippdrv->drv_list); - mutex_unlock(&exynos_drm_ippdrv_lock); - - return 0; -} - -static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj) -{ - int ret; - - mutex_lock(lock); - ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL); - mutex_unlock(lock); - - return ret; -} - -static void ipp_remove_id(struct idr *id_idr, struct mutex *lock, u32 id) -{ - mutex_lock(lock); - idr_remove(id_idr, id); - mutex_unlock(lock); -} - -static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id) -{ - void *obj; - - mutex_lock(lock); - obj = idr_find(id_idr, id); - mutex_unlock(lock); - - return obj; -} - -static int ipp_check_driver(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_property *property) -{ - if (ippdrv->dedicated || (!ipp_is_m2m_cmd(property->cmd) && - !pm_runtime_suspended(ippdrv->dev))) - return -EBUSY; - - if (ippdrv->check_property && - ippdrv->check_property(ippdrv->dev, property)) - return -EINVAL; - - return 0; -} - -static struct exynos_drm_ippdrv *ipp_find_driver(struct ipp_context *ctx, - struct drm_exynos_ipp_property *property) -{ - struct exynos_drm_ippdrv *ippdrv; - u32 ipp_id = property->ipp_id; - int ret; - - if (ipp_id) { - ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock, ipp_id); - if (!ippdrv) { - DRM_DEBUG("ipp%d driver not found\n", ipp_id); - return ERR_PTR(-ENODEV); - } - - ret = ipp_check_driver(ippdrv, property); - if (ret < 0) { - DRM_DEBUG("ipp%d driver check error %d\n", ipp_id, ret); - return ERR_PTR(ret); - } - - return ippdrv; - } else { - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - ret = ipp_check_driver(ippdrv, property); - if (ret == 0) - return ippdrv; - } - - DRM_DEBUG("cannot find driver suitable for given property.\n"); - } - - return ERR_PTR(-ENODEV); -} - -static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id) -{ - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - int count = 0; - - DRM_DEBUG_KMS("prop_id[%d]\n", prop_id); - - /* - * This case is search ipp driver by prop_id handle. - * sometimes, ipp subsystem find driver by prop_id. - * e.g PAUSE state, queue buf, command control. 
- */ - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv); - - mutex_lock(&ippdrv->cmd_lock); - list_for_each_entry(c_node, &ippdrv->cmd_list, list) { - if (c_node->property.prop_id == prop_id) { - mutex_unlock(&ippdrv->cmd_lock); - return ippdrv; - } - } - mutex_unlock(&ippdrv->cmd_lock); - } - - return ERR_PTR(-ENODEV); -} - -int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_prop_list *prop_list = data; - struct exynos_drm_ippdrv *ippdrv; - int count = 0; - - if (!ctx) { - DRM_ERROR("invalid context.\n"); - return -EINVAL; - } - - if (!prop_list) { - DRM_ERROR("invalid property parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("ipp_id[%d]\n", prop_list->ipp_id); - - if (!prop_list->ipp_id) { - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) - count++; - - /* - * Supports ippdrv list count for user application. - * First step user application getting ippdrv count. - * and second step getting ippdrv capability using ipp_id. - */ - prop_list->count = count; - } else { - /* - * Getting ippdrv capability by ipp_id. - * some device not supported wb, output interface. - * so, user application detect correct ipp driver - * using this ioctl. - */ - ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock, - prop_list->ipp_id); - if (!ippdrv) { - DRM_ERROR("not found ipp%d driver.\n", - prop_list->ipp_id); - return -ENODEV; - } - - *prop_list = ippdrv->prop_list; - } - - return 0; -} - -static void ipp_print_property(struct drm_exynos_ipp_property *property, - int idx) -{ - struct drm_exynos_ipp_config *config = &property->config[idx]; - struct drm_exynos_pos *pos = &config->pos; - struct drm_exynos_sz *sz = &config->sz; - - DRM_DEBUG_KMS("prop_id[%d]ops[%s]fmt[0x%x]\n", - property->prop_id, idx ? "dst" : "src", config->fmt); - - DRM_DEBUG_KMS("pos[%d %d %d %d]sz[%d %d]f[%d]r[%d]\n", - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize, config->flip, config->degree); -} - -static struct drm_exynos_ipp_cmd_work *ipp_create_cmd_work(void) -{ - struct drm_exynos_ipp_cmd_work *cmd_work; - - cmd_work = kzalloc(sizeof(*cmd_work), GFP_KERNEL); - if (!cmd_work) - return ERR_PTR(-ENOMEM); - - INIT_WORK((struct work_struct *)cmd_work, ipp_sched_cmd); - - return cmd_work; -} - -static struct drm_exynos_ipp_event_work *ipp_create_event_work(void) -{ - struct drm_exynos_ipp_event_work *event_work; - - event_work = kzalloc(sizeof(*event_work), GFP_KERNEL); - if (!event_work) - return ERR_PTR(-ENOMEM); - - INIT_WORK(&event_work->work, ipp_sched_event); - - return event_work; -} - -int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_property *property = data; - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - u32 prop_id; - int ret, i; - - if (!ctx) { - DRM_ERROR("invalid context.\n"); - return -EINVAL; - } - - if (!property) { - DRM_ERROR("invalid property parameter.\n"); - return -EINVAL; - } - - prop_id = property->prop_id; - - /* - * This is log print for user application property. - * user application set various property. 
- */ - for_each_ipp_ops(i) - ipp_print_property(property, i); - - /* - * In case prop_id is not zero try to set existing property. - */ - if (prop_id) { - c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, prop_id); - - if (!c_node || c_node->filp != file) { - DRM_DEBUG_KMS("prop_id[%d] not found\n", prop_id); - return -EINVAL; - } - - if (c_node->state != IPP_STATE_STOP) { - DRM_DEBUG_KMS("prop_id[%d] not stopped\n", prop_id); - return -EINVAL; - } - - c_node->property = *property; - - return 0; - } - - /* find ipp driver using ipp id */ - ippdrv = ipp_find_driver(ctx, property); - if (IS_ERR(ippdrv)) { - DRM_ERROR("failed to get ipp driver.\n"); - return -EINVAL; - } - - /* allocate command node */ - c_node = kzalloc(sizeof(*c_node), GFP_KERNEL); - if (!c_node) - return -ENOMEM; - - ret = ipp_create_id(&ctx->prop_idr, &ctx->prop_lock, c_node); - if (ret < 0) { - DRM_ERROR("failed to create id.\n"); - goto err_clear; - } - property->prop_id = ret; - - DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n", - property->prop_id, property->cmd, ippdrv); - - /* stored property information and ippdrv in private data */ - c_node->property = *property; - c_node->state = IPP_STATE_IDLE; - c_node->filp = file; - - c_node->start_work = ipp_create_cmd_work(); - if (IS_ERR(c_node->start_work)) { - DRM_ERROR("failed to create start work.\n"); - ret = PTR_ERR(c_node->start_work); - goto err_remove_id; - } - - c_node->stop_work = ipp_create_cmd_work(); - if (IS_ERR(c_node->stop_work)) { - DRM_ERROR("failed to create stop work.\n"); - ret = PTR_ERR(c_node->stop_work); - goto err_free_start; - } - - c_node->event_work = ipp_create_event_work(); - if (IS_ERR(c_node->event_work)) { - DRM_ERROR("failed to create event work.\n"); - ret = PTR_ERR(c_node->event_work); - goto err_free_stop; - } - - mutex_init(&c_node->lock); - mutex_init(&c_node->mem_lock); - mutex_init(&c_node->event_lock); - - init_completion(&c_node->start_complete); - init_completion(&c_node->stop_complete); - - for_each_ipp_ops(i) - INIT_LIST_HEAD(&c_node->mem_list[i]); - - INIT_LIST_HEAD(&c_node->event_list); - mutex_lock(&ippdrv->cmd_lock); - list_add_tail(&c_node->list, &ippdrv->cmd_list); - mutex_unlock(&ippdrv->cmd_lock); - - /* make dedicated state without m2m */ - if (!ipp_is_m2m_cmd(property->cmd)) - ippdrv->dedicated = true; - - return 0; - -err_free_stop: - kfree(c_node->stop_work); -err_free_start: - kfree(c_node->start_work); -err_remove_id: - ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, property->prop_id); -err_clear: - kfree(c_node); - return ret; -} - -static int ipp_validate_mem_node(struct drm_device *drm_dev, - struct drm_exynos_ipp_mem_node *m_node, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct drm_exynos_ipp_config *ipp_cfg; - unsigned int num_plane; - unsigned long size, buf_size = 0, plane_size, img_size = 0; - unsigned int bpp, width, height; - int i; - - ipp_cfg = &c_node->property.config[m_node->ops_id]; - num_plane = drm_format_num_planes(ipp_cfg->fmt); - - /** - * This is a rather simplified validation of a memory node. - * It basically verifies provided gem object handles - * and the buffer sizes with respect to current configuration. 
- * This is not the best that can be done - * but it seems more than enough - */ - for (i = 0; i < num_plane; ++i) { - width = ipp_cfg->sz.hsize; - height = ipp_cfg->sz.vsize; - bpp = drm_format_plane_cpp(ipp_cfg->fmt, i); - - /* - * The result of drm_format_plane_cpp() for chroma planes must - * be used with drm_format_xxxx_chroma_subsampling() for - * correct result. - */ - if (i > 0) { - width /= drm_format_horz_chroma_subsampling( - ipp_cfg->fmt); - height /= drm_format_vert_chroma_subsampling( - ipp_cfg->fmt); - } - plane_size = width * height * bpp; - img_size += plane_size; - - if (m_node->buf_info.handles[i]) { - size = exynos_drm_gem_get_size(drm_dev, - m_node->buf_info.handles[i], - c_node->filp); - if (plane_size > size) { - DRM_ERROR( - "buffer %d is smaller than required\n", - i); - return -EINVAL; - } - - buf_size += size; - } - } - - if (buf_size < img_size) { - DRM_ERROR("size of buffers(%lu) is smaller than image(%lu)\n", - buf_size, img_size); - return -EINVAL; - } - - return 0; -} - -static int ipp_put_mem_node(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_mem_node *m_node) -{ - int i; - - DRM_DEBUG_KMS("node[%pK]\n", m_node); - - if (!m_node) { - DRM_ERROR("invalid dequeue node.\n"); - return -EFAULT; - } - - DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id); - - /* put gem buffer */ - for_each_ipp_planar(i) { - unsigned long handle = m_node->buf_info.handles[i]; - if (handle) - exynos_drm_gem_put_dma_addr(drm_dev, handle, - c_node->filp); - } - - list_del(&m_node->list); - kfree(m_node); - - return 0; -} - -static struct drm_exynos_ipp_mem_node - *ipp_get_mem_node(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_mem_node *m_node; - struct drm_exynos_ipp_buf_info *buf_info; - int i; - - m_node = kzalloc(sizeof(*m_node), GFP_KERNEL); - if (!m_node) - return ERR_PTR(-ENOMEM); - - buf_info = &m_node->buf_info; - - /* operations, buffer id */ - m_node->ops_id = qbuf->ops_id; - m_node->prop_id = qbuf->prop_id; - m_node->buf_id = qbuf->buf_id; - INIT_LIST_HEAD(&m_node->list); - - DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id); - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id); - - for_each_ipp_planar(i) { - DRM_DEBUG_KMS("i[%d]handle[0x%x]\n", i, qbuf->handle[i]); - - /* get dma address by handle */ - if (qbuf->handle[i]) { - dma_addr_t *addr; - - addr = exynos_drm_gem_get_dma_addr(drm_dev, - qbuf->handle[i], c_node->filp); - if (IS_ERR(addr)) { - DRM_ERROR("failed to get addr.\n"); - ipp_put_mem_node(drm_dev, c_node, m_node); - return ERR_PTR(-EFAULT); - } - - buf_info->handles[i] = qbuf->handle[i]; - buf_info->base[i] = *addr; - DRM_DEBUG_KMS("i[%d]base[%pad]hd[0x%lx]\n", i, - &buf_info->base[i], buf_info->handles[i]); - } - } - - mutex_lock(&c_node->mem_lock); - if (ipp_validate_mem_node(drm_dev, m_node, c_node)) { - ipp_put_mem_node(drm_dev, c_node, m_node); - mutex_unlock(&c_node->mem_lock); - return ERR_PTR(-EFAULT); - } - list_add_tail(&m_node->list, &c_node->mem_list[qbuf->ops_id]); - mutex_unlock(&c_node->mem_lock); - - return m_node; -} - -static void ipp_clean_mem_nodes(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, int ops) -{ - struct drm_exynos_ipp_mem_node *m_node, *tm_node; - struct list_head *head = &c_node->mem_list[ops]; - - mutex_lock(&c_node->mem_lock); - - list_for_each_entry_safe(m_node, tm_node, head, list) { - int ret; - - ret = ipp_put_mem_node(drm_dev, 
c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - } - - mutex_unlock(&c_node->mem_lock); -} - -static int ipp_get_event(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_send_event *e; - int ret; - - DRM_DEBUG_KMS("ops_id[%d]buf_id[%d]\n", qbuf->ops_id, qbuf->buf_id); - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; - - /* make event */ - e->event.base.type = DRM_EXYNOS_IPP_EVENT; - e->event.base.length = sizeof(e->event); - e->event.user_data = qbuf->user_data; - e->event.prop_id = qbuf->prop_id; - e->event.buf_id[EXYNOS_DRM_OPS_DST] = qbuf->buf_id; - - ret = drm_event_reserve_init(drm_dev, c_node->filp, &e->base, &e->event.base); - if (ret) { - kfree(e); - return ret; - } - - mutex_lock(&c_node->event_lock); - list_add_tail(&e->base.link, &c_node->event_list); - mutex_unlock(&c_node->event_lock); - - return 0; -} - -static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_send_event *e, *te; - int count = 0; - - mutex_lock(&c_node->event_lock); - list_for_each_entry_safe(e, te, &c_node->event_list, base.link) { - DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e); - - /* - * qbuf == NULL condition means all event deletion. - * stop operations want to delete all event list. - * another case delete only same buf id. - */ - if (!qbuf) { - /* delete list */ - list_del(&e->base.link); - kfree(e); - } - - /* compare buffer id */ - if (qbuf && (qbuf->buf_id == - e->event.buf_id[EXYNOS_DRM_OPS_DST])) { - /* delete list */ - list_del(&e->base.link); - kfree(e); - goto out_unlock; - } - } - -out_unlock: - mutex_unlock(&c_node->event_lock); - return; -} - -static void ipp_clean_cmd_node(struct ipp_context *ctx, - struct drm_exynos_ipp_cmd_node *c_node) -{ - int i; - - /* cancel works */ - cancel_work_sync(&c_node->start_work->work); - cancel_work_sync(&c_node->stop_work->work); - cancel_work_sync(&c_node->event_work->work); - - /* put event */ - ipp_put_event(c_node, NULL); - - for_each_ipp_ops(i) - ipp_clean_mem_nodes(ctx->subdrv.drm_dev, c_node, i); - - /* delete list */ - list_del(&c_node->list); - - ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, - c_node->property.prop_id); - - /* destroy mutex */ - mutex_destroy(&c_node->lock); - mutex_destroy(&c_node->mem_lock); - mutex_destroy(&c_node->event_lock); - - /* free command node */ - kfree(c_node->start_work); - kfree(c_node->stop_work); - kfree(c_node->event_work); - kfree(c_node); -} - -static bool ipp_check_mem_list(struct drm_exynos_ipp_cmd_node *c_node) -{ - switch (c_node->property.cmd) { - case IPP_CMD_WB: - return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]); - case IPP_CMD_OUTPUT: - return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]); - case IPP_CMD_M2M: - default: - return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]) && - !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]); - } -} - -static struct drm_exynos_ipp_mem_node - *ipp_find_mem_node(struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_mem_node *m_node; - struct list_head *head; - int count = 0; - - DRM_DEBUG_KMS("buf_id[%d]\n", qbuf->buf_id); - - /* source/destination memory list */ - head = &c_node->mem_list[qbuf->ops_id]; - - /* find memory node from memory list */ - list_for_each_entry(m_node, head, list) { - DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node); - - /* compare buffer id */ - if 
(m_node->buf_id == qbuf->buf_id) - return m_node; - } - - return NULL; -} - -static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_mem_node *m_node) -{ - struct exynos_drm_ipp_ops *ops = NULL; - int ret = 0; - - DRM_DEBUG_KMS("node[%pK]\n", m_node); - - if (!m_node) { - DRM_ERROR("invalid queue node.\n"); - return -EFAULT; - } - - DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id); - - /* get operations callback */ - ops = ippdrv->ops[m_node->ops_id]; - if (!ops) { - DRM_ERROR("not support ops.\n"); - return -EFAULT; - } - - /* set address and enable irq */ - if (ops->set_addr) { - ret = ops->set_addr(ippdrv->dev, &m_node->buf_info, - m_node->buf_id, IPP_BUF_ENQUEUE); - if (ret) { - DRM_ERROR("failed to set addr.\n"); - return ret; - } - } - - return ret; -} - -static void ipp_handle_cmd_work(struct device *dev, - struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_work *cmd_work, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct ipp_context *ctx = get_ipp_context(dev); - - cmd_work->ippdrv = ippdrv; - cmd_work->c_node = c_node; - queue_work(ctx->cmd_workq, &cmd_work->work); -} - -static int ipp_queue_buf_with_run(struct device *dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_mem_node *m_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_property *property; - struct exynos_drm_ipp_ops *ops; - int ret; - - ippdrv = ipp_find_drv_by_handle(qbuf->prop_id); - if (IS_ERR(ippdrv)) { - DRM_ERROR("failed to get ipp driver.\n"); - return -EFAULT; - } - - ops = ippdrv->ops[qbuf->ops_id]; - if (!ops) { - DRM_ERROR("failed to get ops.\n"); - return -EFAULT; - } - - property = &c_node->property; - - if (c_node->state != IPP_STATE_START) { - DRM_DEBUG_KMS("bypass for invalid state.\n"); - return 0; - } - - mutex_lock(&c_node->mem_lock); - if (!ipp_check_mem_list(c_node)) { - mutex_unlock(&c_node->mem_lock); - DRM_DEBUG_KMS("empty memory.\n"); - return 0; - } - - /* - * If set destination buffer and enabled clock, - * then m2m operations need start operations at queue_buf - */ - if (ipp_is_m2m_cmd(property->cmd)) { - struct drm_exynos_ipp_cmd_work *cmd_work = c_node->start_work; - - cmd_work->ctrl = IPP_CTRL_PLAY; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - } else { - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - mutex_unlock(&c_node->mem_lock); - DRM_ERROR("failed to set m node.\n"); - return ret; - } - } - mutex_unlock(&c_node->mem_lock); - - return 0; -} - -static void ipp_clean_queue_buf(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_mem_node *m_node, *tm_node; - - /* delete list */ - mutex_lock(&c_node->mem_lock); - list_for_each_entry_safe(m_node, tm_node, - &c_node->mem_list[qbuf->ops_id], list) { - if (m_node->buf_id == qbuf->buf_id && - m_node->ops_id == qbuf->ops_id) - ipp_put_mem_node(drm_dev, c_node, m_node); - } - mutex_unlock(&c_node->mem_lock); -} - -int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_queue_buf *qbuf = data; - struct drm_exynos_ipp_cmd_node *c_node; - struct drm_exynos_ipp_mem_node *m_node; - int ret; - - if (!qbuf) { - DRM_ERROR("invalid buf parameter.\n"); - return 
-EINVAL; - } - - if (qbuf->ops_id >= EXYNOS_DRM_OPS_MAX) { - DRM_ERROR("invalid ops parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("prop_id[%d]ops_id[%s]buf_id[%d]buf_type[%d]\n", - qbuf->prop_id, qbuf->ops_id ? "dst" : "src", - qbuf->buf_id, qbuf->buf_type); - - /* find command node */ - c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, - qbuf->prop_id); - if (!c_node || c_node->filp != file) { - DRM_ERROR("failed to get command node.\n"); - return -ENODEV; - } - - /* buffer control */ - switch (qbuf->buf_type) { - case IPP_BUF_ENQUEUE: - /* get memory node */ - m_node = ipp_get_mem_node(drm_dev, c_node, qbuf); - if (IS_ERR(m_node)) { - DRM_ERROR("failed to get m_node.\n"); - return PTR_ERR(m_node); - } - - /* - * first step get event for destination buffer. - * and second step when M2M case run with destination buffer - * if needed. - */ - if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) { - /* get event for destination buffer */ - ret = ipp_get_event(drm_dev, c_node, qbuf); - if (ret) { - DRM_ERROR("failed to get event.\n"); - goto err_clean_node; - } - - /* - * M2M case run play control for streaming feature. - * other case set address and waiting. - */ - ret = ipp_queue_buf_with_run(dev, c_node, m_node, qbuf); - if (ret) { - DRM_ERROR("failed to run command.\n"); - goto err_clean_node; - } - } - break; - case IPP_BUF_DEQUEUE: - mutex_lock(&c_node->lock); - - /* put event for destination buffer */ - if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) - ipp_put_event(c_node, qbuf); - - ipp_clean_queue_buf(drm_dev, c_node, qbuf); - - mutex_unlock(&c_node->lock); - break; - default: - DRM_ERROR("invalid buffer control.\n"); - return -EINVAL; - } - - return 0; - -err_clean_node: - DRM_ERROR("clean memory nodes.\n"); - - ipp_clean_queue_buf(drm_dev, c_node, qbuf); - return ret; -} - -static bool exynos_drm_ipp_check_valid(struct device *dev, - enum drm_exynos_ipp_ctrl ctrl, enum drm_exynos_ipp_state state) -{ - if (ctrl != IPP_CTRL_PLAY) { - if (pm_runtime_suspended(dev)) { - DRM_ERROR("pm:runtime_suspended.\n"); - goto err_status; - } - } - - switch (ctrl) { - case IPP_CTRL_PLAY: - if (state != IPP_STATE_IDLE) - goto err_status; - break; - case IPP_CTRL_STOP: - if (state == IPP_STATE_STOP) - goto err_status; - break; - case IPP_CTRL_PAUSE: - if (state != IPP_STATE_START) - goto err_status; - break; - case IPP_CTRL_RESUME: - if (state != IPP_STATE_STOP) - goto err_status; - break; - default: - DRM_ERROR("invalid state.\n"); - goto err_status; - } - - return true; - -err_status: - DRM_ERROR("invalid status:ctrl[%d]state[%d]\n", ctrl, state); - return false; -} - -int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct exynos_drm_ippdrv *ippdrv = NULL; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_cmd_ctrl *cmd_ctrl = data; - struct drm_exynos_ipp_cmd_work *cmd_work; - struct drm_exynos_ipp_cmd_node *c_node; - - if (!ctx) { - DRM_ERROR("invalid context.\n"); - return -EINVAL; - } - - if (!cmd_ctrl) { - DRM_ERROR("invalid control parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("ctrl[%d]prop_id[%d]\n", - cmd_ctrl->ctrl, cmd_ctrl->prop_id); - - ippdrv = ipp_find_drv_by_handle(cmd_ctrl->prop_id); - if (IS_ERR(ippdrv)) { - DRM_ERROR("failed to get ipp driver.\n"); - return PTR_ERR(ippdrv); - } - - c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, - cmd_ctrl->prop_id); - if (!c_node || c_node->filp != file) { - 
DRM_ERROR("invalid command node list.\n"); - return -ENODEV; - } - - if (!exynos_drm_ipp_check_valid(ippdrv->dev, cmd_ctrl->ctrl, - c_node->state)) { - DRM_ERROR("invalid state.\n"); - return -EINVAL; - } - - switch (cmd_ctrl->ctrl) { - case IPP_CTRL_PLAY: - if (pm_runtime_suspended(ippdrv->dev)) - pm_runtime_get_sync(ippdrv->dev); - - c_node->state = IPP_STATE_START; - - cmd_work = c_node->start_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - break; - case IPP_CTRL_STOP: - cmd_work = c_node->stop_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - - if (!wait_for_completion_timeout(&c_node->stop_complete, - msecs_to_jiffies(300))) { - DRM_ERROR("timeout stop:prop_id[%d]\n", - c_node->property.prop_id); - } - - c_node->state = IPP_STATE_STOP; - ippdrv->dedicated = false; - mutex_lock(&ippdrv->cmd_lock); - ipp_clean_cmd_node(ctx, c_node); - - if (list_empty(&ippdrv->cmd_list)) - pm_runtime_put_sync(ippdrv->dev); - mutex_unlock(&ippdrv->cmd_lock); - break; - case IPP_CTRL_PAUSE: - cmd_work = c_node->stop_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - - if (!wait_for_completion_timeout(&c_node->stop_complete, - msecs_to_jiffies(200))) { - DRM_ERROR("timeout stop:prop_id[%d]\n", - c_node->property.prop_id); - } - - c_node->state = IPP_STATE_STOP; - break; - case IPP_CTRL_RESUME: - c_node->state = IPP_STATE_START; - cmd_work = c_node->start_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - break; - default: - DRM_ERROR("could not support this state currently.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("done ctrl[%d]prop_id[%d]\n", - cmd_ctrl->ctrl, cmd_ctrl->prop_id); - - return 0; -} - -int exynos_drm_ippnb_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_register( - &exynos_drm_ippnb_list, nb); -} - -int exynos_drm_ippnb_unregister(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister( - &exynos_drm_ippnb_list, nb); -} - -int exynos_drm_ippnb_send_event(unsigned long val, void *v) -{ - return blocking_notifier_call_chain( - &exynos_drm_ippnb_list, val, v); -} - -static int ipp_set_property(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_property *property) -{ - struct exynos_drm_ipp_ops *ops = NULL; - bool swap = false; - int ret, i; - - if (!property) { - DRM_ERROR("invalid property parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id); - - /* reset h/w block */ - if (ippdrv->reset && - ippdrv->reset(ippdrv->dev)) { - return -EINVAL; - } - - /* set source,destination operations */ - for_each_ipp_ops(i) { - struct drm_exynos_ipp_config *config = - &property->config[i]; - - ops = ippdrv->ops[i]; - if (!ops || !config) { - DRM_ERROR("not support ops and config.\n"); - return -EINVAL; - } - - /* set format */ - if (ops->set_fmt) { - ret = ops->set_fmt(ippdrv->dev, config->fmt); - if (ret) - return ret; - } - - /* set transform for rotation, flip */ - if (ops->set_transf) { - ret = ops->set_transf(ippdrv->dev, config->degree, - config->flip, &swap); - if (ret) - return ret; - } - - /* set size */ - if (ops->set_size) { - ret = ops->set_size(ippdrv->dev, swap, &config->pos, - &config->sz); - if (ret) - return ret; - } - } - - return 0; -} - -static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct drm_exynos_ipp_mem_node *m_node; - struct 
drm_exynos_ipp_property *property = &c_node->property; - struct list_head *head; - int ret, i; - - DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id); - - /* store command info in ippdrv */ - ippdrv->c_node = c_node; - - mutex_lock(&c_node->mem_lock); - if (!ipp_check_mem_list(c_node)) { - DRM_DEBUG_KMS("empty memory.\n"); - ret = -ENOMEM; - goto err_unlock; - } - - /* set current property in ippdrv */ - ret = ipp_set_property(ippdrv, property); - if (ret) { - DRM_ERROR("failed to set property.\n"); - ippdrv->c_node = NULL; - goto err_unlock; - } - - /* check command */ - switch (property->cmd) { - case IPP_CMD_M2M: - for_each_ipp_ops(i) { - /* source/destination memory list */ - head = &c_node->mem_list[i]; - - m_node = list_first_entry(head, - struct drm_exynos_ipp_mem_node, list); - - DRM_DEBUG_KMS("m_node[%pK]\n", m_node); - - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - DRM_ERROR("failed to set m node.\n"); - goto err_unlock; - } - } - break; - case IPP_CMD_WB: - /* destination memory list */ - head = &c_node->mem_list[EXYNOS_DRM_OPS_DST]; - - list_for_each_entry(m_node, head, list) { - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - DRM_ERROR("failed to set m node.\n"); - goto err_unlock; - } - } - break; - case IPP_CMD_OUTPUT: - /* source memory list */ - head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC]; - - list_for_each_entry(m_node, head, list) { - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - DRM_ERROR("failed to set m node.\n"); - goto err_unlock; - } - } - break; - default: - DRM_ERROR("invalid operations.\n"); - ret = -EINVAL; - goto err_unlock; - } - mutex_unlock(&c_node->mem_lock); - - DRM_DEBUG_KMS("cmd[%d]\n", property->cmd); - - /* start operations */ - if (ippdrv->start) { - ret = ippdrv->start(ippdrv->dev, property->cmd); - if (ret) { - DRM_ERROR("failed to start ops.\n"); - ippdrv->c_node = NULL; - return ret; - } - } - - return 0; - -err_unlock: - mutex_unlock(&c_node->mem_lock); - ippdrv->c_node = NULL; - return ret; -} - -static int ipp_stop_property(struct drm_device *drm_dev, - struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct drm_exynos_ipp_property *property = &c_node->property; - int i; - - DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id); - - /* stop operations */ - if (ippdrv->stop) - ippdrv->stop(ippdrv->dev, property->cmd); - - /* check command */ - switch (property->cmd) { - case IPP_CMD_M2M: - for_each_ipp_ops(i) - ipp_clean_mem_nodes(drm_dev, c_node, i); - break; - case IPP_CMD_WB: - ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_DST); - break; - case IPP_CMD_OUTPUT: - ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_SRC); - break; - default: - DRM_ERROR("invalid operations.\n"); - return -EINVAL; - } - - return 0; -} - -void ipp_sched_cmd(struct work_struct *work) -{ - struct drm_exynos_ipp_cmd_work *cmd_work = - container_of(work, struct drm_exynos_ipp_cmd_work, work); - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - struct drm_exynos_ipp_property *property; - int ret; - - ippdrv = cmd_work->ippdrv; - if (!ippdrv) { - DRM_ERROR("invalid ippdrv list.\n"); - return; - } - - c_node = cmd_work->c_node; - if (!c_node) { - DRM_ERROR("invalid command node list.\n"); - return; - } - - mutex_lock(&c_node->lock); - - property = &c_node->property; - - switch (cmd_work->ctrl) { - case IPP_CTRL_PLAY: - case IPP_CTRL_RESUME: - ret = ipp_start_property(ippdrv, c_node); - if (ret) { - DRM_ERROR("failed to start property:prop_id[%d]\n", - 
c_node->property.prop_id); - goto err_unlock; - } - - /* - * M2M case supports wait_completion of transfer. - * because M2M case supports single unit operation - * with multiple queue. - * M2M need to wait completion of data transfer. - */ - if (ipp_is_m2m_cmd(property->cmd)) { - if (!wait_for_completion_timeout - (&c_node->start_complete, msecs_to_jiffies(200))) { - DRM_ERROR("timeout event:prop_id[%d]\n", - c_node->property.prop_id); - goto err_unlock; - } - } - break; - case IPP_CTRL_STOP: - case IPP_CTRL_PAUSE: - ret = ipp_stop_property(ippdrv->drm_dev, ippdrv, - c_node); - if (ret) { - DRM_ERROR("failed to stop property.\n"); - goto err_unlock; - } - - complete(&c_node->stop_complete); - break; - default: - DRM_ERROR("unknown control type\n"); - break; - } - - DRM_DEBUG_KMS("ctrl[%d] done.\n", cmd_work->ctrl); - -err_unlock: - mutex_unlock(&c_node->lock); -} - -static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node, int *buf_id) -{ - struct drm_device *drm_dev = ippdrv->drm_dev; - struct drm_exynos_ipp_property *property = &c_node->property; - struct drm_exynos_ipp_mem_node *m_node; - struct drm_exynos_ipp_queue_buf qbuf; - struct drm_exynos_ipp_send_event *e; - struct list_head *head; - struct timeval now; - u32 tbuf_id[EXYNOS_DRM_OPS_MAX] = {0, }; - int ret, i; - - for_each_ipp_ops(i) - DRM_DEBUG_KMS("%s buf_id[%d]\n", i ? "dst" : "src", buf_id[i]); - - if (!drm_dev) { - DRM_ERROR("failed to get drm_dev.\n"); - return -EINVAL; - } - - if (!property) { - DRM_ERROR("failed to get property.\n"); - return -EINVAL; - } - - mutex_lock(&c_node->event_lock); - if (list_empty(&c_node->event_list)) { - DRM_DEBUG_KMS("event list is empty.\n"); - ret = 0; - goto err_event_unlock; - } - - mutex_lock(&c_node->mem_lock); - if (!ipp_check_mem_list(c_node)) { - DRM_DEBUG_KMS("empty memory.\n"); - ret = 0; - goto err_mem_unlock; - } - - /* check command */ - switch (property->cmd) { - case IPP_CMD_M2M: - for_each_ipp_ops(i) { - /* source/destination memory list */ - head = &c_node->mem_list[i]; - - m_node = list_first_entry(head, - struct drm_exynos_ipp_mem_node, list); - - tbuf_id[i] = m_node->buf_id; - DRM_DEBUG_KMS("%s buf_id[%d]\n", - i ? 
"dst" : "src", tbuf_id[i]); - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - } - break; - case IPP_CMD_WB: - /* clear buf for finding */ - memset(&qbuf, 0x0, sizeof(qbuf)); - qbuf.ops_id = EXYNOS_DRM_OPS_DST; - qbuf.buf_id = buf_id[EXYNOS_DRM_OPS_DST]; - - /* get memory node entry */ - m_node = ipp_find_mem_node(c_node, &qbuf); - if (!m_node) { - DRM_ERROR("empty memory node.\n"); - ret = -ENOMEM; - goto err_mem_unlock; - } - - tbuf_id[EXYNOS_DRM_OPS_DST] = m_node->buf_id; - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - break; - case IPP_CMD_OUTPUT: - /* source memory list */ - head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC]; - - m_node = list_first_entry(head, - struct drm_exynos_ipp_mem_node, list); - - tbuf_id[EXYNOS_DRM_OPS_SRC] = m_node->buf_id; - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - break; - default: - DRM_ERROR("invalid operations.\n"); - ret = -EINVAL; - goto err_mem_unlock; - } - mutex_unlock(&c_node->mem_lock); - - if (tbuf_id[EXYNOS_DRM_OPS_DST] != buf_id[EXYNOS_DRM_OPS_DST]) - DRM_ERROR("failed to match buf_id[%d %d]prop_id[%d]\n", - tbuf_id[1], buf_id[1], property->prop_id); - - /* - * command node have event list of destination buffer - * If destination buffer enqueue to mem list, - * then we make event and link to event list tail. - * so, we get first event for first enqueued buffer. - */ - e = list_first_entry(&c_node->event_list, - struct drm_exynos_ipp_send_event, base.link); - - do_gettimeofday(&now); - DRM_DEBUG_KMS("tv_sec[%ld]tv_usec[%ld]\n", now.tv_sec, now.tv_usec); - e->event.tv_sec = now.tv_sec; - e->event.tv_usec = now.tv_usec; - e->event.prop_id = property->prop_id; - - /* set buffer id about source destination */ - for_each_ipp_ops(i) - e->event.buf_id[i] = tbuf_id[i]; - - drm_send_event(drm_dev, &e->base); - mutex_unlock(&c_node->event_lock); - - DRM_DEBUG_KMS("done cmd[%d]prop_id[%d]buf_id[%d]\n", - property->cmd, property->prop_id, tbuf_id[EXYNOS_DRM_OPS_DST]); - - return 0; - -err_mem_unlock: - mutex_unlock(&c_node->mem_lock); -err_event_unlock: - mutex_unlock(&c_node->event_lock); - return ret; -} - -void ipp_sched_event(struct work_struct *work) -{ - struct drm_exynos_ipp_event_work *event_work = - container_of(work, struct drm_exynos_ipp_event_work, work); - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - int ret; - - if (!event_work) { - DRM_ERROR("failed to get event_work.\n"); - return; - } - - DRM_DEBUG_KMS("buf_id[%d]\n", event_work->buf_id[EXYNOS_DRM_OPS_DST]); - - ippdrv = event_work->ippdrv; - if (!ippdrv) { - DRM_ERROR("failed to get ipp driver.\n"); - return; - } - - c_node = ippdrv->c_node; - if (!c_node) { - DRM_ERROR("failed to get command node.\n"); - return; - } - - /* - * IPP supports command thread, event thread synchronization. - * If IPP close immediately from user land, then IPP make - * synchronization with command thread, so make complete event. - * or going out operations. 
- */ - if (c_node->state != IPP_STATE_START) { - DRM_DEBUG_KMS("bypass state[%d]prop_id[%d]\n", - c_node->state, c_node->property.prop_id); - goto err_completion; - } - - ret = ipp_send_event(ippdrv, c_node, event_work->buf_id); - if (ret) { - DRM_ERROR("failed to send event.\n"); - goto err_completion; - } - -err_completion: - if (ipp_is_m2m_cmd(c_node->property.cmd)) - complete(&c_node->start_complete); -} - -static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev) -{ - struct ipp_context *ctx = get_ipp_context(dev); - struct exynos_drm_ippdrv *ippdrv; - int ret, count = 0; - - /* get ipp driver entry */ - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - ippdrv->drm_dev = drm_dev; - - ret = ipp_create_id(&ctx->ipp_idr, &ctx->ipp_lock, ippdrv); - if (ret < 0) { - DRM_ERROR("failed to create id.\n"); - goto err; - } - ippdrv->prop_list.ipp_id = ret; - - DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n", - count++, ippdrv, ret); - - /* store parent device for node */ - ippdrv->parent_dev = dev; - - /* store event work queue and handler */ - ippdrv->event_workq = ctx->event_workq; - ippdrv->sched_event = ipp_sched_event; - INIT_LIST_HEAD(&ippdrv->cmd_list); - mutex_init(&ippdrv->cmd_lock); - - ret = drm_iommu_attach_device(drm_dev, ippdrv->dev); - if (ret) { - DRM_ERROR("failed to activate iommu\n"); - goto err; - } - } - - return 0; - -err: - /* get ipp driver entry */ - list_for_each_entry_continue_reverse(ippdrv, &exynos_drm_ippdrv_list, - drv_list) { - drm_iommu_detach_device(drm_dev, ippdrv->dev); - - ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock, - ippdrv->prop_list.ipp_id); - } - - return ret; -} - -static void ipp_subdrv_remove(struct drm_device *drm_dev, struct device *dev) -{ - struct exynos_drm_ippdrv *ippdrv, *t; - struct ipp_context *ctx = get_ipp_context(dev); - - /* get ipp driver entry */ - list_for_each_entry_safe(ippdrv, t, &exynos_drm_ippdrv_list, drv_list) { - drm_iommu_detach_device(drm_dev, ippdrv->dev); - - ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock, - ippdrv->prop_list.ipp_id); - - ippdrv->drm_dev = NULL; - exynos_drm_ippdrv_unregister(ippdrv); - } -} - -static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - - file_priv->ipp_dev = dev; - - DRM_DEBUG_KMS("done priv[%pK]\n", dev); - - return 0; -} - -static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev, - struct drm_file *file) -{ - struct exynos_drm_ippdrv *ippdrv = NULL; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_cmd_node *c_node, *tc_node; - int count = 0; - - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - mutex_lock(&ippdrv->cmd_lock); - list_for_each_entry_safe(c_node, tc_node, - &ippdrv->cmd_list, list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", - count++, ippdrv); - - if (c_node->filp == file) { - /* - * userland goto unnormal state. process killed. - * and close the file. - * so, IPP didn't called stop cmd ctrl. - * so, we are make stop operation in this state. 
- */ - if (c_node->state == IPP_STATE_START) { - ipp_stop_property(drm_dev, ippdrv, - c_node); - c_node->state = IPP_STATE_STOP; - } - - ippdrv->dedicated = false; - ipp_clean_cmd_node(ctx, c_node); - if (list_empty(&ippdrv->cmd_list)) - pm_runtime_put_sync(ippdrv->dev); - } - } - mutex_unlock(&ippdrv->cmd_lock); - } - - return; -} - -static int ipp_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct ipp_context *ctx; - struct exynos_drm_subdrv *subdrv; - int ret; - - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - mutex_init(&ctx->ipp_lock); - mutex_init(&ctx->prop_lock); - - idr_init(&ctx->ipp_idr); - idr_init(&ctx->prop_idr); - - /* - * create single thread for ipp event - * IPP supports event thread for IPP drivers. - * IPP driver send event_work to this thread. - * and IPP event thread send event to user process. - */ - ctx->event_workq = create_singlethread_workqueue("ipp_event"); - if (!ctx->event_workq) { - dev_err(dev, "failed to create event workqueue\n"); - return -EINVAL; - } - - /* - * create single thread for ipp command - * IPP supports command thread for user process. - * user process make command node using set property ioctl. - * and make start_work and send this work to command thread. - * and then this command thread start property. - */ - ctx->cmd_workq = create_singlethread_workqueue("ipp_cmd"); - if (!ctx->cmd_workq) { - dev_err(dev, "failed to create cmd workqueue\n"); - ret = -EINVAL; - goto err_event_workq; - } - - /* set sub driver informations */ - subdrv = &ctx->subdrv; - subdrv->dev = dev; - subdrv->probe = ipp_subdrv_probe; - subdrv->remove = ipp_subdrv_remove; - subdrv->open = ipp_subdrv_open; - subdrv->close = ipp_subdrv_close; - - platform_set_drvdata(pdev, ctx); - - ret = exynos_drm_subdrv_register(subdrv); - if (ret < 0) { - DRM_ERROR("failed to register drm ipp device.\n"); - goto err_cmd_workq; - } - - dev_info(dev, "drm ipp registered successfully.\n"); - - return 0; - -err_cmd_workq: - destroy_workqueue(ctx->cmd_workq); -err_event_workq: - destroy_workqueue(ctx->event_workq); - return ret; -} - -static int ipp_remove(struct platform_device *pdev) -{ - struct ipp_context *ctx = platform_get_drvdata(pdev); - - /* unregister sub driver */ - exynos_drm_subdrv_unregister(&ctx->subdrv); - - /* remove,destroy ipp idr */ - idr_destroy(&ctx->ipp_idr); - idr_destroy(&ctx->prop_idr); - - mutex_destroy(&ctx->ipp_lock); - mutex_destroy(&ctx->prop_lock); - - /* destroy command, event work queue */ - destroy_workqueue(ctx->cmd_workq); - destroy_workqueue(ctx->event_workq); - - return 0; -} - -struct platform_driver ipp_driver = { - .probe = ipp_probe, - .remove = ipp_remove, - .driver = { - .name = "exynos-drm-ipp", - .owner = THIS_MODULE, - }, -}; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h deleted file mode 100644 index 2a61547a39d0..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim - * Jinyoung Jeon - * Sangmin Lee - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ - -#ifndef _EXYNOS_DRM_IPP_H_ -#define _EXYNOS_DRM_IPP_H_ - -#define for_each_ipp_ops(pos) \ - for (pos = 0; pos < EXYNOS_DRM_OPS_MAX; pos++) -#define for_each_ipp_planar(pos) \ - for (pos = 0; pos < EXYNOS_DRM_PLANAR_MAX; pos++) - -#define IPP_GET_LCD_WIDTH _IOR('F', 302, int) -#define IPP_GET_LCD_HEIGHT _IOR('F', 303, int) -#define IPP_SET_WRITEBACK _IOW('F', 304, u32) - -/* definition of state */ -enum drm_exynos_ipp_state { - IPP_STATE_IDLE, - IPP_STATE_START, - IPP_STATE_STOP, -}; - -/* - * A structure of command work information. - * @work: work structure. - * @ippdrv: current work ippdrv. - * @c_node: command node information. - * @ctrl: command control. - */ -struct drm_exynos_ipp_cmd_work { - struct work_struct work; - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - enum drm_exynos_ipp_ctrl ctrl; -}; - -/* - * A structure of command node. - * - * @list: list head to command queue information. - * @event_list: list head of event. - * @mem_list: list head to source,destination memory queue information. - * @lock: lock for synchronization of access to ioctl. - * @mem_lock: lock for synchronization of access to memory nodes. - * @event_lock: lock for synchronization of access to scheduled event. - * @start_complete: completion of start of command. - * @stop_complete: completion of stop of command. - * @property: property information. - * @start_work: start command work structure. - * @stop_work: stop command work structure. - * @event_work: event work structure. - * @state: state of command node. - * @filp: associated file pointer. - */ -struct drm_exynos_ipp_cmd_node { - struct list_head list; - struct list_head event_list; - struct list_head mem_list[EXYNOS_DRM_OPS_MAX]; - struct mutex lock; - struct mutex mem_lock; - struct mutex event_lock; - struct completion start_complete; - struct completion stop_complete; - struct drm_exynos_ipp_property property; - struct drm_exynos_ipp_cmd_work *start_work; - struct drm_exynos_ipp_cmd_work *stop_work; - struct drm_exynos_ipp_event_work *event_work; - enum drm_exynos_ipp_state state; - struct drm_file *filp; -}; - -/* - * A structure of buffer information. - * - * @handles: Y, Cb, Cr each gem object handle. - * @base: Y, Cb, Cr each planar address. - */ -struct drm_exynos_ipp_buf_info { - unsigned long handles[EXYNOS_DRM_PLANAR_MAX]; - dma_addr_t base[EXYNOS_DRM_PLANAR_MAX]; -}; - -/* - * A structure of wb setting information. - * - * @enable: enable flag for wb. - * @refresh: HZ of the refresh rate. - */ -struct drm_exynos_ipp_set_wb { - __u32 enable; - __u32 refresh; -}; - -/* - * A structure of event work information. - * - * @work: work structure. - * @ippdrv: current work ippdrv. - * @buf_id: id of src, dst buffer. - */ -struct drm_exynos_ipp_event_work { - struct work_struct work; - struct exynos_drm_ippdrv *ippdrv; - u32 buf_id[EXYNOS_DRM_OPS_MAX]; -}; - -/* - * A structure of source,destination operations. - * - * @set_fmt: set format of image. - * @set_transf: set transform(rotations, flip). - * @set_size: set size of region. - * @set_addr: set address for dma. 
- */ -struct exynos_drm_ipp_ops { - int (*set_fmt)(struct device *dev, u32 fmt); - int (*set_transf)(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap); - int (*set_size)(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz); - int (*set_addr)(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type); -}; - -/* - * A structure of ipp driver. - * - * @drv_list: list head for registed sub driver information. - * @parent_dev: parent device information. - * @dev: platform device. - * @drm_dev: drm device. - * @dedicated: dedicated ipp device. - * @ops: source, destination operations. - * @event_workq: event work queue. - * @c_node: current command information. - * @cmd_list: list head for command information. - * @cmd_lock: lock for synchronization of access to cmd_list. - * @prop_list: property informations of current ipp driver. - * @check_property: check property about format, size, buffer. - * @reset: reset ipp block. - * @start: ipp each device start. - * @stop: ipp each device stop. - * @sched_event: work schedule handler. - */ -struct exynos_drm_ippdrv { - struct list_head drv_list; - struct device *parent_dev; - struct device *dev; - struct drm_device *drm_dev; - bool dedicated; - struct exynos_drm_ipp_ops *ops[EXYNOS_DRM_OPS_MAX]; - struct workqueue_struct *event_workq; - struct drm_exynos_ipp_cmd_node *c_node; - struct list_head cmd_list; - struct mutex cmd_lock; - struct drm_exynos_ipp_prop_list prop_list; - - int (*check_property)(struct device *dev, - struct drm_exynos_ipp_property *property); - int (*reset)(struct device *dev); - int (*start)(struct device *dev, enum drm_exynos_ipp_cmd cmd); - void (*stop)(struct device *dev, enum drm_exynos_ipp_cmd cmd); - void (*sched_event)(struct work_struct *work); -}; - -#ifdef CONFIG_DRM_EXYNOS_IPP -extern int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv); -extern int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv); -extern int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ippnb_register(struct notifier_block *nb); -extern int exynos_drm_ippnb_unregister(struct notifier_block *nb); -extern int exynos_drm_ippnb_send_event(unsigned long val, void *v); -extern void ipp_sched_cmd(struct work_struct *work); -extern void ipp_sched_event(struct work_struct *work); - -#else -static inline int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv) -{ - return -ENODEV; -} - -static inline int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv) -{ - return -ENODEV; -} - -static inline int exynos_drm_ipp_get_property(struct drm_device *drm_dev, - void *data, - struct drm_file *file_priv) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ipp_set_property(struct drm_device *drm_dev, - void *data, - struct drm_file *file_priv) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, - void *data, - struct drm_file *file) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, - void *data, - struct drm_file *file) 
-{ - return -ENOTTY; -} - -static inline int exynos_drm_ippnb_register(struct notifier_block *nb) -{ - return -ENODEV; -} - -static inline int exynos_drm_ippnb_unregister(struct notifier_block *nb) -{ - return -ENODEV; -} - -static inline int exynos_drm_ippnb_send_event(unsigned long val, void *v) -{ - return -ENOTTY; -} -#endif - -#endif /* _EXYNOS_DRM_IPP_H_ */ - diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index d01087b2a651..4a54305120e0 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -135,172 +135,6 @@ struct drm_exynos_g2d_exec { __u64 async; }; -enum drm_exynos_ops_id { - EXYNOS_DRM_OPS_SRC, - EXYNOS_DRM_OPS_DST, - EXYNOS_DRM_OPS_MAX, -}; - -struct drm_exynos_sz { - __u32 hsize; - __u32 vsize; -}; - -struct drm_exynos_pos { - __u32 x; - __u32 y; - __u32 w; - __u32 h; -}; - -enum drm_exynos_flip { - EXYNOS_DRM_FLIP_NONE = (0 << 0), - EXYNOS_DRM_FLIP_VERTICAL = (1 << 0), - EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1), - EXYNOS_DRM_FLIP_BOTH = EXYNOS_DRM_FLIP_VERTICAL | - EXYNOS_DRM_FLIP_HORIZONTAL, -}; - -enum drm_exynos_degree { - EXYNOS_DRM_DEGREE_0, - EXYNOS_DRM_DEGREE_90, - EXYNOS_DRM_DEGREE_180, - EXYNOS_DRM_DEGREE_270, -}; - -enum drm_exynos_planer { - EXYNOS_DRM_PLANAR_Y, - EXYNOS_DRM_PLANAR_CB, - EXYNOS_DRM_PLANAR_CR, - EXYNOS_DRM_PLANAR_MAX, -}; - -/** - * A structure for ipp supported property list. - * - * @version: version of this structure. - * @ipp_id: id of ipp driver. - * @count: count of ipp driver. - * @writeback: flag of writeback supporting. - * @flip: flag of flip supporting. - * @degree: flag of degree information. - * @csc: flag of csc supporting. - * @crop: flag of crop supporting. - * @scale: flag of scale supporting. - * @refresh_min: min hz of refresh. - * @refresh_max: max hz of refresh. - * @crop_min: crop min resolution. - * @crop_max: crop max resolution. - * @scale_min: scale min resolution. - * @scale_max: scale max resolution. - */ -struct drm_exynos_ipp_prop_list { - __u32 version; - __u32 ipp_id; - __u32 count; - __u32 writeback; - __u32 flip; - __u32 degree; - __u32 csc; - __u32 crop; - __u32 scale; - __u32 refresh_min; - __u32 refresh_max; - __u32 reserved; - struct drm_exynos_sz crop_min; - struct drm_exynos_sz crop_max; - struct drm_exynos_sz scale_min; - struct drm_exynos_sz scale_max; -}; - -/** - * A structure for ipp config. - * - * @ops_id: property of operation directions. - * @flip: property of mirror, flip. - * @degree: property of rotation degree. - * @fmt: property of image format. - * @sz: property of image size. - * @pos: property of image position(src-cropped,dst-scaler). - */ -struct drm_exynos_ipp_config { - __u32 ops_id; - __u32 flip; - __u32 degree; - __u32 fmt; - struct drm_exynos_sz sz; - struct drm_exynos_pos pos; -}; - -enum drm_exynos_ipp_cmd { - IPP_CMD_NONE, - IPP_CMD_M2M, - IPP_CMD_WB, - IPP_CMD_OUTPUT, - IPP_CMD_MAX, -}; - -/** - * A structure for ipp property. - * - * @config: source, destination config. - * @cmd: definition of command. - * @ipp_id: id of ipp driver. - * @prop_id: id of property. - * @refresh_rate: refresh rate. - */ -struct drm_exynos_ipp_property { - struct drm_exynos_ipp_config config[EXYNOS_DRM_OPS_MAX]; - __u32 cmd; - __u32 ipp_id; - __u32 prop_id; - __u32 refresh_rate; -}; - -enum drm_exynos_ipp_buf_type { - IPP_BUF_ENQUEUE, - IPP_BUF_DEQUEUE, -}; - -/** - * A structure for ipp buffer operations. - * - * @ops_id: operation directions. - * @buf_type: definition of buffer. - * @prop_id: id of property. - * @buf_id: id of buffer. 
- * @handle: Y, Cb, Cr each planar handle. - * @user_data: user data. - */ -struct drm_exynos_ipp_queue_buf { - __u32 ops_id; - __u32 buf_type; - __u32 prop_id; - __u32 buf_id; - __u32 handle[EXYNOS_DRM_PLANAR_MAX]; - __u32 reserved; - __u64 user_data; -}; - -enum drm_exynos_ipp_ctrl { - IPP_CTRL_PLAY, - IPP_CTRL_STOP, - IPP_CTRL_PAUSE, - IPP_CTRL_RESUME, - IPP_CTRL_MAX, -}; - -/** - * A structure for ipp start/stop operations. - * - * @prop_id: id of property. - * @ctrl: definition of control. - */ -struct drm_exynos_ipp_cmd_ctrl { - __u32 prop_id; - __u32 ctrl; -}; - #define DRM_EXYNOS_GEM_CREATE 0x00 #define DRM_EXYNOS_GEM_MAP 0x01 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */ @@ -312,11 +146,7 @@ struct drm_exynos_ipp_cmd_ctrl { #define DRM_EXYNOS_G2D_SET_CMDLIST 0x21 #define DRM_EXYNOS_G2D_EXEC 0x22 -/* IPP - Image Post Processing */ -#define DRM_EXYNOS_IPP_GET_PROPERTY 0x30 -#define DRM_EXYNOS_IPP_SET_PROPERTY 0x31 -#define DRM_EXYNOS_IPP_QUEUE_BUF 0x32 -#define DRM_EXYNOS_IPP_CMD_CTRL 0x33 +/* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */ #define DRM_IOCTL_EXYNOS_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create) @@ -335,18 +165,8 @@ struct drm_exynos_ipp_cmd_ctrl { #define DRM_IOCTL_EXYNOS_G2D_EXEC DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec) -#define DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_GET_PROPERTY, struct drm_exynos_ipp_prop_list) -#define DRM_IOCTL_EXYNOS_IPP_SET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_SET_PROPERTY, struct drm_exynos_ipp_property) -#define DRM_IOCTL_EXYNOS_IPP_QUEUE_BUF DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_QUEUE_BUF, struct drm_exynos_ipp_queue_buf) -#define DRM_IOCTL_EXYNOS_IPP_CMD_CTRL DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_CMD_CTRL, struct drm_exynos_ipp_cmd_ctrl) - /* EXYNOS specific events */ #define DRM_EXYNOS_G2D_EVENT 0x80000000 -#define DRM_EXYNOS_IPP_EVENT 0x80000001 struct drm_exynos_g2d_event { struct drm_event base; @@ -357,16 +177,6 @@ struct drm_exynos_g2d_event { __u32 reserved; }; -struct drm_exynos_ipp_event { - struct drm_event base; - __u64 user_data; - __u32 tv_sec; - __u32 tv_usec; - __u32 prop_id; - __u32 reserved; - __u32 buf_id[EXYNOS_DRM_OPS_MAX]; -}; - #if defined(__cplusplus) } #endif -- cgit v1.2.3