From ac65bdfef14a902b40ff69a35f5c604dba096547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Jun 2019 18:01:35 +0100 Subject: drm/i915: Keep rings pinned while the context is active Remember to keep the rings pinned as well as the context image until the GPU is no longer active. v2: Introduce a ring->pin_count primarily to hide the mock_ring that doesn't fit into the normal GGTT vma picture. v3: Order is important in teardown, ringbuffer submission needs to drop the pin count on the engine->kernel_context before it can gleefully free its ring. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110946 Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190619170135.15281-1-chris@chris-wilson.co.uk (cherry picked from commit 09c5ab384f6fb30f834a5777888b4486dd7f015d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_context.c | 27 ++++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 +++++++++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ++------- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 31 ++++++++++++++++++---------- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + 5 files changed, 53 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2c454f227c2e..23120901c55f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -126,6 +126,7 @@ static void intel_context_retire(struct i915_active *active) if (ce->state) __context_unpin_state(ce->state); + intel_ring_unpin(ce->ring); intel_context_put(ce); } @@ -160,27 +161,35 @@ int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) intel_context_get(ce); + err = intel_ring_pin(ce->ring); + if (err) + goto 
err_put; + if (!ce->state) return 0; err = __context_pin_state(ce->state, flags); - if (err) { - i915_active_cancel(&ce->active); - intel_context_put(ce); - return err; - } + if (err) + goto err_ring; /* Preallocate tracking nodes */ if (!i915_gem_context_is_kernel(ce->gem_context)) { err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } + if (err) + goto err_state; } return 0; + +err_state: + __context_unpin_state(ce->state); +err_ring: + intel_ring_unpin(ce->ring); +err_put: + intel_context_put(ce); + i915_active_cancel(&ce->active); + return err; } void intel_context_active_release(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 868b220214f8..43e975a26016 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -70,6 +70,18 @@ struct intel_ring { struct list_head request_list; struct list_head active_link; + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. 
+ */ + atomic_t pin_count; + u32 head; u32 tail; u32 emit; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b42b5f158295..82b7ace62d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1414,6 +1414,7 @@ static void execlists_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); if (ce->state) @@ -1426,7 +1427,6 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_unpin(ce->ring); } static void @@ -1478,13 +1478,9 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - ret = i915_gem_context_pin_hw_id(ce->gem_context); if (ret) - goto unpin_ring; + goto unpin_map; ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -1492,8 +1488,6 @@ __execlists_context_pin(struct intel_context *ce, return 0; -unpin_ring: - intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_active: diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c6023bc9452d..12010e798868 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1149,16 +1149,16 @@ i915_emit_bb_start(struct i915_request *rq, int intel_ring_pin(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); unsigned int flags; void *addr; int ret; - GEM_BUG_ON(ring->vaddr); + if (atomic_fetch_inc(&ring->pin_count)) + return 0; ret = i915_timeline_pin(ring->timeline); if (ret) - return ret; + goto err_unpin; flags = PIN_GLOBAL; @@ -1172,26 +1172,31 @@ int 
intel_ring_pin(struct intel_ring *ring) ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - goto unpin_timeline; + goto err_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, map); + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); - goto unpin_ring; + goto err_ring; } vma->obj->pin_global++; + GEM_BUG_ON(ring->vaddr); ring->vaddr = addr; + return 0; -unpin_ring: +err_ring: i915_vma_unpin(vma); -unpin_timeline: +err_timeline: i915_timeline_unpin(ring->timeline); +err_unpin: + atomic_dec(&ring->pin_count); return ret; } @@ -1207,16 +1212,19 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_unpin(struct intel_ring *ring) { - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); + if (!atomic_dec_and_test(&ring->pin_count)) + return; /* Discard any unused bytes beyond that submitted to hw. */ intel_ring_reset(ring, ring->tail); + GEM_BUG_ON(!ring->vma); if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else i915_gem_object_unpin_map(ring->vma->obj); + + GEM_BUG_ON(!ring->vaddr); ring->vaddr = NULL; ring->vma->obj->pin_global--; @@ -2081,10 +2089,11 @@ static void ring_destroy(struct intel_engine_cs *engine) WARN_ON(INTEL_GEN(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + intel_engine_cleanup_common(engine); + intel_ring_unpin(engine->buffer); intel_ring_put(engine->buffer); - intel_engine_cleanup_common(engine); kfree(engine); } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 086801b51441..486c6953dcb1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -66,6 +66,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->base.effective_size = sz; ring->base.vaddr = (void *)(ring + 1); 
ring->base.timeline = &ring->timeline; + atomic_set(&ring->base.pin_count, 1); INIT_LIST_HEAD(&ring->base.request_list); intel_ring_update_space(&ring->base); -- cgit v1.2.3 From 248f883db61283b4f5a1c92a5e27277377b09f16 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 25 Jun 2019 10:06:55 +0100 Subject: drm/i915: Disable SAMPLER_STATE prefetching on all Gen11 steppings. The Demand Prefetch workaround (binding table prefetching) only applies to Icelake A0/B0. But the Sampler Prefetch workaround needs to be applied to all Gen11 steppings, according to a programming note in the SARCHKMD documentation. Using the Intel Gallium driver, I have seen intermittent failures in the dEQP-GLES31.functional.copy_image.non_compressed.* tests. After applying this workaround, the tests reliably pass. v2: Remove the overlap with a pre-production w/a BSpec: 9663 Signed-off-by: Kenneth Graunke Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190625090655.19220-1-chris@chris-wilson.co.uk (cherry picked from commit f9a393875d3af13cc3267477746608dadb7f17c1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 15e90fd2cfdc..50c0060509a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1258,8 +1258,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) wa_write_or(wal, GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH | - GEN7_DISABLE_SAMPLER_PREFETCH); + GEN7_DISABLE_DEMAND_PREFETCH); + + /* Wa_1606682166:icl */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); } if (IS_GEN_RANGE(i915, 9, 11)) { -- cgit v1.2.3 
From c270cac40828eca4fb8d7c27cab1d0ac7765ff3d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sat, 29 Jun 2019 14:13:50 +0100 Subject: drm/i915: fix whitelist selftests with readonly registers When a register is readonly there is not much we can tell about its value (apart from its default value?). This can be covered by tests exercising the value of the register from userspace. For PS_INVOCATION_COUNT we've got the following piglit tests : KHR-GL45.pipeline_statistics_query_tests_ARB.functional_fragment_shader_invocations Vulkan CTS tests : dEQP-VK.query_pool.statistics_query.fragment_shader_invocations.* v2: Use a local to shrink under 80cols. Signed-off-by: Lionel Landwerlin Fixes: 86554f48e511 ("drm/i915/selftests: Verify whitelist of context registers") Tested-by: Anuj Phogat Signed-off-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190629131350.31185-1-chris@chris-wilson.co.uk (cherry picked from commit 361b69051326ed0e07553315227678d00d651a9e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 9eaf030affd0..44becd9538be 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -925,7 +925,12 @@ check_whitelisted_registers(struct intel_engine_cs *engine, err = 0; for (i = 0; i < engine->whitelist.count; i++) { - if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + const struct i915_wa *wa = &engine->whitelist.list[i]; + + if (i915_mmio_reg_offset(wa->reg) & RING_FORCE_TO_NONPRIV_RD) + continue; + + if (!fn(engine, a[i], b[i], wa->reg)) err = -EINVAL; } -- cgit v1.2.3 From 6ce5bfe936ac31d5c52c4b1328d0bfda5f97e7ca Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 28 Jun 2019 15:07:19 +0300 
Subject: drm/i915: whitelist PS_(DEPTH|INVOCATION)_COUNT CFL:C0+ changed the status of those registers which are now blacklisted by default. This is breaking a number of CTS tests on GL & Vulkan : KHR-GL45.pipeline_statistics_query_tests_ARB.functional_fragment_shader_invocations (GL) dEQP-VK.query_pool.statistics_query.fragment_shader_invocations.* (Vulkan) v2: Only use one whitelist entry (Lionel) Bspec: 14091 Signed-off-by: Lionel Landwerlin Cc: stable@vger.kernel.org # 6883eab27481: drm/i915: Support flags in whitlist WAs Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190628120720.21682-3-lionel.g.landwerlin@intel.com (cherry picked from commit 2c903da50f5a9522b134e488bd0f92646c46f3c0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 50c0060509a6..b26c3549429e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1098,10 +1098,25 @@ static void glk_whitelist_build(struct intel_engine_cs *engine) static void cfl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + if (engine->class != RENDER_CLASS) return; - gen9_whitelist_build(&engine->whitelist); + gen9_whitelist_build(w); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); } static void cnl_whitelist_build(struct intel_engine_cs *engine) -- cgit v1.2.3 From 
cf8f9aa1eda7d916bd23f6b8c226404deb11690c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 28 Jun 2019 15:07:20 +0300 Subject: drm/i915/icl: whitelist PS_(DEPTH|INVOCATION)_COUNT The same tests failing on CFL+ platforms are also failing on ICL. Documentation doesn't list the WaAllowPMDepthAndInvocationCountAccessFromUMD workaround for ICL but applying it fixes the same tests as CFL. v2: Use only one whitelist entry (Lionel) Signed-off-by: Lionel Landwerlin Tested-by: Anuj Phogat Cc: stable@vger.kernel.org # 6883eab27481: drm/i915: Support flags in whitlist WAs Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190628120720.21682-4-lionel.g.landwerlin@intel.com (cherry picked from commit 3fe0107e45ab396342497e06b8924cdd485cde3b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b26c3549429e..98dfb086320f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1144,6 +1144,19 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); break; case VIDEO_DECODE_CLASS: -- cgit v1.2.3 From 982b1d002f16c2695871e005c4132060c836db56 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jul 2019 09:09:28 +0100 Subject: drm/i915: Lock the engine while dumping the active request We 
cannot let the request be retired and freed while we are trying to dump it during error capture. It is not sufficient just to grab a reference to the request, as during retirement we may free the ring which we are also dumping. So take the engine lock to prevent retiring and freeing of the request. Reported-by: Alex Shumsky Fixes: 83c317832eb1 ("drm/i915: Dump the ringbuffer of the active request for debugging") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Alex Shumsky Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190715080946.15593-6-chris@chris-wilson.co.uk (cherry picked from commit cfe7288c276e359eebf057699fe86c2f8af14224) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 ++++------- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++++-- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7fd33e81c2d9..aa5a1f11a91b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1471,6 +1471,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; + unsigned long flags; if (header) { va_list ap; @@ -1490,10 +1491,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_engine_count(error, engine), i915_reset_count(error)); - rcu_read_lock(); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1513,8 +1513,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); } - - rcu_read_unlock(); + spin_unlock_irqrestore(&engine->active.lock, flags); wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); if 
(wakeref) { @@ -1672,7 +1671,6 @@ struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; - unsigned long flags; /* * We are called by the error capture, reset and to dump engine @@ -1685,7 +1683,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->active.lock, flags); + lockdep_assert_held(&engine->active.lock); list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; @@ -1700,7 +1698,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->active.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 41a511d5267f..8bc76fcff70d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1418,6 +1418,7 @@ static void gem_record_rings(struct i915_gpu_state *error) struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; struct i915_request *request; + unsigned long flags; ee->engine_id = -1; @@ -1429,10 +1430,11 @@ static void gem_record_rings(struct i915_gpu_state *error) error_record_engine_registers(error, engine, ee); error_record_engine_execlists(engine, ee); + spin_lock_irqsave(&engine->active.lock, flags); request = intel_engine_find_active_request(engine); if (request) { struct i915_gem_context *ctx = request->gem_context; - struct intel_ring *ring; + struct intel_ring *ring = request->ring; ee->vm = ctx->vm ?: &ggtt->vm; @@ -1462,7 +1464,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->rq_post = request->postfix; ee->rq_tail = request->tail; - ring = request->ring; ee->cpu_ring_head = ring->head; 
ee->cpu_ring_tail = ring->tail; ee->ringbuffer = @@ -1470,6 +1471,7 @@ static void gem_record_rings(struct i915_gpu_state *error) engine_record_requests(engine, request, ee); } + spin_unlock_irqrestore(&engine->active.lock, flags); ee->hws_page = i915_error_object_create(i915, -- cgit v1.2.3 From 0bbfdce345c8cf01a3a985fa99fefd2146dcc748 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 17 Jul 2019 19:06:19 +0100 Subject: drm/i915: Fix GEN8_MCR_SELECTOR programming fls returns bit positions starting from one for the lsb and the MCR register expects zero based (sub)slice addressing. Incorrect MCR programming can have the effect of directing MMIO reads of registers in the 0xb100-0xb3ff range to invalid subslice returning zeroes instead of actual content. Signed-off-by: Tvrtko Ursulin Fixes: 1e40d4aea57b ("drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads") Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190717180624.20354-2-tvrtko.ursulin@linux.intel.com (cherry picked from commit 15160879d47213c32f357bc67b6014d9aaf14ed7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index aa5a1f11a91b..f25632c9b292 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -969,9 +969,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + unsigned int slice = fls(sseu->slice_mask) - 1; + unsigned int subslice; u32 mcr_s_ss_select; - u32 slice = fls(sseu->slice_mask); - u32 subslice = fls(sseu->subslice_mask[slice]); + + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = 
fls(sseu->subslice_mask[slice]); + GEM_BUG_ON(!subslice); + subslice--; if (IS_GEN(dev_priv, 10)) mcr_s_ss_select = GEN8_MCR_SLICE(slice) | -- cgit v1.2.3 From 0de50e40fc685fed4d6896a379b123f859ffb17b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:49 +0100 Subject: drm/i915: Lift intel_engines_resume() to callers Since the reset path wants to recover the engines itself, it only wants to reinitialise the hardware using i915_gem_init_hw(). Pull the call to intel_engines_resume() to the module init/resume path so we can avoid it during reset. Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-3-chris@chris-wilson.co.uk (cherry picked from commit 092be382a2602067766f190a113514d469162456) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 7 ++++--- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 24 ------------------------ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 -- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 21 ++++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 21 ++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem.c | 25 +++++++++++-------------- 7 files changed, 56 insertions(+), 46 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 05011d4a3b88..914b5d4112bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -253,14 +253,15 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); + if (i915_gem_init_hw(i915)) + goto err_wedged; + /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. 
So let's just reset * it and start again. */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) + if (intel_gt_resume(i915)) goto err_wedged; intel_uc_resume(i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2ce00d3dc42a..ae5b6baf6dff 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -142,27 +142,3 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) { intel_wakeref_init(&engine->wakeref); } - -int intel_engines_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* kernel context lost */ - err = engine->resume(engine); - intel_engine_pm_put(engine); - if (err) { - dev_err(i915->drm.dev, - "Failed to restart %s (%d)\n", - engine->name, err); - break; - } - } - intel_gt_pm_put(i915); - - return err; -} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index b326cd993d60..f6f213fbc98c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -17,6 +17,4 @@ void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); -int intel_engines_resume(struct drm_i915_private *i915); - #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7b5967751762..9f8f7f54191f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -118,10 +119,11 @@ void intel_gt_sanitize(struct drm_i915_private *i915, bool force) intel_engine_reset(engine, false); } -void 
intel_gt_resume(struct drm_i915_private *i915) +int intel_gt_resume(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* * After resume, we may need to poke into the pinned kernel @@ -129,9 +131,12 @@ void intel_gt_resume(struct drm_i915_private *i915) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) { struct intel_context *ce; + intel_engine_pm_get(engine); + ce = engine->kernel_context; if (ce) ce->ops->reset(ce); @@ -139,5 +144,19 @@ void intel_gt_resume(struct drm_i915_private *i915) ce = engine->preempt_context; if (ce) ce->ops->reset(ce); + + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } } + intel_gt_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 7dd1130a19a4..53f342b20181 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -22,6 +22,6 @@ void intel_gt_pm_put(struct drm_i915_private *i915); void intel_gt_pm_init(struct drm_i915_private *i915); void intel_gt_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); +int intel_gt_resume(struct drm_i915_private *i915); #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4c478b38e420..0439ed66e969 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -951,6 +951,21 @@ static int do_reset(struct drm_i915_private *i915, return gt_reset(i915, stalled_mask); } +static int resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + 
int ret; + + for_each_engine(engine, i915, id) { + ret = engine->resume(engine); + if (ret) + return ret; + } + + return 0; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -1024,9 +1039,13 @@ void i915_reset(struct drm_i915_private *i915, if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); - goto error; + goto taint; } + ret = resume(i915); + if (ret) + goto taint; + i915_queue_hangcheck(i915); finish: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 190ad54fb072..8a659d3d7435 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,6 @@ #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" #include "gem/i915_gemfs.h" -#include "gt/intel_engine_pm.h" #include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" @@ -1307,21 +1306,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - /* Only when the HW is re-initialised, can we replay the requests */ - ret = intel_engines_resume(dev_priv); - if (ret) - goto cleanup_uc; - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_engines_set_scheduler_caps(dev_priv); return 0; -cleanup_uc: - intel_uc_fini_hw(dev_priv); out: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - return ret; } @@ -1580,6 +1571,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto err_uc_init; + /* Only when the HW is re-initialised, can we replay the requests */ + ret = intel_gt_resume(dev_priv); + if (ret) + goto err_init_hw; + /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -1593,20 +1589,20 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ret = intel_engines_verify_workarounds(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; ret = __intel_engines_record_defaults(dev_priv); if (ret) - 
goto err_init_hw; + goto err_gt; if (i915_inject_load_failure()) { ret = -ENODEV; - goto err_init_hw; + goto err_gt; } if (i915_inject_load_failure()) { ret = -EIO; - goto err_init_hw; + goto err_gt; } intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -1620,7 +1616,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime. */ -err_init_hw: +err_gt: mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_set_wedged(dev_priv); @@ -1630,6 +1626,7 @@ err_init_hw: i915_gem_drain_workqueue(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); +err_init_hw: intel_uc_fini_hw(dev_priv); err_uc_init: intel_uc_fini(dev_priv); -- cgit v1.2.3 From b1fa6fd94fc6a5d6be85359743b5f3626f3f881c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:47 +0100 Subject: drm/i915: Add a wakeref getter for iff the wakeref is already active For use in the next patch, we want to acquire a wakeref without having to wake the device up -- i.e. only acquire the engine wakeref if the engine is already active. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-1-chris@chris-wilson.co.uk (cherry picked from commit de5147b8ce6d51f634661d7c531385371485cec6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10 +++++++++- drivers/gpu/drm/i915/intel_wakeref.h | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index f6f213fbc98c..a11c893f64c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,12 +7,20 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "intel_engine_types.h" +#include "intel_wakeref.h" + struct drm_i915_private; -struct intel_engine_cs; void intel_engine_pm_get(struct intel_engine_cs *engine); void intel_engine_pm_put(struct intel_engine_cs *engine); +static inline bool +intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) +{ + return intel_wakeref_get_if_active(&engine->wakeref); +} + void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 9cbb2ebf575b..38275310b196 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -65,6 +65,21 @@ intel_wakeref_get(struct intel_runtime_pm *rpm, return 0; } +/** + * intel_wakeref_get_if_active: Acquire the wakeref + * @wf: the wakeref + * + * Acquire a hold on the wakeref, but only if the wakeref is already + * active. + * + * Returns: true if the wakeref was acquired, false otherwise.
+ */ +static inline bool +intel_wakeref_get_if_active(struct intel_wakeref *wf) +{ + return atomic_inc_not_zero(&wf->count); +} + /** * intel_wakeref_put: Release the wakeref * @i915: the drm_i915_private device -- cgit v1.2.3 From 4b9bb9728c915c6079619e71e3340fe4840d9d40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:48 +0100 Subject: drm/i915: Only recover active engines If we issue a reset to a currently idle engine, leave it idle afterwards. This is useful to excise a linkage between reset and the shrinker. When waking the engine, we need to pin the default context image which we use for overwriting a guilty context -- if the engine is idle we do not need this pinned image! However, this pinning means that waking the engine acquires the FS_RECLAIM, and so may trigger the shrinker. The shrinker itself may need to wait upon the GPU to unbind an object and so may require services of reset; ergo we should avoid the engine wake up path. The danger in skipping the recovery for idle engines is that we leave the engine with no context defined, which may interfere with the operation of the power context on some older platforms. In practice, we should only be resetting an active GPU but it is something to look out for on Ironlake (if memory serves).
Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-2-chris@chris-wilson.co.uk (cherry picked from commit 18398904ca9e3ddd180e2ecd45886e146b1d9d5b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 +++++++++++++++++++------------- drivers/gpu/drm/i915/gt/selftest_reset.c | 5 ++++- 2 files changed, 26 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 0439ed66e969..3f907701ef4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -687,7 +687,6 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ - intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -718,16 +717,21 @@ static void revoke_mmaps(struct drm_i915_private *i915) } } -static void reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; + intel_engine_mask_t awake = 0; enum intel_engine_id id; - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) + for_each_engine(engine, i915, id) { + if (intel_engine_pm_get_if_awake(engine)) + awake |= engine->mask; reset_prepare_engine(engine); + } intel_uc_reset_prepare(i915); + + return awake; } static void gt_revoke(struct drm_i915_private *i915) @@ -761,20 +765,22 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); - intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); + + 
intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915) +static void reset_finish(struct drm_i915_private *i915, + intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_signal_breadcrumbs(engine); + if (awake & engine->mask) + intel_engine_pm_put(engine); } - intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -798,6 +804,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) { struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; + intel_engine_mask_t awake; enum intel_engine_id id; if (test_bit(I915_WEDGED, &error->flags)) @@ -817,7 +824,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - reset_prepare(i915); + awake = reset_prepare(i915); /* Even if the GPU reset fails, it should still stop the engines */ if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) @@ -841,7 +848,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) engine->cancel_requests(engine); - reset_finish(i915); + reset_finish(i915, awake); GEM_TRACE("end\n"); } @@ -988,6 +995,7 @@ void i915_reset(struct drm_i915_private *i915, const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; + intel_engine_mask_t awake; int ret; GEM_TRACE("flags=%lx\n", error->flags); @@ -1004,7 +1012,7 @@ void i915_reset(struct drm_i915_private *i915, dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - reset_prepare(i915); + awake = reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1049,7 +1057,7 @@ void i915_reset(struct drm_i915_private *i915, i915_queue_hangcheck(i915); finish: - reset_finish(i915); + 
reset_finish(i915, awake); unlock: mutex_unlock(&error->wedge_mutex); return; @@ -1100,7 +1108,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_get_if_awake(engine)) return 0; reset_prepare_engine(engine); @@ -1135,12 +1143,11 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * process to program RING_MODE, HWSP and re-enable submission. */ ret = engine->resume(engine); - if (ret) - goto out; out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); + intel_engine_pm_put(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 89da9e7cc1ba..b5c590c9ccba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -71,13 +71,16 @@ static int igt_atomic_reset(void *arg) goto unlock; for (p = igt_atomic_phases; p->name; p++) { + intel_engine_mask_t awake; + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + awake = reset_prepare(i915); p->critical_section_begin(); reset_prepare(i915); err = intel_gpu_reset(i915, ALL_ENGINES); - reset_finish(i915); p->critical_section_end(); + reset_finish(i915, awake); if (err) { pr_err("intel_gpu_reset failed under %s\n", p->name); -- cgit v1.2.3 From f2cb60e9a3881e679465f84140754bc9d29956ea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 17 Aug 2019 16:30:22 +0100 Subject: dma-fence: Store the timestamp in the same union as the cb_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The timestamp and the cb_list are mutually exclusive, the cb_list can only be added to prior to being signaled (and once signaled we drain), while the timestamp is only valid upon being signaled. 
Both the timestamp and the cb_list are only valid while the fence is alive, and as soon as no references are held can be replaced by the rcu_head. By reusing the union for the timestamp, we squeeze the base dma_fence struct to 64 bytes on x86-64. v2: Sort the union chronologically Suggested-by: Christian König Signed-off-by: Chris Wilson Cc: Christian König Acked-by: Christian König . Link: https://patchwork.freedesktop.org/patch/msgid/20190817153022.5749-1-chris@chris-wilson.co.uk --- drivers/dma-buf/dma-fence.c | 16 +++++++++------- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +++++++------ drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 3 +++ include/linux/dma-fence.h | 24 +++++++++++++++++++----- 4 files changed, 38 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 8a6d0250285d..2c136aee3e79 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc); int dma_fence_signal_locked(struct dma_fence *fence) { struct dma_fence_cb *cur, *tmp; + struct list_head cb_list; lockdep_assert_held(fence->lock); @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence) &fence->flags))) return -EINVAL; + /* Stash the cb_list before replacing it with the timestamp */ + list_replace(&fence->cb_list, &cb_list); + fence->timestamp = ktime_get(); set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); - if (!list_empty(&fence->cb_list)) { - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { - INIT_LIST_HEAD(&cur->node); - cur->func(fence, cur); - } - INIT_LIST_HEAD(&fence->cb_list); + list_for_each_entry_safe(cur, tmp, &cb_list, node) { + INIT_LIST_HEAD(&cur->node); + cur->func(fence, cur); } return 0; @@ -231,7 +232,8 @@ void dma_fence_release(struct kref *kref) trace_dma_fence_destroy(fence); - if (WARN(!list_empty(&fence->cb_list), + if 
(WARN(!list_empty(&fence->cb_list) && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags), "Fence %s:%s:%llx:%llx released with pending signals!\n", fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence), diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index c092bdf5f0bf..ea56b2cc6095 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -112,18 +112,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) } static void -__dma_fence_signal__notify(struct dma_fence *fence) +__dma_fence_signal__notify(struct dma_fence *fence, + const struct list_head *list) { struct dma_fence_cb *cur, *tmp; lockdep_assert_held(fence->lock); lockdep_assert_irqs_disabled(); - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } - INIT_LIST_HEAD(&fence->cb_list); } void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) @@ -185,11 +185,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); - - __dma_fence_signal__timestamp(&rq->fence, timestamp); + struct list_head cb_list; spin_lock(&rq->lock); - __dma_fence_signal__notify(&rq->fence); + list_replace(&rq->fence.cb_list, &cb_list); + __dma_fence_signal__timestamp(&rq->fence, timestamp); + __dma_fence_signal__notify(&rq->fence, &cb_list); spin_unlock(&rq->lock); i915_request_put(rq); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 301260e23e52..c446eb34d6c6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -184,6 +184,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout) spin_lock(f->lock); + if 
(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags)) + goto out; + if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 2ce4d877d33e..3347c54f3a87 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -65,17 +65,31 @@ struct dma_fence_cb; struct dma_fence { spinlock_t *lock; const struct dma_fence_ops *ops; - /* We clear the callback list on kref_put so that by the time we - * release the fence it is unused. No one should be adding to the cb_list - * that they don't themselves hold a reference for. + /* + * We clear the callback list on kref_put so that by the time we + * release the fence it is unused. No one should be adding to the + * cb_list that they don't themselves hold a reference for. + * + * The lifetime of the timestamp is similarly tied to both the + * rcu freelist and the cb_list. The timestamp is only set upon + * signaling while simultaneously notifying the cb_list. Ergo, we + * only use either the cb_list or timestamp. Upon destruction, + * neither are accessible, and so we can use the rcu. This means + * that the cb_list is *only* valid until the signal bit is set, + * and to read either you *must* hold a reference to the fence, + * and not just the rcu_read_lock. + * + * Listed in chronological order. */ union { - struct rcu_head rcu; struct list_head cb_list; + /* @cb_list replaced by @timestamp on dma_fence_signal() */ + ktime_t timestamp; + /* @timestamp replaced by @rcu on dma_fence_release() */ + struct rcu_head rcu; }; u64 context; u64 seqno; - ktime_t timestamp; unsigned long flags; struct kref refcount; int error; -- cgit v1.2.3