summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c133
1 files changed, 76 insertions, 57 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6a8f27d0a742..d0ef50bf930a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -344,11 +344,14 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ struct page *page = virt_to_page(engine->status_page.page_addr);
+ phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
u32 addr;
- addr = dev_priv->status_page_dmah->busaddr;
+ addr = lower_32_bits(phys);
if (INTEL_GEN(dev_priv) >= 4)
- addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+ addr |= (phys >> 28) & 0xf0;
+
I915_WRITE(HWS_PGA, addr);
}
@@ -537,6 +540,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
if (INTEL_GEN(dev_priv) > 2)
I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
+ /* Papering over lost _interrupts_ immediately following the restart */
+ intel_engine_wakeup(engine);
out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -1013,24 +1018,22 @@ i915_emit_bb_start(struct i915_request *rq,
return 0;
}
-
-
-int intel_ring_pin(struct intel_ring *ring,
- struct drm_i915_private *i915,
- unsigned int offset_bias)
+int intel_ring_pin(struct intel_ring *ring)
{
- enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
struct i915_vma *vma = ring->vma;
+ enum i915_map_type map =
+ HAS_LLC(vma->vm->i915) ? I915_MAP_WB : I915_MAP_WC;
unsigned int flags;
void *addr;
int ret;
GEM_BUG_ON(ring->vaddr);
-
flags = PIN_GLOBAL;
- if (offset_bias)
- flags |= PIN_OFFSET_BIAS | offset_bias;
+
+ /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+ flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
@@ -1045,7 +1048,7 @@ int intel_ring_pin(struct intel_ring *ring,
return ret;
}
- ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
+ ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret))
return ret;
@@ -1230,8 +1233,7 @@ static int __context_pin(struct intel_context *ce)
return err;
}
- err = i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT,
- PIN_GLOBAL | PIN_HIGH);
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
return err;
@@ -1419,8 +1421,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
goto err;
}
- /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE);
+ err = intel_ring_pin(ring);
if (err)
goto err_ring;
@@ -1676,9 +1677,26 @@ static int switch_context(struct i915_request *rq)
GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
if (ppgtt) {
- ret = load_pd_dir(rq, ppgtt);
- if (ret)
- goto err;
+ int loops;
+
+ /*
+ * Baytail takes a little more convincing that it really needs
+ * to reload the PD between contexts. It is not just a little
+ * longer, as adding more stalls after the load_pd_dir (i.e.
+ * adding a long loop around flush_pd_dir) is not as effective
+ * as reloading the PD umpteen times. 32 is derived from
+ * experimentation (gem_exec_parallel/fds) and has no good
+ * explanation.
+ */
+ loops = 1;
+ if (engine->id == BCS && IS_VALLEYVIEW(engine->i915))
+ loops = 32;
+
+ do {
+ ret = load_pd_dir(rq, ppgtt);
+ if (ret)
+ goto err;
+ } while (--loops);
if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) {
unwind_mm = intel_engine_flag(engine);
@@ -1706,9 +1724,29 @@ static int switch_context(struct i915_request *rq)
}
if (ppgtt) {
+ ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (ret)
+ goto err_mm;
+
ret = flush_pd_dir(rq);
if (ret)
goto err_mm;
+
+ /*
+ * Not only do we need a full barrier (post-sync write) after
+ * invalidating the TLBs, but we need to wait a little bit
+ * longer. Whether this is merely delaying us, or the
+ * subsequent flush is a key part of serialising with the
+ * post-sync op, this extra pass appears vital before a
+ * mm switch!
+ */
+ ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (ret)
+ goto err_mm;
+
+ ret = engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ goto err_mm;
}
if (ctx->remap_slice) {
@@ -1946,7 +1984,7 @@ static void gen6_bsd_submit_request(struct i915_request *request)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
-static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+static int mi_flush_dw(struct i915_request *rq, u32 flags)
{
u32 cmd, *cs;
@@ -1956,7 +1994,8 @@ static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
cmd = MI_FLUSH_DW;
- /* We always require a command barrier so that subsequent
+ /*
+ * We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
@@ -1964,22 +2003,33 @@ static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
/*
- * Bspec vol 1c.5 - video engine command streamer:
+ * Bspec vol 1c.3 - blitter engine command streamer:
* "If ENABLED, all TLBs will be invalidated once the flush
* operation is complete. This bit is only valid when the
* Post-Sync Operation field is a value of 1h or 3h."
*/
- if (mode & EMIT_INVALIDATE)
- cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+ cmd |= flags;
*cs++ = cmd;
*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = 0;
*cs++ = MI_NOOP;
+
intel_ring_advance(rq, cs);
+
return 0;
}
+static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
+{
+ return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
+}
+
+static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+ return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
+}
+
static int
hsw_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
@@ -1992,9 +2042,7 @@ hsw_emit_bb_start(struct i915_request *rq,
return PTR_ERR(cs);
*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
- 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
- (dispatch_flags & I915_DISPATCH_RS ?
- MI_BATCH_RESOURCE_STREAMER : 0);
+ 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
/* bit0-7 is the length on GEN6+ */
*cs++ = offset;
intel_ring_advance(rq, cs);
@@ -2026,36 +2074,7 @@ gen6_emit_bb_start(struct i915_request *rq,
static int gen6_ring_flush(struct i915_request *rq, u32 mode)
{
- u32 cmd, *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cmd = MI_FLUSH_DW;
-
- /* We always require a command barrier so that subsequent
- * commands, such as breadcrumb interrupts, are strictly ordered
- * wrt the contents of the write cache being flushed to memory
- * (and thus being coherent from the CPU).
- */
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
- /*
- * Bspec vol 1c.3 - blitter engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- if (mode & EMIT_INVALIDATE)
- cmd |= MI_INVALIDATE_TLB;
- *cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
+ return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,