summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c1377
1 files changed, 881 insertions, 496 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 82b7ace62d97..d42584439f51 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -136,9 +136,12 @@
#include "gem/i915_gem_context.h"
#include "i915_drv.h"
-#include "i915_gem_render_state.h"
+#include "i915_perf.h"
+#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
@@ -161,6 +164,15 @@
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
+#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
+#define GEN12_IDLE_CTX_ID 0x7FF
+#define GEN12_CSB_CTX_VALID(csb_dw) \
+ (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
+
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
#define WA_TAIL_DWORDS 2
@@ -214,13 +226,34 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
return container_of(engine, struct virtual_engine, base);
}
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine);
+static int __execlists_context_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+
static void execlists_init_reg_state(u32 *reg_state,
struct intel_context *ce,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+ return (i915_ggtt_offset(engine->status_page.vma) +
+ I915_GEM_HWS_PREEMPT_ADDR);
+}
+
+static inline void
+ring_set_paused(const struct intel_engine_cs *engine, int state)
+{
+ /*
+ * We inspect HWS_PREEMPT with a semaphore inside
+ * engine->emit_fini_breadcrumb. If the dword is true,
+ * the ring is paused as the semaphore will busywait
+ * until the dword is false.
+ */
+ engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
+ if (state)
+ wmb();
+}
+
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
@@ -236,6 +269,17 @@ static int effective_prio(const struct i915_request *rq)
int prio = rq_prio(rq);
/*
+ * If this request is special and must not be interrupted at any
+ * cost, so be it. Note we are only checking the most recent request
+ * in the context and so may be masking an earlier vip request. It
+ * is hoped that under the conditions where nopreempt is used, this
+ * will not matter (i.e. all requests to that context will be
+ * nopreempt for as long as desired).
+ */
+ if (i915_request_has_nopreempt(rq))
+ prio = I915_PRIORITY_UNPREEMPTABLE;
+
+ /*
* On unwinding the active request, we give it a priority bump
* if it has completed waiting on any semaphore. If we know that
* the request has already started, we can prevent an unwanted
@@ -245,6 +289,7 @@ static int effective_prio(const struct i915_request *rq)
prio |= I915_PRIORITY_NOSEMAPHORE;
/* Restrict mere WAIT boosts from triggering preemption */
+ BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
return prio | __NO_PREEMPTION;
}
@@ -271,10 +316,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
{
int last_prio;
- if (!engine->preempt_context)
- return false;
-
- if (i915_request_completed(rq))
+ if (!intel_engine_has_semaphores(engine))
return false;
/*
@@ -338,9 +380,6 @@ __maybe_unused static inline bool
assert_priority_queue(const struct i915_request *prev,
const struct i915_request *next)
{
- const struct intel_engine_execlists *execlists =
- &prev->engine->execlists;
-
/*
* Without preemption, the prev may refer to the still active element
* which we refuse to let go.
@@ -348,7 +387,7 @@ assert_priority_queue(const struct i915_request *prev,
* Even with preemption, there are times when we think it is better not
* to preempt and leave an ostensibly lower priority request in flight.
*/
- if (port_request(execlists->port) == prev)
+ if (i915_request_is_active(prev))
return true;
return rq_prio(prev) >= rq_prio(next);
@@ -389,13 +428,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
- desc = ctx->desc_template; /* bits 0-11 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
+ desc = INTEL_LEGACY_32B_CONTEXT;
+ if (i915_vm_is_4lvl(ce->vm))
+ desc = INTEL_LEGACY_64B_CONTEXT;
+ desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+ desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+ if (IS_GEN(engine->i915, 8))
+ desc |= GEN8_CTX_L3LLC_COHERENT;
desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
/* bits 12-31 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
-
/*
* The following 32bits are copied into the OA reports (dword 2).
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
@@ -442,13 +485,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
struct intel_engine_cs *owner;
if (i915_request_completed(rq))
- break;
+ continue; /* XXX */
__i915_request_unsubmit(rq);
unwind_wa_tail(rq);
- GEM_BUG_ON(rq->hw_context->inflight);
-
/*
* Push the request back into the queue for later resubmission.
* If this request is not native to this physical engine (i.e.
@@ -468,6 +509,19 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
+ /*
+ * Decouple the virtual breadcrumb before moving it
+ * back to the virtual engine -- we don't want the
+ * request to complete in the background and try
+ * and cancel the breadcrumb on the virtual engine
+ * (instead of the old engine where it is linked)!
+ */
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &rq->fence.flags)) {
+ spin_lock(&rq->lock);
+ i915_request_cancel_breadcrumb(rq);
+ spin_unlock(&rq->lock);
+ }
rq->engine = owner;
owner->submit_request(rq);
active = NULL;
@@ -500,32 +554,45 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port)
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
{
- execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+ struct intel_engine_cs * const engine = rq->engine;
+ struct intel_context * const ce = rq->hw_context;
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+ intel_context_get(ce);
+
+ intel_gt_pm_get(engine->gt);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+ intel_engine_context_in(engine);
+
+ return engine;
}
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
{
- GEM_BUG_ON(rq->hw_context->inflight);
+ struct intel_context * const ce = rq->hw_context;
+ struct intel_engine_cs *old;
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
- intel_engine_context_in(rq->engine);
- rq->hw_context->inflight = rq->engine;
+ GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
+ trace_i915_request_in(rq, idx);
+
+ old = READ_ONCE(ce->inflight);
+ do {
+ if (!old) {
+ WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+ break;
+ }
+ } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
+
+ GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+ return i915_request_get(rq);
}
-static void kick_siblings(struct i915_request *rq)
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
{
- struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct i915_request *next = READ_ONCE(ve->request);
if (next && next->execution_mask & ~rq->execution_mask)
@@ -533,29 +600,53 @@ static void kick_siblings(struct i915_request *rq)
}
static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+__execlists_schedule_out(struct i915_request *rq,
+ struct intel_engine_cs * const engine)
{
- rq->hw_context->inflight = NULL;
- intel_engine_context_out(rq->engine);
- execlists_context_status_change(rq, status);
- trace_i915_request_out(rq);
+ struct intel_context * const ce = rq->hw_context;
+
+ intel_engine_context_out(engine);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ intel_gt_pm_put(engine->gt);
/*
- * If this is part of a virtual engine, its next request may have
- * been blocked waiting for access to the active context. We have
- * to kick all the siblings again in case we need to switch (e.g.
- * the next request is not runnable on this engine). Hopefully,
- * we will already have submitted the next request before the
- * tasklet runs and do not need to rebuild each virtual tree
- * and kick everyone again.
+ * If this is part of a virtual engine, its next request may
+ * have been blocked waiting for access to the active context.
+ * We have to kick all the siblings again in case we need to
+ * switch (e.g. the next request is not runnable on this
+ * engine). Hopefully, we will already have submitted the next
+ * request before the tasklet runs and do not need to rebuild
+ * each virtual tree and kick everyone again.
*/
- if (rq->engine != rq->hw_context->engine)
- kick_siblings(rq);
+ if (ce->engine != engine)
+ kick_siblings(rq, ce);
+
+ intel_context_put(ce);
+}
+
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+ struct intel_context * const ce = rq->hw_context;
+ struct intel_engine_cs *cur, *old;
+
+ trace_i915_request_out(rq);
+ GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+ old = READ_ONCE(ce->inflight);
+ do
+ cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+ while (!try_cmpxchg(&ce->inflight, &old, cur));
+ if (!cur)
+ __execlists_schedule_out(rq, old);
+
+ i915_request_put(rq);
}
-static u64 execlists_update_context(struct i915_request *rq)
+static u64 execlists_update_context(const struct i915_request *rq)
{
struct intel_context *ce = rq->hw_context;
+ u64 desc;
ce->lrc_reg_state[CTX_RING_TAIL + 1] =
intel_ring_set_tail(rq->ring, rq->tail);
@@ -576,7 +667,11 @@ static u64 execlists_update_context(struct i915_request *rq)
* wmb).
*/
mb();
- return ce->lrc_desc;
+
+ desc = ce->lrc_desc;
+ ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+
+ return desc;
}
static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -590,12 +685,65 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
}
}
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+ const char *msg,
+ struct i915_request * const *ports)
+{
+ const struct intel_engine_cs *engine =
+ container_of(execlists, typeof(*engine), execlists);
+
+ GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+ engine->name, msg,
+ ports[0]->fence.context,
+ ports[0]->fence.seqno,
+ i915_request_completed(ports[0]) ? "!" :
+ i915_request_started(ports[0]) ? "*" :
+ "",
+ ports[1] ? ports[1]->fence.context : 0,
+ ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+ const char *msg)
+{
+ struct i915_request * const *port, *rq;
+ struct intel_context *ce = NULL;
+
+ trace_ports(execlists, msg, execlists->pending);
+
+ if (!execlists->pending[0])
+ return false;
+
+ if (execlists->pending[execlists_num_ports(execlists)])
+ return false;
+
+ for (port = execlists->pending; (rq = *port); port++) {
+ if (ce == rq->hw_context)
+ return false;
+
+ ce = rq->hw_context;
+ if (i915_request_completed(rq))
+ continue;
+
+ if (i915_active_is_idle(&ce->active))
+ return false;
+
+ if (!i915_vma_is_pinned(ce->state))
+ return false;
+ }
+
+ return ce;
+}
+
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
unsigned int n;
+ GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
/*
* We can skip acquiring intel_runtime_pm_get() here as it was taken
* on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -604,7 +752,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
* that all ELSP are drained i.e. we have processed the CSB,
* before allowing ourselves to idle and calling intel_runtime_pm_put().
*/
- GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref));
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
/*
* ELSQ note: the submit queue is not cleared after being submitted
@@ -613,38 +761,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
* of elsq entries, keep this in mind before changing the loop below.
*/
for (n = execlists_num_ports(execlists); n--; ) {
- struct i915_request *rq;
- unsigned int count;
- u64 desc;
-
- rq = port_unpack(&port[n], &count);
- if (rq) {
- GEM_BUG_ON(count > !n);
- if (!count++)
- execlists_context_schedule_in(rq);
- port_set(&port[n], port_pack(rq, count));
- desc = execlists_update_context(rq);
- GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
- GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name, n,
- port[n].context_id, count,
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq),
- rq_prio(rq));
- } else {
- GEM_BUG_ON(!n);
- desc = 0;
- }
+ struct i915_request *rq = execlists->pending[n];
- write_desc(execlists, desc, n);
+ write_desc(execlists,
+ rq ? execlists_update_context(rq) : 0,
+ n);
}
/* we need to manually load the submit queue */
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
}
static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -668,6 +794,7 @@ static bool can_merge_ctx(const struct intel_context *prev,
static bool can_merge_rq(const struct i915_request *prev,
const struct i915_request *next)
{
+ GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -676,58 +803,6 @@ static bool can_merge_rq(const struct i915_request *prev,
return true;
}
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
- GEM_BUG_ON(rq == port_request(port));
-
- if (port_isset(port))
- i915_request_put(port_request(port));
-
- port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce = engine->preempt_context;
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
-
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
-
- write_desc(execlists, ce->lrc_desc, n);
-
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
- execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
- (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
- if (inject_preempt_hang(execlists))
- return;
-
- execlists_cancel_port_requests(execlists);
- __unwind_incomplete_requests(container_of(execlists,
- struct intel_engine_cs,
- execlists));
-}
-
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
@@ -792,7 +867,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
* we reuse the register offsets). This is a very small
* hystersis on the greedy seelction algorithm.
*/
- inflight = READ_ONCE(ve->context.inflight);
+ inflight = intel_context_inflight(&ve->context);
if (inflight && inflight != engine)
return false;
@@ -815,13 +890,120 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
spin_unlock(&old->breadcrumbs.irq_lock);
}
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+ struct i915_request * const *last = execlists->active;
+
+ while (*last && i915_request_completed(*last))
+ last++;
+
+ return *last;
+}
+
+static void defer_request(struct i915_request *rq, struct list_head * const pl)
+{
+ LIST_HEAD(list);
+
+ /*
+ * We want to move the interrupted request to the back of
+ * the round-robin list (i.e. its priority level), but
+ * in doing so, we must then move all requests that were in
+ * flight and were waiting for the interrupted request to
+ * be run after it again.
+ */
+ do {
+ struct i915_dependency *p;
+
+ GEM_BUG_ON(i915_request_is_active(rq));
+ list_move_tail(&rq->sched.link, pl);
+
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ /* Leave semaphores spinning on the other engines */
+ if (w->engine != rq->engine)
+ continue;
+
+ /* No waiter should start before its signaler */
+ GEM_BUG_ON(i915_request_started(w) &&
+ !i915_request_completed(rq));
+
+ GEM_BUG_ON(i915_request_is_active(w));
+ if (list_empty(&w->sched.link))
+ continue; /* Not yet submitted; unready */
+
+ if (rq_prio(w) < rq_prio(rq))
+ continue;
+
+ GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static void defer_active(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = __unwind_incomplete_requests(engine);
+ if (!rq)
+ return;
+
+ defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+}
+
+static bool
+need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+ int hint;
+
+ if (!intel_engine_has_semaphores(engine))
+ return false;
+
+ if (list_is_last(&rq->sched.link, &engine->active.requests))
+ return false;
+
+ hint = max(rq_prio(list_next_entry(rq, sched.link)),
+ engine->execlists.queue_priority_hint);
+
+ return hint >= effective_prio(rq);
+}
+
+static int
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+ if (list_is_last(&rq->sched.link, &engine->active.requests))
+ return INT_MIN;
+
+ return rq_prio(list_next_entry(rq, sched.link));
+}
+
+static bool
+enable_timeslice(const struct intel_engine_execlists *execlists)
+{
+ const struct i915_request *rq = *execlists->active;
+
+ if (i915_request_completed(rq))
+ return false;
+
+ return execlists->switch_priority_hint >= effective_prio(rq);
+}
+
+static void record_preemption(struct intel_engine_execlists *execlists)
+{
+ (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- const struct execlist_port * const last_port =
- &execlists->port[execlists->port_mask];
- struct i915_request *last = port_request(port);
+ struct i915_request **port = execlists->pending;
+ struct i915_request ** const last_port = port + execlists->port_mask;
+ struct i915_request *last;
struct rb_node *rb;
bool submit = false;
@@ -867,65 +1049,100 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
break;
}
+ /*
+ * If the queue is higher priority than the last
+ * request in the currently active context, submit afresh.
+ * We will resubmit again afterwards in case we need to split
+ * the active context to interject the preemption request,
+ * i.e. we will retrigger preemption following the ack in case
+ * of trouble.
+ */
+ last = last_active(execlists);
if (last) {
- /*
- * Don't resubmit or switch until all outstanding
- * preemptions (lite-restore) are seen. Then we
- * know the next preemption status we see corresponds
- * to this ELSP update.
- */
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
- GEM_BUG_ON(!port_count(&port[0]));
+ if (need_preempt(engine, last, rb)) {
+ GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ engine->name,
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
+ record_preemption(execlists);
- /*
- * If we write to ELSP a second time before the HW has had
- * a chance to respond to the previous write, we can confuse
- * the HW and hit "undefined behaviour". After writing to ELSP,
- * we must then wait until we see a context-switch event from
- * the HW to indicate that it has had a chance to respond.
- */
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
- return;
+ /*
+ * Don't let the RING_HEAD advance past the breadcrumb
+ * as we unwind (and until we resubmit) so that we do
+ * not accidentally tell it to go backwards.
+ */
+ ring_set_paused(engine, 1);
- if (need_preempt(engine, last, rb)) {
- inject_preempt_context(engine);
- return;
- }
+ /*
+ * Note that we have not stopped the GPU at this point,
+ * so we are unwinding the incomplete requests as they
+ * remain inflight and so by the time we do complete
+ * the preemption, some of the unwound requests may
+ * complete!
+ */
+ __unwind_incomplete_requests(engine);
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work). However, we have to double check that the
- * priorities of the ports haven't been switch.
- */
- if (port_count(&port[1]))
- return;
+ /*
+ * If we need to return to the preempted context, we
+ * need to skip the lite-restore and force it to
+ * reload the RING_TAIL. Otherwise, the HW has a
+ * tendency to ignore us rewinding the TAIL to the
+ * end of an earlier request.
+ */
+ last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ last = NULL;
+ } else if (need_timeslice(engine, last) &&
+ !timer_pending(&engine->execlists.timer)) {
+ GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
+ engine->name,
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
+ ring_set_paused(engine, 1);
+ defer_active(engine);
+
+ /*
+ * Unlike for preemption, if we rewind and continue
+ * executing the same context as previously active,
+ * the order of execution will remain the same and
+ * the tail will only advance. We do not need to
+ * force a full context restore, as a lite-restore
+ * is sufficient to resample the monotonic TAIL.
+ *
+ * If we switch to any other context, similarly we
+ * will not rewind TAIL of current context, and
+ * normal save/restore will preserve state and allow
+ * us to later continue executing the same request.
+ */
+ last = NULL;
+ } else {
+ /*
+ * Otherwise if we already have a request pending
+ * for execution after the current one, we can
+ * just wait until the next CS event before
+ * queuing more. In either case we will force a
+ * lite-restore preemption event, but if we wait
+ * we hopefully coalesce several updates into a single
+ * submission.
+ */
+ if (!list_is_last(&last->sched.link,
+ &engine->active.requests))
+ return;
+
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == rq:TAIL as we resubmit the
+ * request. See gen8_emit_fini_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
}
while (rb) { /* XXX virtual is always taking precedence */
@@ -955,9 +1172,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue;
}
+ if (i915_request_completed(rq)) {
+ ve->request = NULL;
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+
+ rq->engine = engine;
+ __i915_request_submit(rq);
+
+ spin_unlock(&ve->base.active.lock);
+
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
+
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- return; /* leave this rq for another engine */
+ return; /* leave this for another */
}
GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
@@ -1006,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
__i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
- submit = true;
- last = rq;
+ if (!i915_request_completed(rq)) {
+ submit = true;
+ last = rq;
+ }
}
spin_unlock(&ve->base.active.lock);
@@ -1021,6 +1254,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
+ if (i915_request_completed(rq))
+ goto skip;
+
/*
* Can we combine this request with the current port?
* It has to be the same context/ringbuffer and not
@@ -1060,19 +1296,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
-
- if (submit)
- port_assign(port, last);
+ *port = execlists_schedule_in(last, port - execlists->pending);
port++;
-
- GEM_BUG_ON(port_isset(port));
}
- __i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
-
last = rq;
submit = true;
+skip:
+ __i915_request_submit(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1097,54 +1328,34 @@ done:
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
+ GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
+ engine->name, execlists->queue_priority_hint,
+ yesno(submit));
if (submit) {
- port_assign(port, last);
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ memset(port + 1, 0, (last_port - port) * sizeof(*port));
+ execlists->switch_priority_hint =
+ switch_prio(engine, *execlists->pending);
execlists_submit_ports(engine);
+ } else {
+ ring_set_paused(engine, 0);
}
-
- /* We must always keep the beast fed if we have work piled up */
- GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
- !port_isset(execlists->port));
-
- /* Re-evaluate the executing context setup after each preemptive kick */
- if (last)
- execlists_user_begin(execlists, execlists->port);
-
- /* If the engine is now idle, so should be the flag; and vice versa. */
- GEM_BUG_ON(execlists_is_active(&engine->execlists,
- EXECLISTS_ACTIVE_USER) ==
- !port_isset(engine->execlists.port));
}
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
-
- while (num_ports-- && port_isset(port)) {
- struct i915_request *rq = port_request(port);
-
- GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
- rq->engine->name,
- (unsigned int)(port - execlists->port),
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq));
-
- GEM_BUG_ON(!execlists->active);
- execlists_context_schedule_out(rq,
- i915_request_completed(rq) ?
- INTEL_CONTEXT_SCHEDULE_OUT :
- INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+ struct i915_request * const *port, *rq;
- i915_request_put(rq);
+ for (port = execlists->pending; (rq = *port); port++)
+ execlists_schedule_out(rq);
+ memset(execlists->pending, 0, sizeof(execlists->pending));
- memset(port, 0, sizeof(*port));
- port++;
- }
-
- execlists_clear_all_active(execlists);
+ for (port = execlists->active; (rq = *port); port++)
+ execlists_schedule_out(rq);
+ execlists->active =
+ memset(execlists->inflight, 0, sizeof(execlists->inflight));
}
static inline void
@@ -1160,15 +1371,100 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
}
+enum csb_step {
+ CSB_NOP,
+ CSB_PROMOTE,
+ CSB_PREEMPT,
+ CSB_COMPLETE,
+};
+
+/*
+ * Starting with Gen12, the status has a new format:
+ *
+ * bit 0: switched to new queue
+ * bit 1: reserved
+ * bit 2: semaphore wait mode (poll or signal), only valid when
+ * switch detail is set to "wait on semaphore"
+ * bits 3-5: engine class
+ * bits 6-11: engine instance
+ * bits 12-14: reserved
+ * bits 15-25: sw context id of the lrc the GT switched to
+ * bits 26-31: sw counter of the lrc the GT switched to
+ * bits 32-35: context switch detail
+ * - 0: ctx complete
+ * - 1: wait on sync flip
+ * - 2: wait on vblank
+ * - 3: wait on scanline
+ * - 4: wait on semaphore
+ * - 5: context preempted (not on SEMAPHORE_WAIT or
+ * WAIT_FOR_EVENT)
+ * bit 36: reserved
+ * bits 37-43: wait detail (for switch detail 1 to 4)
+ * bits 44-46: reserved
+ * bits 47-57: sw context id of the lrc the GT switched away from
+ * bits 58-63: sw counter of the lrc the GT switched away from
+ */
+static inline enum csb_step
+gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+ u32 lower_dw = csb[0];
+ u32 upper_dw = csb[1];
+ bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
+ bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
+ bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+
+ if (!ctx_away_valid && ctx_to_valid)
+ return CSB_PROMOTE;
+
+ /*
+ * The context switch detail is not guaranteed to be 5 when a preemption
+ * occurs, so we can't just check for that. The check below works for
+ * all the cases we care about, including preemptions of WAIT
+ * instructions and lite-restore. Preempt-to-idle via the CTRL register
+ * would require some extra handling, but we don't support that.
+ */
+ if (new_queue && ctx_away_valid)
+ return CSB_PREEMPT;
+
+ /*
+ * switch detail = 5 is covered by the case above and we do not expect a
+ * context switch on an unsuccessful wait instruction since we always
+ * use polling mode.
+ */
+ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+
+ if (*execlists->active) {
+ GEM_BUG_ON(!ctx_away_valid);
+ return CSB_COMPLETE;
+ }
+
+ return CSB_NOP;
+}
+
+static inline enum csb_step
+gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+ unsigned int status = *csb;
+
+ if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
+ return CSB_PROMOTE;
+
+ if (status & GEN8_CTX_STATUS_PREEMPTED)
+ return CSB_PREEMPT;
+
+ if (*execlists->active)
+ return CSB_COMPLETE;
+
+ return CSB_NOP;
+}
+
static void process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
const u32 * const buf = execlists->csb_status;
const u8 num_entries = execlists->csb_size;
u8 head, tail;
- lockdep_assert_held(&engine->active.lock);
GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
/*
@@ -1198,9 +1494,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
- struct i915_request *rq;
- unsigned int status;
- unsigned int count;
+ enum csb_step csb_step;
if (++head == num_entries)
head = 0;
@@ -1223,68 +1517,43 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1],
- execlists->active);
-
- status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ buf[2 * head + 0], buf[2 * head + 1]);
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ if (INTEL_GEN(engine->i915) >= 12)
+ csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+ else
+ csb_step = gen8_csb_parse(execlists, buf + 2 * head);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
+ switch (csb_step) {
+ case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
+ trace_ports(execlists, "preempted", execlists->active);
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
+ while (*execlists->active)
+ execlists_schedule_out(*execlists->active++);
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
+ /* fallthrough */
+ case CSB_PROMOTE: /* switch pending to inflight */
+ GEM_BUG_ON(*execlists->active);
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+ execlists->active =
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending));
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->fence.context : 0,
- rq ? rq->fence.seqno : 0,
- rq ? hwsp_seqno(rq) : 0,
- rq ? rq_prio(rq) : 0);
+ if (enable_timeslice(execlists))
+ mod_timer(&execlists->timer, jiffies + 1);
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+ if (!inject_preempt_hang(execlists))
+ ring_set_paused(engine, 0);
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- /*
- * On the final event corresponding to the
- * submission of this context, we expect either
- * an element-switch event or a completion
- * event (and on completion, the active-idle
- * marker). No more preemptions, lite-restore
- * or otherwise.
- */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ WRITE_ONCE(execlists->pending[0], NULL);
+ break;
+
+ case CSB_COMPLETE: /* port0 completed, advanced to port1 */
+ trace_ports(execlists, "completed", execlists->active);
/*
* We rely on the hardware being strongly
@@ -1292,22 +1561,16 @@ static void process_csb(struct intel_engine_cs *engine)
* coherent (visible from the CPU) before the
* user interrupt and CSB is processed.
*/
- GEM_BUG_ON(!i915_request_completed(rq));
-
- execlists_context_schedule_out(rq,
- INTEL_CONTEXT_SCHEDULE_OUT);
- i915_request_put(rq);
+ GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
+ !reset_in_progress(execlists));
+ execlists_schedule_out(*execlists->active++);
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
+ GEM_BUG_ON(execlists->active - execlists->inflight >
+ execlists_num_ports(execlists));
+ break;
- port = execlists_port_complete(execlists, port);
- if (port_isset(port))
- execlists_user_begin(execlists, port);
- else
- execlists_user_end(execlists);
- } else {
- port_set(port, port_pack(rq, count));
+ case CSB_NOP:
+ break;
}
} while (head != tail);
@@ -1330,9 +1593,7 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
-
- process_csb(engine);
- if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+ if (!engine->execlists.pending[0])
execlists_dequeue(engine);
}
@@ -1345,14 +1606,21 @@ static void execlists_submission_tasklet(unsigned long data)
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
unsigned long flags;
- GEM_TRACE("%s awake?=%d, active=%x\n",
- engine->name,
- !!intel_wakeref_active(&engine->wakeref),
- engine->execlists.active);
+ process_csb(engine);
+ if (!READ_ONCE(engine->execlists.pending[0])) {
+ spin_lock_irqsave(&engine->active.lock, flags);
+ __execlists_submission_tasklet(engine);
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+ }
+}
- spin_lock_irqsave(&engine->active.lock, flags);
- __execlists_submission_tasklet(engine);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+static void execlists_submission_timer(struct timer_list *timer)
+{
+ struct intel_engine_cs *engine =
+ from_timer(engine, timer, execlists.timer);
+
+ /* Kick the tasklet for some interrupt coalescing and reset handling */
+ tasklet_hi_schedule(&engine->execlists.tasklet);
}
static void queue_request(struct intel_engine_cs *engine,
@@ -1376,12 +1644,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
tasklet_hi_schedule(&execlists->tasklet);
}
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- if (prio > engine->execlists.queue_priority_hint) {
- engine->execlists.queue_priority_hint = prio;
- __submit_queue_imm(engine);
- }
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ if (rq_prio(rq) <= execlists->queue_priority_hint)
+ return;
+
+ execlists->queue_priority_hint = rq_prio(rq);
+ __submit_queue_imm(engine);
}
static void execlists_submit_request(struct i915_request *request)
@@ -1397,7 +1669,7 @@ static void execlists_submit_request(struct i915_request *request)
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
GEM_BUG_ON(list_empty(&request->sched.link));
- submit_queue(engine, rq_prio(request));
+ submit_queue(engine, request);
spin_unlock_irqrestore(&engine->active.lock, flags);
}
@@ -1405,9 +1677,7 @@ static void execlists_submit_request(struct i915_request *request)
static void __execlists_context_fini(struct intel_context *ce)
{
intel_ring_put(ce->ring);
-
- GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
- i915_gem_object_put(ce->state->obj);
+ i915_vma_put(ce->state);
}
static void execlists_context_destroy(struct kref *kref)
@@ -1420,13 +1690,45 @@ static void execlists_context_destroy(struct kref *kref)
if (ce->state)
__execlists_context_fini(ce);
+ intel_context_fini(ce);
intel_context_free(ce);
}
+static void
+set_redzone(void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
+ vaddr += engine->context_size;
+
+ memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+}
+
+static void
+check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
+ vaddr += engine->context_size;
+
+ if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+ dev_err_once(engine->i915->drm.dev,
+ "%s context redzone overwritten!\n",
+ engine->name);
+}
+
static void execlists_context_unpin(struct intel_context *ce)
{
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+ ce->engine);
+
i915_gem_context_unpin_hw_id(ce->gem_context);
i915_gem_object_unpin_map(ce->state->obj);
+ intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
@@ -1444,9 +1746,12 @@ __execlists_update_reg_state(struct intel_context *ce,
regs[CTX_RING_TAIL + 1] = ring->tail;
/* RPCS */
- if (engine->class == RENDER_CLASS)
+ if (engine->class == RENDER_CLASS) {
regs[CTX_R_PWR_CLK_STATE + 1] =
intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+
+ i915_oa_init_reg_state(engine, ce, regs);
+ }
}
static int
@@ -1456,19 +1761,12 @@ __execlists_context_pin(struct intel_context *ce,
void *vaddr;
int ret;
- GEM_BUG_ON(!ce->gem_context->vm);
-
- ret = execlists_context_deferred_alloc(ce, engine);
- if (ret)
- goto err;
GEM_BUG_ON(!ce->state);
- ret = intel_context_active_acquire(ce,
- engine->i915->ggtt.pin_bias |
- PIN_OFFSET_BIAS |
- PIN_HIGH);
+ ret = intel_context_active_acquire(ce);
if (ret)
goto err;
+ GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
@@ -1501,6 +1799,11 @@ static int execlists_context_pin(struct intel_context *ce)
return __execlists_context_pin(ce, ce->engine);
}
+static int execlists_context_alloc(struct intel_context *ce)
+{
+ return __execlists_context_alloc(ce, ce->engine);
+}
+
static void execlists_context_reset(struct intel_context *ce)
{
/*
@@ -1524,6 +1827,8 @@ static void execlists_context_reset(struct intel_context *ce)
}
static const struct intel_context_ops execlists_context_ops = {
+ .alloc = execlists_context_alloc,
+
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
@@ -1569,8 +1874,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
static int emit_pdps(struct i915_request *rq)
{
const struct intel_engine_cs * const engine = rq->engine;
- struct i915_ppgtt * const ppgtt =
- i915_vm_to_ppgtt(rq->gem_context->vm);
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
int err, i;
u32 *cs;
@@ -1643,7 +1947,7 @@ static int execlists_request_alloc(struct i915_request *request)
*/
/* Unconditionally invalidate GPU caches and TLBs. */
- if (i915_vm_is_4lvl(request->gem_context->vm))
+ if (i915_vm_is_4lvl(request->hw_context->vm))
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
else
ret = emit_pdps(request);
@@ -1676,7 +1980,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
/* NB no one else is allowed to scribble over scratch + 256! */
*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_scratch_offset(engine->i915) + 256;
+ *batch++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
*batch++ = 0;
*batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1690,12 +1995,19 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_scratch_offset(engine->i915) + 256;
+ *batch++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
*batch++ = 0;
return batch;
}
+static u32 slm_offset(struct intel_engine_cs *engine)
+{
+ return intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
+}
+
/*
* Typically we only have one indirect_ctx and per_ctx batch buffer which are
* initialized at the beginning and shared across all contexts but this field
@@ -1727,8 +2039,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- i915_scratch_offset(engine->i915) +
- 2 * CACHELINE_BYTES);
+ slm_offset(engine));
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1874,7 +2185,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
if (IS_ERR(obj))
return PTR_ERR(obj);
- vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
@@ -1914,6 +2225,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return 0;
switch (INTEL_GEN(engine->i915)) {
+ case 12:
case 11:
return 0;
case 10:
@@ -1970,22 +2282,23 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
static void enable_execlists(struct intel_engine_cs *engine)
{
+ u32 mode;
+
+ assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
+
intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
if (INTEL_GEN(engine->i915) >= 11)
- ENGINE_WRITE(engine,
- RING_MODE_GEN7,
- _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+ mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
else
- ENGINE_WRITE(engine,
- RING_MODE_GEN7,
- _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+ mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
+ ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
- ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+ ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
- ENGINE_WRITE(engine,
- RING_HWS_PGA,
- i915_ggtt_offset(engine->status_page.vma));
+ ENGINE_WRITE_FW(engine,
+ RING_HWS_PGA,
+ i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
}
@@ -1993,7 +2306,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
{
bool unexpected = false;
- if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) {
+ if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
unexpected = true;
}
@@ -2041,34 +2354,32 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
__tasklet_disable_sync_once(&execlists->tasklet);
GEM_BUG_ON(!reset_in_progress(execlists));
- intel_engine_stop_cs(engine);
-
/* And flush any current direct submission. */
spin_lock_irqsave(&engine->active.lock, flags);
spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
- const struct intel_ring *ring = rq->ring;
- const u32 *regs = rq->hw_context->lrc_reg_state;
-
- /* Quick spot check for the common signs of context corruption */
-
- if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
- (RING_CTL_SIZE(ring->size) | RING_VALID))
- return false;
- if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
- return false;
-
- return true;
+ /*
+ * We stop engines, otherwise we might get failed reset and a
+ * dead gpu (on elk). Also as modern gpu as kbl can suffer
+ * from system hang if batchbuffer is progressing when
+ * the reset is issued, regardless of READY_TO_RESET ack.
+ * Thus assume it is best to stop engines on all gens
+ * where we have a gpu reset.
+ *
+ * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+ *
+ * FIXME: Wa for more modern gens needs to be validated
+ */
+ intel_engine_stop_cs(engine);
}
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+static void reset_csb_pointers(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
const unsigned int reset_value = execlists->csb_size - 1;
+ ring_set_paused(engine, 0);
+
/*
* After a reset, the HW starts writing into CSB entry [0]. We
* therefore have to set our HEAD pointer back one entry so that
@@ -2088,15 +2399,15 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
static struct i915_request *active_request(struct i915_request *rq)
{
- const struct list_head * const list = &rq->engine->active.requests;
- const struct intel_context * const context = rq->hw_context;
+ const struct list_head * const list = &rq->timeline->requests;
+ const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;
- list_for_each_entry_from_reverse(rq, list, sched.link) {
+ list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq))
break;
- if (rq->hw_context != context)
+ if (rq->hw_context != ce)
break;
active = rq;
@@ -2115,33 +2426,27 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
process_csb(engine); /* drain preemption events */
/* Following the reset, we need to reload the CSB read/write pointers */
- reset_csb_pointers(&engine->execlists);
+ reset_csb_pointers(engine);
/*
* Save the currently executing context, even if we completed
* its request, it was still running at the time of the
* reset and will have been clobbered.
*/
- if (!port_isset(execlists->port))
- goto out_clear;
+ rq = execlists_active(execlists);
+ if (!rq)
+ goto unwind;
- rq = port_request(execlists->port);
ce = rq->hw_context;
-
- /*
- * Catch up with any missed context-switch interrupts.
- *
- * Ideally we would just read the remaining CSB entries now that we
- * know the gpu is idle. However, the CSB registers are sometimes^W
- * often trashed across a GPU reset! Instead we have to rely on
- * guessing the missed context-switch events by looking at what
- * requests were completed.
- */
- execlists_cancel_port_requests(execlists);
-
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
rq = active_request(rq);
- if (!rq)
+ if (!rq) {
+ ce->ring->head = ce->ring->tail;
goto out_replay;
+ }
+
+ ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
/*
* If this request hasn't started yet, e.g. it is waiting on a
@@ -2155,7 +2460,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* Otherwise, if we have not started yet, the request should replay
* perfectly and we do not need to flag the result as being erroneous.
*/
- if (!i915_request_started(rq) && lrc_regs_ok(rq))
+ if (!i915_request_started(rq))
goto out_replay;
/*
@@ -2169,8 +2474,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* and have to at least restore the RING register in the context
* image back to the expected values to skip over the guilty request.
*/
- i915_reset_request(rq, stalled);
- if (!stalled && lrc_regs_ok(rq))
+ __i915_request_reset(rq, stalled);
+ if (!stalled)
goto out_replay;
/*
@@ -2190,17 +2495,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
execlists_init_reg_state(regs, ce, engine, ce->ring);
out_replay:
- /* Rerun the request; its payload has been neutered (if guilty). */
- ce->ring->head =
- rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+ GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
+ engine->name, ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
__execlists_update_reg_state(ce, engine);
+unwind:
/* Push back any incomplete requests for replay after the reset. */
+ cancel_port_requests(execlists);
__unwind_incomplete_requests(engine);
-
-out_clear:
- execlists_clear_all_active(execlists);
}
static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2296,7 +2599,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
- GEM_BUG_ON(port_isset(execlists->port));
GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
execlists->tasklet.func = nop_submission_tasklet;
@@ -2434,7 +2736,8 @@ static int gen8_emit_flush_render(struct i915_request *request,
{
struct intel_engine_cs *engine = request->engine;
u32 scratch_addr =
- i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
+ intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
@@ -2499,6 +2802,63 @@ static int gen8_emit_flush_render(struct i915_request *request,
return 0;
}
+static int gen11_emit_flush_render(struct i915_request *request,
+ u32 mode)
+{
+ struct intel_engine_cs *engine = request->engine;
+ const u32 scratch_addr =
+ intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+
+ if (mode & EMIT_FLUSH) {
+ u32 *cs;
+ u32 flags = 0;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ intel_ring_advance(request, cs);
+ }
+
+ if (mode & EMIT_INVALIDATE) {
+ u32 *cs;
+ u32 flags = 0;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ intel_ring_advance(request, cs);
+ }
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -2514,15 +2874,28 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
return cs;
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- request->timeline->hwsp_offset,
- 0);
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = intel_hws_preempt_address(request->engine);
+ *cs++ = 0;
+
+ return cs;
+}
+static __always_inline u32*
+gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
+ u32 *cs)
+{
*cs++ = MI_USER_INTERRUPT;
+
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ if (intel_engine_has_semaphores(request->engine))
+ cs = emit_preempt_busywait(request, cs);
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@@ -2530,51 +2903,53 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ 0);
+
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE);
+
+ /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL,
0);
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- request->tail = intel_ring_offset(request, cs);
- assert_ring_tail_valid(request->ring, request->tail);
-
- return gen8_emit_wa_tail(request, cs);
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
}
-static int gen8_init_rcs_context(struct i915_request *rq)
+static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
+ u32 *cs)
{
- int ret;
-
- ret = intel_engine_emit_ctx_wa(rq);
- if (ret)
- return ret;
-
- ret = intel_rcs_context_init_mocs(rq);
- /*
- * Failing to program the MOCS is non-fatal.The system will not
- * run at peak performance. So generate an error and carry on.
- */
- if (ret)
- DRM_ERROR("MOCS failed to program: expect performance issues.\n");
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
- return i915_gem_render_state_emit(rq);
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
{
- intel_engine_park(engine);
+ del_timer(&engine->execlists.timer);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -2592,11 +2967,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (!intel_vgpu_active(engine->i915))
+ if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (engine->preempt_context &&
- HAS_LOGICAL_RING_PREEMPTION(engine->i915))
- engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ }
}
static void execlists_destroy(struct intel_engine_cs *engine)
@@ -2665,22 +3040,32 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
-int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+static void rcs_submission_override(struct intel_engine_cs *engine)
{
- /* Intentionally left blank. */
- engine->buffer = NULL;
+ switch (INTEL_GEN(engine->i915)) {
+ case 12:
+ case 11:
+ engine->emit_flush = gen11_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
+ break;
+ default:
+ engine->emit_flush = gen8_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+ break;
+ }
+}
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+{
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
+ timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
- if (engine->class == RENDER_CLASS) {
- engine->init_context = gen8_init_rcs_context;
- engine->emit_flush = gen8_emit_flush_render;
- engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
- }
+ if (engine->class == RENDER_CLASS)
+ rcs_submission_override(engine);
return 0;
}
@@ -2697,9 +3082,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
if (ret)
return ret;
- intel_engine_init_workarounds(engine);
- intel_engine_init_whitelist(engine);
-
if (intel_init_workaround_bb(engine))
/*
* We continue even if we fail to initialize WA batch
@@ -2718,11 +3100,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
i915_mmio_reg_offset(RING_ELSP(base));
}
- execlists->preempt_complete_status = ~0u;
- if (engine->preempt_context)
- execlists->preempt_complete_status =
- upper_32_bits(engine->preempt_context->lrc_desc);
-
execlists->csb_status =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -2734,7 +3111,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
- reset_csb_pointers(execlists);
+ reset_csb_pointers(engine);
return 0;
}
@@ -2747,6 +3124,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
default:
MISSING_CASE(INTEL_GEN(engine->i915));
/* fall through */
+ case 12:
+ indirect_ctx_offset =
+ GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ break;
case 11:
indirect_ctx_offset =
GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
@@ -2773,7 +3154,7 @@ static void execlists_init_reg_state(u32 *regs,
struct intel_engine_cs *engine,
struct intel_ring *ring)
{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
bool rcs = engine->class == RENDER_CLASS;
u32 base = engine->mmio_base;
@@ -2864,8 +3245,6 @@ static void execlists_init_reg_state(u32 *regs,
if (rcs) {
regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
- i915_oa_init_reg_state(engine, ce, regs);
}
regs[CTX_END] = MI_BATCH_BUFFER_END;
@@ -2890,6 +3269,8 @@ populate_lr_context(struct intel_context *ce,
return ret;
}
+ set_redzone(vaddr, engine);
+
if (engine->default_state) {
/*
* We only want to copy over the template context state;
@@ -2917,11 +3298,6 @@ populate_lr_context(struct intel_context *ce,
if (!engine->default_state)
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- if (ce->gem_context == engine->i915->preempt_context &&
- INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
ret = 0;
err_unpin_ctx:
@@ -2932,27 +3308,16 @@ err_unpin_ctx:
return ret;
}
-static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
-{
- if (ctx->timeline)
- return i915_timeline_get(ctx->timeline);
- else
- return i915_timeline_create(ctx->i915, NULL);
-}
-
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine)
+static int __execlists_context_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj;
+ struct intel_ring *ring;
struct i915_vma *vma;
u32 context_size;
- struct intel_ring *ring;
- struct i915_timeline *timeline;
int ret;
- if (ce->state)
- return 0;
-
+ GEM_BUG_ON(ce->state);
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
/*
@@ -2960,27 +3325,32 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
* for our own use and for sharing with the GuC.
*/
context_size += LRC_HEADER_PAGES * PAGE_SIZE;
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ context_size += I915_GTT_PAGE_SIZE; /* for redzone */
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);
- vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto error_deref_obj;
}
- timeline = get_timeline(ce->gem_context);
- if (IS_ERR(timeline)) {
- ret = PTR_ERR(timeline);
- goto error_deref_obj;
+ if (!ce->timeline) {
+ struct intel_timeline *tl;
+
+ tl = intel_timeline_create(engine->gt, NULL);
+ if (IS_ERR(tl)) {
+ ret = PTR_ERR(tl);
+ goto error_deref_obj;
+ }
+
+ ce->timeline = tl;
}
- ring = intel_engine_create_ring(engine,
- timeline,
- ce->gem_context->ring_size);
- i915_timeline_put(timeline);
+ ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
@@ -3038,6 +3408,7 @@ static void virtual_context_destroy(struct kref *kref)
if (ve->context.state)
__execlists_context_fini(&ve->context);
+ intel_context_fini(&ve->context);
kfree(ve->bonds);
kfree(ve);
@@ -3090,6 +3461,8 @@ static void virtual_context_enter(struct intel_context *ce)
for (n = 0; n < ve->num_siblings; n++)
intel_engine_pm_get(ve->siblings[n]);
+
+ intel_timeline_enter(ce->timeline);
}
static void virtual_context_exit(struct intel_context *ce)
@@ -3097,6 +3470,8 @@ static void virtual_context_exit(struct intel_context *ce)
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
unsigned int n;
+ intel_timeline_exit(ce->timeline);
+
for (n = 0; n < ve->num_siblings; n++)
intel_engine_pm_put(ve->siblings[n]);
}
@@ -3290,11 +3665,11 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(-ENOMEM);
ve->base.i915 = ctx->i915;
+ ve->base.gt = siblings[0]->gt;
ve->base.id = -1;
ve->base.class = OTHER_CLASS;
ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
- ve->base.flags = I915_ENGINE_IS_VIRTUAL;
/*
* The decision on whether to submit a request using semaphores
@@ -3391,8 +3766,18 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
ve->base.emit_fini_breadcrumb_dw =
sibling->emit_fini_breadcrumb_dw;
+
+ ve->base.flags = sibling->flags;
}
+ ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+
+ err = __execlists_context_alloc(&ve->context, siblings[0]);
+ if (err)
+ goto err_put;
+
+ __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
+
return &ve->context;
err_put: