summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c539
1 files changed, 358 insertions, 181 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e3a5f673ff67..15434cad5430 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,6 +139,7 @@
#include "i915_gem_render_state.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
+#include "intel_workarounds.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
#define RING_EXECLIST1_VALID (1 << 0x3)
@@ -176,14 +177,16 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->priotree.priority;
+ return rq->sched.attr.priority;
}
static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (intel_engine_has_preemption(engine) &&
+ __execlists_need_preempt(prio, rq_prio(last)) &&
+ !i915_request_completed(last));
}
/**
@@ -221,7 +224,7 @@ static void
intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
- struct intel_context *ce = &ctx->engine[engine->id];
+ struct intel_context *ce = to_intel_context(ctx, engine);
u64 desc;
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -255,9 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
}
static struct i915_priolist *
-lookup_priolist(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+lookup_priolist(struct intel_engine_cs *engine, int prio)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_priolist *p;
@@ -328,10 +329,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
struct i915_priolist *uninitialized_var(p);
int last_prio = I915_PRIORITY_INVALID;
- lockdep_assert_held(&engine->timeline->lock);
+ lockdep_assert_held(&engine->timeline.lock);
list_for_each_entry_safe_reverse(rq, rn,
- &engine->timeline->requests,
+ &engine->timeline.requests,
link) {
if (i915_request_completed(rq))
return;
@@ -342,10 +343,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != last_prio) {
last_prio = rq_prio(rq);
- p = lookup_priolist(engine, &rq->priotree, last_prio);
+ p = lookup_priolist(engine, last_prio);
}
- list_add(&rq->priotree.link, &p->requests);
+ GEM_BUG_ON(p->priority != rq_prio(rq));
+ list_add(&rq->sched.link, &p->requests);
}
}
@@ -354,10 +356,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
{
struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->timeline.lock, flags);
- spin_lock_irq(&engine->timeline->lock);
__unwind_incomplete_requests(engine);
- spin_unlock_irq(&engine->timeline->lock);
+
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
static inline void
@@ -374,6 +379,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
+inline void
+execlists_user_begin(struct intel_engine_execlists *execlists,
+ const struct execlist_port *port)
+{
+ execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+inline void
+execlists_user_end(struct intel_engine_execlists *execlists)
+{
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
static inline void
execlists_context_schedule_in(struct i915_request *rq)
{
@@ -382,10 +400,11 @@ execlists_context_schedule_in(struct i915_request *rq)
}
static inline void
-execlists_context_schedule_out(struct i915_request *rq)
+execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
{
intel_engine_context_out(rq->engine);
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ execlists_context_status_change(rq, status);
+ trace_i915_request_out(rq);
}
static void
@@ -399,7 +418,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
+ struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
struct i915_hw_ppgtt *ppgtt =
rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
@@ -454,10 +473,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
engine->name, n,
port[n].context_id, count,
rq->global_seqno,
+ rq->fence.context, rq->fence.seqno,
+ intel_engine_get_seqno(engine),
rq_prio(rq));
} else {
GEM_BUG_ON(!n);
@@ -506,7 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
struct intel_context *ce =
- &engine->i915->preempt_context->engine[engine->id];
+ to_intel_context(engine->i915->preempt_context, engine);
unsigned int n;
GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -535,7 +556,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool __execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
@@ -545,6 +566,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct rb_node *rb;
bool submit = false;
+ lockdep_assert_held(&engine->timeline.lock);
+
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
* static for a context, and unique to each, so we only execute
@@ -566,7 +589,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* and context switches) submission.
*/
- spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
@@ -581,7 +603,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
EXECLISTS_ACTIVE_USER));
GEM_BUG_ON(!port_count(&port[0]));
if (port_count(&port[0]) > 1)
- goto unlock;
+ return false;
/*
* If we write to ELSP a second time before the HW has had
@@ -591,11 +613,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the HW to indicate that it has had a chance to respond.
*/
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
- goto unlock;
+ return false;
if (need_preempt(engine, last, execlists->queue_priority)) {
inject_preempt_context(engine);
- goto unlock;
+ return false;
}
/*
@@ -620,7 +642,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* priorities of the ports haven't been switch.
*/
if (port_count(&port[1]))
- goto unlock;
+ return false;
/*
* WaIdleLiteRestore:bdw,skl
@@ -637,7 +659,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
- list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+ list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
/*
* Can we combine this request with the current port?
* It has to be the same context/ringbuffer and not
@@ -657,7 +679,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
if (port == last_port) {
__list_del_many(&p->requests,
- &rq->priotree.link);
+ &rq->sched.link);
goto done;
}
@@ -671,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (ctx_single_port_submission(last->ctx) ||
ctx_single_port_submission(rq->ctx)) {
__list_del_many(&p->requests,
- &rq->priotree.link);
+ &rq->sched.link);
goto done;
}
@@ -684,7 +706,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(port_isset(port));
}
- INIT_LIST_HEAD(&rq->priotree.link);
+ INIT_LIST_HEAD(&rq->sched.link);
__i915_request_submit(rq);
trace_i915_request_in(rq, port_index(port, execlists));
last = rq;
@@ -697,8 +719,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
+
done:
- execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
+ /*
+ * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+ *
+ * We choose queue_priority such that if we add a request of greater
+ * priority than this, we kick the submission tasklet to decide on
+ * the right order of submitting the requests to hardware. We must
+ * also be prepared to reorder requests as they are in-flight on the
+ * HW. We derive the queue_priority then as the first "hole" in
+ * the HW submission ports and if there are no available slots,
+ * the priority of the lowest executing request, i.e. last.
+ *
+ * When we do receive a higher priority request ready to run from the
+ * user, see queue_request(), the queue_priority is bumped to that
+ * request triggering preemption on the next dequeue (or subsequent
+ * interrupt for secondary ports).
+ */
+ execlists->queue_priority =
+ port != execlists->port ? rq_prio(last) : INT_MIN;
+
execlists->first = rb;
if (submit)
port_assign(port, last);
@@ -706,13 +747,25 @@ done:
/* We must always keep the beast fed if we have work piled up */
GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
-unlock:
- spin_unlock_irq(&engine->timeline->lock);
+ /* Re-evaluate the executing context setup after each preemptive kick */
+ if (last)
+ execlists_user_begin(execlists, execlists->port);
- if (submit) {
- execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
+ return submit;
+}
+
+static void execlists_dequeue(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ unsigned long flags;
+ bool submit;
+
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+ submit = __execlists_dequeue(engine);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+ if (submit)
execlists_submit_ports(engine);
- }
GEM_BUG_ON(port_isset(execlists->port) &&
!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
@@ -727,13 +780,18 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
while (num_ports-- && port_isset(port)) {
struct i915_request *rq = port_request(port);
- GEM_BUG_ON(!execlists->active);
- intel_engine_context_out(rq->engine);
+ GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
+ rq->engine->name,
+ (unsigned int)(port - execlists->port),
+ rq->global_seqno,
+ rq->fence.context, rq->fence.seqno,
+ intel_engine_get_seqno(rq->engine));
- execlists_context_status_change(rq,
- i915_request_completed(rq) ?
- INTEL_CONTEXT_SCHEDULE_OUT :
- INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+ GEM_BUG_ON(!execlists->active);
+ execlists_context_schedule_out(rq,
+ i915_request_completed(rq) ?
+ INTEL_CONTEXT_SCHEDULE_OUT :
+ INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_request_put(rq);
@@ -741,7 +799,82 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
port++;
}
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+ execlists_user_end(execlists);
+}
+
+static void clear_gtiir(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ int i;
+
+ /*
+ * Clear any pending interrupt state.
+ *
+ * We do it twice out of paranoia that some of the IIR are
+ * double buffered, and so if we only reset it once there may
+ * still be an interrupt pending.
+ */
+ if (INTEL_GEN(dev_priv) >= 11) {
+ static const struct {
+ u8 bank;
+ u8 bit;
+ } gen11_gtiir[] = {
+ [RCS] = {0, GEN11_RCS0},
+ [BCS] = {0, GEN11_BCS},
+ [_VCS(0)] = {1, GEN11_VCS(0)},
+ [_VCS(1)] = {1, GEN11_VCS(1)},
+ [_VCS(2)] = {1, GEN11_VCS(2)},
+ [_VCS(3)] = {1, GEN11_VCS(3)},
+ [_VECS(0)] = {1, GEN11_VECS(0)},
+ [_VECS(1)] = {1, GEN11_VECS(1)},
+ };
+ unsigned long irqflags;
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
+
+ spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+ for (i = 0; i < 2; i++) {
+ gen11_reset_one_iir(dev_priv,
+ gen11_gtiir[engine->id].bank,
+ gen11_gtiir[engine->id].bit);
+ }
+ spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+ } else {
+ static const u8 gtiir[] = {
+ [RCS] = 0,
+ [BCS] = 0,
+ [VCS] = 1,
+ [VCS2] = 1,
+ [VECS] = 3,
+ };
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+ for (i = 0; i < 2; i++) {
+ I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+ engine->irq_keep_mask);
+ POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+ }
+ GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+ engine->irq_keep_mask);
+ }
+}
+
+static void reset_irq(struct intel_engine_cs *engine)
+{
+ /* Mark all CS interrupts as complete */
+ smp_store_mb(engine->execlists.active, 0);
+ synchronize_hardirq(engine->i915->drm.irq);
+
+ clear_gtiir(engine);
+
+ /*
+ * The port is checked prior to scheduling a tasklet, but
+ * just in case we have suspended the tasklet to do the
+ * wedging make sure that when it wakes, it decides there
+ * is no work to do by clearing the irq_posted bit.
+ */
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
}
static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -751,7 +884,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ GEM_TRACE("%s current %d\n",
+ engine->name, intel_engine_get_seqno(engine));
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -771,11 +905,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Cancel the requests on the HW and clear the ELSP tracker. */
execlists_cancel_port_requests(execlists);
+ reset_irq(engine);
- spin_lock(&engine->timeline->lock);
+ spin_lock(&engine->timeline.lock);
/* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->timeline->requests, link) {
+ list_for_each_entry(rq, &engine->timeline.requests, link) {
GEM_BUG_ON(!rq->global_seqno);
if (!i915_request_completed(rq))
dma_fence_set_error(&rq->fence, -EIO);
@@ -786,8 +921,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
while (rb) {
struct i915_priolist *p = to_priolist(rb);
- list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
- INIT_LIST_HEAD(&rq->priotree.link);
+ list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+ INIT_LIST_HEAD(&rq->sched.link);
dma_fence_set_error(&rq->fence, -EIO);
__i915_request_submit(rq);
@@ -807,18 +942,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
execlists->first = NULL;
GEM_BUG_ON(port_isset(execlists->port));
- spin_unlock(&engine->timeline->lock);
-
- /*
- * The port is checked prior to scheduling a tasklet, but
- * just in case we have suspended the tasklet to do the
- * wedging make sure that when it wakes, it decides there
- * is no work to do by clearing the irq_posted bit.
- */
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
- /* Mark all CS interrupts as complete */
- execlists->active = 0;
+ spin_unlock(&engine->timeline.lock);
local_irq_restore(flags);
}
@@ -831,7 +955,7 @@ static void execlists_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port * const port = execlists->port;
+ struct execlist_port *port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
bool fw = false;
@@ -884,6 +1008,7 @@ static void execlists_submission_tasklet(unsigned long data)
head = execlists->csb_head;
tail = READ_ONCE(buf[write_idx]);
+ rmb(); /* Hopefully paired with a wmb() in HW */
}
GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
engine->name,
@@ -958,10 +1083,13 @@ static void execlists_submission_tasklet(unsigned long data)
EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
engine->name,
port->context_id, count,
rq ? rq->global_seqno : 0,
+ rq ? rq->fence.context : 0,
+ rq ? rq->fence.seqno : 0,
+ intel_engine_get_seqno(engine),
rq ? rq_prio(rq) : 0);
/* Check the context/desc id for this event matches */
@@ -969,28 +1097,43 @@ static void execlists_submission_tasklet(unsigned long data)
GEM_BUG_ON(count == 0);
if (--count == 0) {
+ /*
+ * On the final event corresponding to the
+ * submission of this context, we expect either
+ * an element-switch event or a completion
+ * event (and on completion, the active-idle
+ * marker). No more preemptions, lite-restore
+ * or otherwise.
+ */
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
GEM_BUG_ON(port_isset(&port[1]) &&
!(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+ GEM_BUG_ON(!port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+ /*
+ * We rely on the hardware being strongly
+ * ordered, that the breadcrumb write is
+ * coherent (visible from the CPU) before the
+ * user interrupt and CSB is processed.
+ */
GEM_BUG_ON(!i915_request_completed(rq));
- execlists_context_schedule_out(rq);
- trace_i915_request_out(rq);
+
+ execlists_context_schedule_out(rq,
+ INTEL_CONTEXT_SCHEDULE_OUT);
i915_request_put(rq);
GEM_TRACE("%s completed ctx=%d\n",
engine->name, port->context_id);
- execlists_port_complete(execlists, port);
+ port = execlists_port_complete(execlists, port);
+ if (port_isset(port))
+ execlists_user_begin(execlists, port);
+ else
+ execlists_user_end(execlists);
} else {
port_set(port, port_pack(rq, count));
}
-
- /* After the final element, the hw should be idle */
- GEM_BUG_ON(port_count(port) == 0 &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
- if (port_count(port) == 0)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_USER);
}
if (head != execlists->csb_head) {
@@ -1013,18 +1156,23 @@ static void execlists_submission_tasklet(unsigned long data)
}
static void queue_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
+ struct i915_sched_node *node,
int prio)
{
- list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+ list_add_tail(&node->link,
+ &lookup_priolist(engine, prio)->requests);
+}
+
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
+{
+ engine->execlists.queue_priority = prio;
+ tasklet_hi_schedule(&engine->execlists.tasklet);
}
static void submit_queue(struct intel_engine_cs *engine, int prio)
{
- if (prio > engine->execlists.queue_priority) {
- engine->execlists.queue_priority = prio;
- tasklet_hi_schedule(&engine->execlists.tasklet);
- }
+ if (prio > engine->execlists.queue_priority)
+ __submit_queue(engine, prio);
}
static void execlists_submit_request(struct i915_request *request)
@@ -1033,42 +1181,45 @@ static void execlists_submit_request(struct i915_request *request)
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
- spin_lock_irqsave(&engine->timeline->lock, flags);
+ spin_lock_irqsave(&engine->timeline.lock, flags);
- queue_request(engine, &request->priotree, rq_prio(request));
+ queue_request(engine, &request->sched, rq_prio(request));
submit_queue(engine, rq_prio(request));
GEM_BUG_ON(!engine->execlists.first);
- GEM_BUG_ON(list_empty(&request->priotree.link));
+ GEM_BUG_ON(list_empty(&request->sched.link));
- spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
-static struct i915_request *pt_to_request(struct i915_priotree *pt)
+static struct i915_request *sched_to_request(struct i915_sched_node *node)
{
- return container_of(pt, struct i915_request, priotree);
+ return container_of(node, struct i915_request, sched);
}
static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
{
- struct intel_engine_cs *engine = pt_to_request(pt)->engine;
+ struct intel_engine_cs *engine = sched_to_request(node)->engine;
GEM_BUG_ON(!locked);
if (engine != locked) {
- spin_unlock(&locked->timeline->lock);
- spin_lock(&engine->timeline->lock);
+ spin_unlock(&locked->timeline.lock);
+ spin_lock(&engine->timeline.lock);
}
return engine;
}
-static void execlists_schedule(struct i915_request *request, int prio)
+static void execlists_schedule(struct i915_request *request,
+ const struct i915_sched_attr *attr)
{
- struct intel_engine_cs *engine;
+ struct i915_priolist *uninitialized_var(pl);
+ struct intel_engine_cs *engine, *last;
struct i915_dependency *dep, *p;
struct i915_dependency stack;
+ const int prio = attr->priority;
LIST_HEAD(dfs);
GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
@@ -1076,23 +1227,23 @@ static void execlists_schedule(struct i915_request *request, int prio)
if (i915_request_completed(request))
return;
- if (prio <= READ_ONCE(request->priotree.priority))
+ if (prio <= READ_ONCE(request->sched.attr.priority))
return;
/* Need BKL in order to use the temporary link inside i915_dependency */
lockdep_assert_held(&request->i915->drm.struct_mutex);
- stack.signaler = &request->priotree;
+ stack.signaler = &request->sched;
list_add(&stack.dfs_link, &dfs);
/*
* Recursively bump all dependent priorities to match the new request.
*
* A naive approach would be to use recursion:
- * static void update_priorities(struct i915_priotree *pt, prio) {
- * list_for_each_entry(dep, &pt->signalers_list, signal_link)
+ * static void update_priorities(struct i915_sched_node *node, prio) {
+ * list_for_each_entry(dep, &node->signalers_list, signal_link)
* update_priorities(dep->signal, prio)
- * queue_request(pt);
+ * queue_request(node);
* }
* but that may have unlimited recursion depth and so runs a very
* real risk of overunning the kernel stack. Instead, we build
@@ -1104,7 +1255,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
* last element in the list is the request we must execute first.
*/
list_for_each_entry(dep, &dfs, dfs_link) {
- struct i915_priotree *pt = dep->signaler;
+ struct i915_sched_node *node = dep->signaler;
/*
* Within an engine, there can be no cycle, but we may
@@ -1112,14 +1263,14 @@ static void execlists_schedule(struct i915_request *request, int prio)
* (redundant dependencies are not eliminated) and across
* engines.
*/
- list_for_each_entry(p, &pt->signalers_list, signal_link) {
+ list_for_each_entry(p, &node->signalers_list, signal_link) {
GEM_BUG_ON(p == dep); /* no cycles! */
- if (i915_priotree_signaled(p->signaler))
+ if (i915_sched_node_signaled(p->signaler))
continue;
- GEM_BUG_ON(p->signaler->priority < pt->priority);
- if (prio > READ_ONCE(p->signaler->priority))
+ GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+ if (prio > READ_ONCE(p->signaler->attr.priority))
list_move_tail(&p->dfs_link, &dfs);
}
}
@@ -1130,37 +1281,45 @@ static void execlists_schedule(struct i915_request *request, int prio)
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
*/
- if (request->priotree.priority == I915_PRIORITY_INVALID) {
- GEM_BUG_ON(!list_empty(&request->priotree.link));
- request->priotree.priority = prio;
+ if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
+ GEM_BUG_ON(!list_empty(&request->sched.link));
+ request->sched.attr = *attr;
if (stack.dfs_link.next == stack.dfs_link.prev)
return;
__list_del_entry(&stack.dfs_link);
}
+ last = NULL;
engine = request->engine;
- spin_lock_irq(&engine->timeline->lock);
+ spin_lock_irq(&engine->timeline.lock);
/* Fifo and depth-first replacement ensure our deps execute before us */
list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
- struct i915_priotree *pt = dep->signaler;
+ struct i915_sched_node *node = dep->signaler;
INIT_LIST_HEAD(&dep->dfs_link);
- engine = pt_lock_engine(pt, engine);
+ engine = sched_lock_engine(node, engine);
- if (prio <= pt->priority)
+ if (prio <= node->attr.priority)
continue;
- pt->priority = prio;
- if (!list_empty(&pt->link)) {
- __list_del_entry(&pt->link);
- queue_request(engine, pt, prio);
+ node->attr.priority = prio;
+ if (!list_empty(&node->link)) {
+ if (last != engine) {
+ pl = lookup_priolist(engine, prio);
+ last = engine;
+ }
+ GEM_BUG_ON(pl->priority != prio);
+ list_move_tail(&node->link, &pl->requests);
}
- submit_queue(engine, prio);
+
+ if (prio > engine->execlists.queue_priority &&
+ i915_sw_fence_done(&sched_to_request(node)->submit))
+ __submit_queue(engine, prio);
}
- spin_unlock_irq(&engine->timeline->lock);
+ spin_unlock_irq(&engine->timeline.lock);
}
static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
@@ -1190,7 +1349,7 @@ static struct intel_ring *
execlists_context_pin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx)
{
- struct intel_context *ce = &ctx->engine[engine->id];
+ struct intel_context *ce = to_intel_context(ctx, engine);
void *vaddr;
int ret;
@@ -1224,6 +1383,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
i915_ggtt_offset(ce->ring->vma);
+ ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head;
ce->state->obj->pin_global++;
i915_gem_context_get(ctx);
@@ -1242,7 +1402,7 @@ err:
static void execlists_context_unpin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx)
{
- struct intel_context *ce = &ctx->engine[engine->id];
+ struct intel_context *ce = to_intel_context(ctx, engine);
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
GEM_BUG_ON(ce->pin_count == 0);
@@ -1261,8 +1421,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
static int execlists_request_alloc(struct i915_request *request)
{
- struct intel_engine_cs *engine = request->engine;
- struct intel_context *ce = &request->ctx->engine[engine->id];
+ struct intel_context *ce =
+ to_intel_context(request->ctx, request->engine);
int ret;
GEM_BUG_ON(!ce->pin_count);
@@ -1522,6 +1682,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return -EINVAL;
switch (INTEL_GEN(engine->i915)) {
+ case 11:
+ return 0;
case 10:
wa_bb_fn[0] = gen10_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
@@ -1574,14 +1736,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
-static u8 gtiir[] = {
- [RCS] = 0,
- [BCS] = 0,
- [VCS] = 1,
- [VCS2] = 1,
- [VECS] = 3,
-};
-
static void enable_execlists(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1641,6 +1795,8 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
if (ret)
return ret;
+ intel_whitelist_workarounds_apply(engine);
+
/* We need to disable the AsyncFlip performance optimisations in order
* to use MI_WAIT_FOR_EVENT within the CS. It should already be
* programmed to '1' on all products.
@@ -1651,7 +1807,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
- return init_workarounds_ring(engine);
+ return 0;
}
static int gen9_init_render_ring(struct intel_engine_cs *engine)
@@ -1662,49 +1818,25 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
if (ret)
return ret;
- return init_workarounds_ring(engine);
-}
-
-static void reset_irq(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int i;
-
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
- /*
- * Clear any pending interrupt state.
- *
- * We do it twice out of paranoia that some of the IIR are double
- * buffered, and if we only reset it once there may still be
- * an interrupt pending.
- */
- for (i = 0; i < 2; i++) {
- I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
- GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
- POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
- }
- GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
- (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+ intel_whitelist_workarounds_apply(engine);
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ return 0;
}
static void reset_common_ring(struct intel_engine_cs *engine,
struct i915_request *request)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct intel_context *ce;
unsigned long flags;
+ u32 *regs;
- GEM_TRACE("%s seqno=%x\n",
- engine->name, request ? request->global_seqno : 0);
+ GEM_TRACE("%s request global=%x, current=%d\n",
+ engine->name, request ? request->global_seqno : 0,
+ intel_engine_get_seqno(engine));
/* See execlists_cancel_requests() for the irq/spinlock split. */
local_irq_save(flags);
- reset_irq(engine);
-
/*
* Catch up with any missed context-switch interrupts.
*
@@ -1715,14 +1847,12 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* requests were completed.
*/
execlists_cancel_port_requests(execlists);
+ reset_irq(engine);
/* Push back any incomplete requests for replay after the reset. */
- spin_lock(&engine->timeline->lock);
+ spin_lock(&engine->timeline.lock);
__unwind_incomplete_requests(engine);
- spin_unlock(&engine->timeline->lock);
-
- /* Mark all CS interrupts as complete */
- execlists->active = 0;
+ spin_unlock(&engine->timeline.lock);
local_irq_restore(flags);
@@ -1748,14 +1878,24 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- ce = &request->ctx->engine[engine->id];
- execlists_init_reg_state(ce->lrc_reg_state,
- request->ctx, engine, ce->ring);
+ regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+ if (engine->default_state) {
+ void *defaults;
+
+ defaults = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (!IS_ERR(defaults)) {
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ defaults + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+ i915_gem_object_unpin_map(engine->default_state);
+ }
+ }
+ execlists_init_reg_state(regs, request->ctx, engine, request->ring);
/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
- ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
- i915_ggtt_offset(ce->ring->vma);
- ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+ regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+ regs[CTX_RING_HEAD + 1] = request->postfix;
request->ring->head = request->postfix;
intel_ring_update_space(request->ring);
@@ -1816,7 +1956,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
}
- cs = intel_ring_begin(rq, 4);
+ cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -1845,6 +1985,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
return 0;
@@ -1987,7 +2130,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
cs = gen8_emit_ggtt_write(cs, request->global_seqno,
intel_hws_seqno_address(request->engine));
*cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@@ -2003,7 +2146,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
intel_hws_seqno_address(request->engine));
*cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@@ -2015,7 +2158,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
{
int ret;
- ret = intel_ring_workarounds_emit(rq);
+ ret = intel_ctx_workarounds_emit(rq);
if (ret)
return ret;
@@ -2075,11 +2218,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+ if (engine->i915->preempt_context)
+ engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->i915->caps.scheduler =
I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY;
- if (engine->i915->preempt_context)
+ if (intel_engine_has_preemption(engine))
engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
}
@@ -2118,7 +2263,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
static inline void
logical_ring_default_irqs(struct intel_engine_cs *engine)
{
- unsigned shift = engine->irq_shift;
+ unsigned int shift = 0;
+
+ if (INTEL_GEN(engine->i915) < 11) {
+ const u8 irq_shifts[] = {
+ [RCS] = GEN8_RCS_IRQ_SHIFT,
+ [BCS] = GEN8_BCS_IRQ_SHIFT,
+ [VCS] = GEN8_VCS1_IRQ_SHIFT,
+ [VCS2] = GEN8_VCS2_IRQ_SHIFT,
+ [VECS] = GEN8_VECS_IRQ_SHIFT,
+ };
+
+ shift = irq_shifts[engine->id];
+ }
+
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
@@ -2174,9 +2332,13 @@ static int logical_ring_init(struct intel_engine_cs *engine)
}
engine->execlists.preempt_complete_status = ~0u;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context) {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+
engine->execlists.preempt_complete_status =
- upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc);
+ upper_32_bits(ce->lrc_desc);
+ }
return 0;
@@ -2430,8 +2592,10 @@ populate_lr_context(struct i915_gem_context *ctx,
defaults = i915_gem_object_pin_map(engine->default_state,
I915_MAP_WB);
- if (IS_ERR(defaults))
- return PTR_ERR(defaults);
+ if (IS_ERR(defaults)) {
+ ret = PTR_ERR(defaults);
+ goto err_unpin_ctx;
+ }
memcpy(vaddr + start, defaults + start, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
@@ -2449,19 +2613,20 @@ populate_lr_context(struct i915_gem_context *ctx,
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
+err_unpin_ctx:
i915_gem_object_unpin_map(ctx_obj);
-
- return 0;
+ return ret;
}
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj;
- struct intel_context *ce = &ctx->engine[engine->id];
+ struct intel_context *ce = to_intel_context(ctx, engine);
struct i915_vma *vma;
uint32_t context_size;
struct intel_ring *ring;
+ struct i915_timeline *timeline;
int ret;
if (ce->state)
@@ -2477,8 +2642,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
ctx_obj = i915_gem_object_create(ctx->i915, context_size);
if (IS_ERR(ctx_obj)) {
- DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
- return PTR_ERR(ctx_obj);
+ ret = PTR_ERR(ctx_obj);
+ goto error_deref_obj;
}
vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
@@ -2487,7 +2652,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
goto error_deref_obj;
}
- ring = intel_engine_create_ring(engine, ctx->ring_size);
+ timeline = i915_timeline_create(ctx->i915, ctx->name);
+ if (IS_ERR(timeline)) {
+ ret = PTR_ERR(timeline);
+ goto error_deref_obj;
+ }
+
+ ring = intel_engine_create_ring(engine, timeline, ctx->ring_size);
+ i915_timeline_put(timeline);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
@@ -2529,7 +2701,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
*/
list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
for_each_engine(engine, dev_priv, id) {
- struct intel_context *ce = &ctx->engine[engine->id];
+ struct intel_context *ce =
+ to_intel_context(ctx, engine);
u32 *reg;
if (!ce->state)
@@ -2551,3 +2724,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
}
}
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_lrc.c"
+#endif