Diffstat (limited to 'drivers/gpu/drm/i915/i915_guc_submission.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_guc_submission.c | 290
1 file changed, 236 insertions, 54 deletions
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8ced9e26f075..832ac9e45801 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -25,6 +25,8 @@
 #include "i915_drv.h"
 #include "intel_uc.h"
 
+#include <trace/events/dma_fence.h>
+
 /**
  * DOC: GuC-based command submission
  *
@@ -348,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 	u32 freespace;
 	int ret;
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	freespace -= client->wq_rsvd;
 	if (likely(freespace >= wqi_size)) {
@@ -358,21 +360,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 		client->no_wq_space++;
 		ret = -EAGAIN;
 	}
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 
 	return ret;
 }
 
+static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&client->wq_lock, flags);
+	client->wq_rsvd += size;
+	spin_unlock_irqrestore(&client->wq_lock, flags);
+}
+
 void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
 {
-	const size_t wqi_size = sizeof(struct guc_wq_item);
+	const int wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *client = request->i915->guc.execbuf_client;
 
 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
-
-	spin_lock(&client->wq_lock);
-	client->wq_rsvd -= wqi_size;
-	spin_unlock(&client->wq_lock);
+	guc_client_update_wq_rsvd(client, -wqi_size);
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -509,15 +517,16 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 	unsigned int engine_id = engine->id;
 	struct intel_guc *guc = &rq->i915->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
+	unsigned long flags;
 	int b_ret;
 
-	spin_lock(&client->wq_lock);
-	guc_wq_item_append(client, rq);
-
 	/* WA to flush out the pending GMADR writes to ring buffer. */
 	if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 		POSTING_READ_FW(GUC_STATUS);
 
+	spin_lock_irqsave(&client->wq_lock, flags);
+
+	guc_wq_item_append(client, rq);
 	b_ret = guc_ring_doorbell(client);
 
 	client->submissions[engine_id] += 1;
@@ -527,15 +536,117 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 
 	guc->submissions[engine_id] += 1;
 	guc->last_seqno[engine_id] = rq->global_seqno;
-	spin_unlock(&client->wq_lock);
+
+	spin_unlock_irqrestore(&client->wq_lock, flags);
 }
 
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }
 
+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
+{
+	/* If we use dma_fence_enable_sw_signaling() directly, lockdep
+	 * detects an ordering issue between the fence lockclass and the
+	 * global_timeline. This circular dependency can only occur via 2
+	 * different fences (but same fence lockclass), so we use the nesting
+	 * annotation here to prevent the warn, equivalent to the nesting
+	 * inside i915_gem_request_submit() for when we also enable the
+	 * signaler.
+	 */
+
+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			     &rq->fence.flags))
+		return;
+
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
+	trace_dma_fence_enable_signal(&rq->fence);
+
+	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
+	intel_engine_enable_signaling(rq);
+	spin_unlock(&rq->lock);
+}
+
+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	/* After execlist_first is updated, the tasklet will be rescheduled.
+	 *
+	 * If we are currently running (inside the tasklet) and a third
+	 * party queues a request and so updates engine->execlist_first under
+	 * the spinlock (which we have elided), it will atomically set the
+	 * TASKLET_SCHED flag causing us to be re-executed and pick up
+	 * the change in state (the update to TASKLET_SCHED incurs a memory
+	 * barrier making this cross-cpu checking safe).
+	 */
+	if (!READ_ONCE(engine->execlist_first))
+		return false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *rq =
+			rb_entry(rb, typeof(*rq), priotree.node);
+
+		if (last && rq->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			nested_enable_signaling(last);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&rq->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&rq->priotree.node);
+		rq->priotree.priority = INT_MAX;
+
+		trace_i915_gem_request_in(rq, port - engine->execlist_port);
+		i915_guc_submit(rq);
+		last = rq;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		nested_enable_signaling(last);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}
+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			trace_i915_gem_request_out(rq);
+			i915_gem_request_put(rq);
+			port[0].request = port[1].request;
+			port[1].request = NULL;
+			rq = port[0].request;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -800,22 +911,21 @@ static void guc_addon_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct i915_vma *vma;
-	struct guc_ads *ads;
-	struct guc_policies *policies;
-	struct guc_mmio_reg_state *reg_state;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	struct page *page;
-	u32 size;
 
-	/* The ads obj includes the struct itself and buffers passed to GuC */
-	size = sizeof(struct guc_ads) + sizeof(struct guc_policies) +
-			sizeof(struct guc_mmio_reg_state) +
-			GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
+	struct {
+		struct guc_ads ads;
+		struct guc_policies policies;
+		struct guc_mmio_reg_state reg_state;
+		u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
+	} __packed *blob;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 base;
 
 	vma = guc->ads_vma;
 	if (!vma) {
-		vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(size));
+		vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob)));
 		if (IS_ERR(vma))
 			return;
 
@@ -823,44 +933,38 @@ static void guc_addon_create(struct intel_guc *guc)
 	}
 
 	page = i915_vma_first_page(vma);
-	ads = kmap(page);
-
-	/*
-	 * The GuC requires a "Golden Context" when it reinitialises
-	 * engines after a reset. Here we use the Render ring default
-	 * context, which must already exist and be pinned in the GGTT,
-	 * so its address won't change after we've told the GuC where
-	 * to find it.
-	 */
-	engine = dev_priv->engine[RCS];
-	ads->golden_context_lrca = engine->status_page.ggtt_offset;
-
-	for_each_engine(engine, dev_priv, id)
-		ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine);
+	blob = kmap(page);
 
 	/* GuC scheduling policies */
-	policies = (void *)ads + sizeof(struct guc_ads);
-	guc_policies_init(policies);
-
-	ads->scheduler_policies =
-		guc_ggtt_offset(vma) + sizeof(struct guc_ads);
+	guc_policies_init(&blob->policies);
 
 	/* MMIO reg state */
-	reg_state = (void *)policies + sizeof(struct guc_policies);
-
 	for_each_engine(engine, dev_priv, id) {
-		reg_state->mmio_white_list[engine->guc_id].mmio_start =
+		blob->reg_state.mmio_white_list[engine->guc_id].mmio_start =
 			engine->mmio_base + GUC_MMIO_WHITE_LIST_START;
 
 		/* Nothing to be saved or restored for now. */
-		reg_state->mmio_white_list[engine->guc_id].count = 0;
+		blob->reg_state.mmio_white_list[engine->guc_id].count = 0;
 	}
 
-	ads->reg_state_addr = ads->scheduler_policies +
-			sizeof(struct guc_policies);
+	/*
+	 * The GuC requires a "Golden Context" when it reinitialises
+	 * engines after a reset. Here we use the Render ring default
+	 * context, which must already exist and be pinned in the GGTT,
+	 * so its address won't change after we've told the GuC where
+	 * to find it.
+	 */
+	blob->ads.golden_context_lrca =
+		dev_priv->engine[RCS]->status_page.ggtt_offset;
+
+	for_each_engine(engine, dev_priv, id)
+		blob->ads.eng_state_size[engine->guc_id] =
+			intel_lr_context_size(engine);
 
-	ads->reg_state_buffer = ads->reg_state_addr +
-			sizeof(struct guc_mmio_reg_state);
+	base = guc_ggtt_offset(vma);
+	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
+	blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
+	blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
 
 	kunmap(page);
 }
@@ -926,6 +1030,48 @@ static void guc_reset_wq(struct i915_guc_client *client)
 	client->wq_tail = 0;
 }
 
+static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int irqs;
+
+	/* tell all command streamers to forward interrupts (but not vblank) to GuC */
+	irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MODE_GEN7(engine), irqs);
+
+	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
+	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
+	       GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
+	/* These three registers have the same bit definitions */
+	I915_WRITE(GUC_BCS_RCS_IER, ~irqs);
+	I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs);
+	I915_WRITE(GUC_WD_VECS_IER, ~irqs);
+
+	/*
+	 * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all
+	 * (unmasked) PM interrupts to the GuC. All other bits of this
+	 * register *disable* generation of a specific interrupt.
+	 *
+	 * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when
+	 * writing to the PM interrupt mask register, i.e. interrupts
+	 * that must not be disabled.
+	 *
+	 * If the GuC is handling these interrupts, then we must not let
+	 * the PM code disable ANY interrupt that the GuC is expecting.
+	 * So for each ENABLED (0) bit in this register, we must SET the
+	 * bit in pm_intrmsk_mbz so that it's left enabled for the GuC.
+	 * GuC needs ARAT expired interrupt unmasked hence it is set in
+	 * pm_intrmsk_mbz.
+	 *
+	 * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will
+	 * result in the register bit being left SET!
+	 */
+	dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
+	dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+}
+
 int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
@@ -942,31 +1088,67 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	guc_init_doorbell_hw(guc);
 
 	/* Take over from manual control of ELSP (execlists) */
+	guc_interrupts_capture(dev_priv);
+
 	for_each_engine(engine, dev_priv, id) {
+		const int wqi_size = sizeof(struct guc_wq_item);
 		struct drm_i915_gem_request *rq;
 
-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		/* The tasklet was initialised by execlists, and may be in
+		 * a state of flux (across a reset) and so we just want to
+		 * take over the callback without changing any other state
+		 * in the tasklet.
+		 */
+		engine->irq_tasklet.func = i915_guc_irq_handler;
+		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
 		/* Replay the current set of previously submitted requests */
+		spin_lock_irq(&engine->timeline->lock);
 		list_for_each_entry(rq, &engine->timeline->requests, link) {
-			client->wq_rsvd += sizeof(struct guc_wq_item);
+			guc_client_update_wq_rsvd(client, wqi_size);
 			__i915_guc_submit(rq);
 		}
+		spin_unlock_irq(&engine->timeline->lock);
 	}
 
 	return 0;
 }
 
+static void guc_interrupts_release(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int irqs;
+
+	/*
+	 * tell all command streamers NOT to forward interrupts or vblank
+	 * to GuC.
+	 */
+	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
+	irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MODE_GEN7(engine), irqs);
+
+	/* route all GT interrupts to the host */
+	I915_WRITE(GUC_BCS_RCS_IER, 0);
+	I915_WRITE(GUC_VCS2_VCS1_IER, 0);
+	I915_WRITE(GUC_WD_VECS_IER, 0);
+
+	dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+	dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+}
+
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
 
+	guc_interrupts_release(dev_priv);
+
 	if (!guc->execbuf_client)
 		return;
 
 	/* Revert back to manual ELSP submission */
-	intel_execlists_enable_submission(dev_priv);
+	intel_engines_reset_default_submission(dev_priv);
 }
 
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
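
For readers following the guc_addon_create() rework above: the patch replaces hand-computed running offsets with one contiguous __packed blob, so every GGTT address handed to the GuC becomes the VMA base plus the member's offset within that blob. Below is a minimal standalone sketch of that addressing scheme (not part of the patch); it assumes ptr_offset() reduces to offsetof() for this use, and the struct members here are stand-ins, not the real GuC definitions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for struct guc_ads, guc_policies and guc_mmio_reg_state. */
struct demo_blob {
	uint32_t ads[8];
	uint32_t policies[16];
	uint32_t reg_state[32];
	uint8_t  reg_state_buffer[4096];
};

int main(void)
{
	uint32_t base = 0x00100000;	/* pretend guc_ggtt_offset(vma) */

	/* Same pattern as blob->ads.* = base + ptr_offset(blob, member). */
	printf("scheduler_policies = 0x%08x\n",
	       base + (uint32_t)offsetof(struct demo_blob, policies));
	printf("reg_state_addr     = 0x%08x\n",
	       base + (uint32_t)offsetof(struct demo_blob, reg_state));
	printf("reg_state_buffer   = 0x%08x\n",
	       base + (uint32_t)offsetof(struct demo_blob, reg_state_buffer));
	return 0;
}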
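
And a tiny userspace model of the completion loop in the new i915_guc_irq_handler() (illustrative only; the request, port and dequeue types are simplified stand-ins): slot 0 of the two-entry port array is retired while its request has completed, slot 1 shuffles down, and i915_guc_dequeue() is only asked for more work while slot 1 is empty.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct req { int seqno; };

static struct req *port[2];
static int hw_seqno = 2;		/* pretend the GPU finished seqno 1..2 */

static bool completed(const struct req *rq) { return rq->seqno <= hw_seqno; }

static bool dequeue(void)
{
	return false;			/* stand-in: nothing left to submit */
}

static void irq_handler(void)
{
	bool submit;

	do {
		struct req *rq = port[0];

		while (rq && completed(rq)) {
			printf("retire seqno %d\n", rq->seqno);
			port[0] = port[1];	/* shuffle slot 1 down */
			port[1] = NULL;
			rq = port[0];
		}

		submit = false;
		if (!port[1])			/* refill only if a slot is free */
			submit = dequeue();
	} while (submit);
}

int main(void)
{
	struct req a = { 1 }, b = { 2 };

	port[0] = &a;
	port[1] = &b;
	irq_handler();
	return 0;
}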