summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c2
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c329
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.h38
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_engine_cs.c455
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_engine_cs.h39
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c22
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c160
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c41
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c95
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c132
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset_types.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c870
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c591
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c306
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c112
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c53
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h14
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c101
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c132
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c85
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c917
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c11
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c68
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c38
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c29
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c37
56 files changed, 3434 insertions, 1587 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
index 1de5fbaa1cf9..3a21cf63b3f0 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -9,6 +9,7 @@
#include "debugfs_engines.h"
#include "debugfs_gt.h"
#include "debugfs_gt_pm.h"
+#include "intel_sseu_debugfs.h"
#include "uc/intel_uc_debugfs.h"
#include "i915_drv.h"
@@ -25,6 +26,7 @@ void debugfs_gt_register(struct intel_gt *gt)
debugfs_engines_register(gt, root);
debugfs_gt_pm_register(gt, root);
+ intel_sseu_debugfs_register(gt, root);
intel_uc_debugfs_register(&gt->uc, root);
}
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
new file mode 100644
index 000000000000..b491a64919c8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "gen2_engine_cs.h"
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_ring.h"
+
+int gen2_emit_flush(struct i915_request *rq, u32 mode)
+{
+ unsigned int num_store_dw = 12;
+ u32 cmd, *cs;
+
+ cmd = MI_FLUSH;
+ if (mode & EMIT_INVALIDATE)
+ cmd |= MI_READ_FLUSH;
+
+ cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = cmd;
+ while (num_store_dw--) {
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
+ *cs++ = 0;
+ *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+ }
+ *cs++ = cmd;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+ u32 cmd, *cs;
+ int i;
+
+ /*
+ * read/write caches:
+ *
+ * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+ * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
+ * also flushed at 2d versus 3d pipeline switches.
+ *
+ * read-only caches:
+ *
+ * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+ * MI_READ_FLUSH is set, and is always flushed on 965.
+ *
+ * I915_GEM_DOMAIN_COMMAND may not exist?
+ *
+ * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+ * invalidated when MI_EXE_FLUSH is set.
+ *
+ * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+ * invalidated with every MI_FLUSH.
+ *
+ * TLBs:
+ *
+ * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+ * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+ * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+ * are flushed at any MI_FLUSH.
+ */
+
+ cmd = MI_FLUSH;
+ if (mode & EMIT_INVALIDATE) {
+ cmd |= MI_EXE_FLUSH;
+ if (IS_G4X(rq->engine->i915) || IS_GEN(rq->engine->i915, 5))
+ cmd |= MI_INVALIDATE_ISP;
+ }
+
+ i = 2;
+ if (mode & EMIT_INVALIDATE)
+ i += 20;
+
+ cs = intel_ring_begin(rq, i);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = cmd;
+
+ /*
+ * A random delay to let the CS invalidate take effect? Without this
+ * delay, the GPU relocation path fails as the CS does not see
+ * the updated contents. Just as important, if we apply the flushes
+ * to the EMIT_FLUSH branch (i.e. immediately after the relocation
+ * write and before the invalidate on the next batch), the relocations
+ * still fail. This implies that is a delay following invalidation
+ * that is required to reset the caches as opposed to a delay to
+ * ensure the memory is written.
+ */
+ if (mode & EMIT_INVALIDATE) {
+ *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+ PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ for (i = 0; i < 12; i++)
+ *cs++ = MI_FLUSH;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+ PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ *cs++ = 0;
+ }
+
+ *cs++ = cmd;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_FLUSH;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
+ int flush, int post)
+{
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+ *cs++ = MI_FLUSH;
+
+ while (flush--) {
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
+ *cs++ = rq->fence.seqno;
+ }
+
+ while (post--) {
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+ }
+
+ *cs++ = MI_USER_INTERRUPT;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
+u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ return __gen2_emit_breadcrumb(rq, cs, 16, 8);
+}
+
+u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ return __gen2_emit_breadcrumb(rq, cs, 8, 8);
+}
+
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT SZ_256K
+#define I830_TLB_ENTRIES (2)
+#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
+int i830_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags)
+{
+ u32 *cs, cs_offset =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT);
+
+ GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Evict the invalid PTE TLBs */
+ *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
+ *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
+ *cs++ = cs_offset;
+ *cs++ = 0xdeadbeef;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
+ if (len > I830_BATCH_LIMIT)
+ return -ENOSPC;
+
+ cs = intel_ring_begin(rq, 6 + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /*
+ * Blit the batch (which has now all relocs applied) to the
+ * stable batch scratch bo area (so that the CS never
+ * stumbles over its tlb invalidation bug) ...
+ */
+ *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
+ *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
+ *cs++ = cs_offset;
+ *cs++ = 4096;
+ *cs++ = offset;
+
+ *cs++ = MI_FLUSH;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ /* ... and execute it. */
+ offset = cs_offset;
+ }
+
+ if (!(dispatch_flags & I915_DISPATCH_SECURE))
+ offset |= MI_BATCH_NON_SECURE;
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+ *cs++ = offset;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int gen3_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags)
+{
+ u32 *cs;
+
+ if (!(dispatch_flags & I915_DISPATCH_SECURE))
+ offset |= MI_BATCH_NON_SECURE;
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+ *cs++ = offset;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int gen4_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 length,
+ unsigned int dispatch_flags)
+{
+ u32 security;
+ u32 *cs;
+
+ security = MI_BATCH_NON_SECURE_I965;
+ if (dispatch_flags & I915_DISPATCH_SECURE)
+ security = 0;
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
+ *cs++ = offset;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+void gen2_irq_enable(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = engine->i915;
+
+ i915->irq_mask &= ~engine->irq_enable_mask;
+ intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
+ ENGINE_POSTING_READ16(engine, RING_IMR);
+}
+
+void gen2_irq_disable(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = engine->i915;
+
+ i915->irq_mask |= engine->irq_enable_mask;
+ intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
+}
+
+void gen3_irq_enable(struct intel_engine_cs *engine)
+{
+ engine->i915->irq_mask &= ~engine->irq_enable_mask;
+ intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
+ intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
+}
+
+void gen3_irq_disable(struct intel_engine_cs *engine)
+{
+ engine->i915->irq_mask |= engine->irq_enable_mask;
+ intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
+}
+
+void gen5_irq_enable(struct intel_engine_cs *engine)
+{
+ gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+void gen5_irq_disable(struct intel_engine_cs *engine)
+{
+ gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
new file mode 100644
index 000000000000..a5cd64a65c9e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN2_ENGINE_CS_H__
+#define __GEN2_ENGINE_CS_H__
+
+#include <linux/types.h>
+
+struct i915_request;
+struct intel_engine_cs;
+
+int gen2_emit_flush(struct i915_request *rq, u32 mode);
+int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode);
+int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode);
+
+u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs);
+u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs);
+
+int i830_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags);
+int gen3_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags);
+int gen4_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 length,
+ unsigned int dispatch_flags);
+
+void gen2_irq_enable(struct intel_engine_cs *engine);
+void gen2_irq_disable(struct intel_engine_cs *engine);
+void gen3_irq_enable(struct intel_engine_cs *engine);
+void gen3_irq_disable(struct intel_engine_cs *engine);
+void gen5_irq_enable(struct intel_engine_cs *engine);
+void gen5_irq_disable(struct intel_engine_cs *engine);
+
+#endif /* __GEN2_ENGINE_CS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
new file mode 100644
index 000000000000..ce38d1bcaba3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "gen6_engine_cs.h"
+#include "intel_engine.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+#include "intel_ring.h"
+
+#define HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))
+
+/*
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6. From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
+ */
+static int
+gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
+{
+ u32 scratch_addr =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0; /* low dword */
+ *cs++ = 0; /* high dword */
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = PIPE_CONTROL_QW_WRITE;
+ *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+ u32 scratch_addr =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+ u32 *cs, flags = 0;
+ int ret;
+
+ /* Force SNB workarounds for PIPE_CONTROL flushes */
+ ret = gen6_emit_post_sync_nonzero_flush(rq);
+ if (ret)
+ return ret;
+
+ /*
+ * Just flush everything. Experiments have shown that reducing the
+ * number of bits based on the write domains has little performance
+ * impact. And when rearranging requests, the order of flushes is
+ * unknown.
+ */
+ if (mode & EMIT_FLUSH) {
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ /*
+ * Ensure that any following seqno writes only happen
+ * when the render cache is indeed flushed.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+ }
+ if (mode & EMIT_INVALIDATE) {
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ /*
+ * TLB invalidate requires a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
+ }
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = flags;
+ *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
+{
+ /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_QW_WRITE;
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+ PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+
+ /* Finally we can flush and with it emit the breadcrumb */
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset |
+ PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
+static int mi_flush_dw(struct i915_request *rq, u32 flags)
+{
+ u32 cmd, *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_FLUSH_DW;
+
+ /*
+ * We always require a command barrier so that subsequent
+ * commands, such as breadcrumb interrupts, are strictly ordered
+ * wrt the contents of the write cache being flushed to memory
+ * (and thus being coherent from the CPU).
+ */
+ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+ /*
+ * Bspec vol 1c.3 - blitter engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
+ cmd |= flags;
+
+ *cs++ = cmd;
+ *cs++ = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
+{
+ return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
+}
+
+int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode)
+{
+ return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
+}
+
+int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode)
+{
+ return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
+}
+
+int gen6_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags)
+{
+ u32 security;
+ u32 *cs;
+
+ security = MI_BATCH_NON_SECURE_I965;
+ if (dispatch_flags & I915_DISPATCH_SECURE)
+ security = 0;
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = __gen6_emit_bb_start(cs, offset, security);
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int
+hsw_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags)
+{
+ u32 security;
+ u32 *cs;
+
+ security = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW;
+ if (dispatch_flags & I915_DISPATCH_SECURE)
+ security = 0;
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = __gen6_emit_bb_start(cs, offset, security);
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int gen7_stall_cs(struct i915_request *rq)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ *cs++ = 0;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+ u32 scratch_addr =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+ u32 *cs, flags = 0;
+
+ /*
+ * Ensure that any following seqno writes only happen when the render
+ * cache is indeed flushed.
+ *
+ * Workaround: 4th PIPE_CONTROL command (except the ones with only
+ * read-cache invalidate bits set) must have the CS_STALL bit set. We
+ * don't try to be clever and just set it unconditionally.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ /*
+ * CS_STALL suggests at least a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+ /*
+ * Just flush everything. Experiments have shown that reducing the
+ * number of bits based on the write domains has little performance
+ * impact.
+ */
+ if (mode & EMIT_FLUSH) {
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ }
+ if (mode & EMIT_INVALIDATE) {
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
+
+ /*
+ * Workaround: we must issue a pipe_control with CS-stall bit
+ * set before a pipe_control command that has the state cache
+ * invalidate bit set.
+ */
+ gen7_stall_cs(rq);
+ }
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = flags;
+ *cs++ = scratch_addr;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
+{
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
+u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
+{
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_USER_INTERRUPT;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
+#define GEN7_XCS_WA 32
+u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
+{
+ int i;
+
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+ *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
+ MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->fence.seqno;
+
+ for (i = 0; i < GEN7_XCS_WA; i++) {
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+ }
+
+ *cs++ = MI_FLUSH_DW;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+#undef GEN7_XCS_WA
+
+void gen6_irq_enable(struct intel_engine_cs *engine)
+{
+ ENGINE_WRITE(engine, RING_IMR,
+ ~(engine->irq_enable_mask | engine->irq_keep_mask));
+
+ /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+ ENGINE_POSTING_READ(engine, RING_IMR);
+
+ gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+void gen6_irq_disable(struct intel_engine_cs *engine)
+{
+ ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
+ gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
+}
+
+void hsw_irq_enable_vecs(struct intel_engine_cs *engine)
+{
+ ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);
+
+ /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+ ENGINE_POSTING_READ(engine, RING_IMR);
+
+ gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
+}
+
+void hsw_irq_disable_vecs(struct intel_engine_cs *engine)
+{
+ ENGINE_WRITE(engine, RING_IMR, ~0);
+ gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.h b/drivers/gpu/drm/i915/gt/gen6_engine_cs.h
new file mode 100644
index 000000000000..76c6bc9f3bde
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN6_ENGINE_CS_H__
+#define __GEN6_ENGINE_CS_H__
+
+#include <linux/types.h>
+
+#include "intel_gpu_commands.h"
+
+struct i915_request;
+struct intel_engine_cs;
+
+int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode);
+int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode);
+int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode);
+u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
+u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
+
+int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode);
+u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
+u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
+
+int gen6_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags);
+int hsw_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags);
+
+void gen6_irq_enable(struct intel_engine_cs *engine);
+void gen6_irq_disable(struct intel_engine_cs *engine);
+
+void hsw_irq_enable_vecs(struct intel_engine_cs *engine);
+void hsw_irq_disable_vecs(struct intel_engine_cs *engine);
+
+#endif /* __GEN6_ENGINE_CS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index f4fec7eb4064..cdc0b9c54305 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -183,13 +183,11 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
struct i915_page_directory * const pd = ppgtt->base.pd;
struct i915_page_table *pt, *alloc = NULL;
- intel_wakeref_t wakeref;
+ bool flush = false;
u64 from = start;
unsigned int pde;
int ret = 0;
- wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
spin_lock(&pd->lock);
gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
@@ -214,14 +212,20 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
alloc = pt;
pt = pd->entry[pde];
}
+
+ flush = true;
}
atomic_add(count, &pt->used);
}
spin_unlock(&pd->lock);
- if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
- gen6_flush_pd(ppgtt, from, start);
+ if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
+ gen6_flush_pd(ppgtt, from, start);
+ }
goto out;
@@ -230,7 +234,6 @@ unwind_out:
out:
if (alloc)
free_px(vm, alloc);
- intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
return ret;
}
@@ -299,11 +302,12 @@ static void pd_vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
-static int pd_vma_bind(struct i915_vma *vma,
+static int pd_vma_bind(struct i915_address_space *vm,
+ struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 unused)
{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct gen6_ppgtt *ppgtt = vma->private;
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
@@ -314,7 +318,7 @@ static int pd_vma_bind(struct i915_vma *vma,
return 0;
}
-static void pd_vma_unbind(struct i915_vma *vma)
+static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
struct gen6_ppgtt *ppgtt = vma->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index de595b66a746..d93d85cd3027 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -396,7 +396,7 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
emit_batch(vma, memset(batch, 0, bv.max_size), &bv);
i915_gem_object_flush_map(vma->obj);
- i915_gem_object_unpin_map(vma->obj);
+ __i915_gem_object_release_map(vma->obj);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d907d538176e..91786310c114 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -314,13 +314,18 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
+
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ goto unlock;
if (!__intel_breadcrumbs_arm_irq(b))
goto unlock;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
index 487299cb91f2..b9c8163978a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -30,7 +30,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
- *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
+ *cs++ = intel_sseu_make_rpcs(rq->engine->gt, &sseu);
intel_ring_advance(rq, cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 9bf6d4989968..a9249a23903a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -187,7 +187,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
-#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))
#define I915_HWS_CSB_BUF0_INDEX 0x10
#define I915_HWS_CSB_WRITE_INDEX 0x1f
@@ -335,7 +334,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
+ ktime_t *now);
struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8691eb61e185..dd1a42c4d344 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -370,7 +370,7 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
* instances.
*/
if ((INTEL_GEN(i915) >= 11 &&
- RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
+ engine->gt->info.vdbox_sfc_access & engine->mask) ||
(INTEL_GEN(i915) >= 9 && engine->instance == 0))
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
@@ -414,12 +414,12 @@ void intel_engines_release(struct intel_gt *gt)
/* Decouple the backend; but keep the layout for late GPU resets */
for_each_engine(engine, gt, id) {
- intel_wakeref_wait_for_idle(&engine->wakeref);
- GEM_BUG_ON(intel_engine_pm_is_awake(engine));
-
if (!engine->release)
continue;
+ intel_wakeref_wait_for_idle(&engine->wakeref);
+ GEM_BUG_ON(intel_engine_pm_is_awake(engine));
+
engine->release(engine);
engine->release = NULL;
@@ -450,6 +450,80 @@ void intel_engines_free(struct intel_gt *gt)
}
}
+/*
+ * Determine which engines are fused off in our particular hardware.
+ * Note that we have a catch-22 situation where we need to be able to access
+ * the blitter forcewake domain to read the engine fuses, but at the same time
+ * we need to know which engines are available on the system to know which
+ * forcewake domains are present. We solve this by intializing the forcewake
+ * domains based on the full engine mask in the platform capabilities before
+ * calling this function and pruning the domains for fused-off engines
+ * afterwards.
+ */
+static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_gt_info *info = &gt->info;
+ struct intel_uncore *uncore = gt->uncore;
+ unsigned int logical_vdbox = 0;
+ unsigned int i;
+ u32 media_fuse;
+ u16 vdbox_mask;
+ u16 vebox_mask;
+
+ info->engine_mask = INTEL_INFO(i915)->platform_engine_mask;
+
+ if (INTEL_GEN(i915) < 11)
+ return info->engine_mask;
+
+ media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
+
+ vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
+ vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
+ GEN11_GT_VEBOX_DISABLE_SHIFT;
+
+ for (i = 0; i < I915_MAX_VCS; i++) {
+ if (!HAS_ENGINE(gt, _VCS(i))) {
+ vdbox_mask &= ~BIT(i);
+ continue;
+ }
+
+ if (!(BIT(i) & vdbox_mask)) {
+ info->engine_mask &= ~BIT(_VCS(i));
+ drm_dbg(&i915->drm, "vcs%u fused off\n", i);
+ continue;
+ }
+
+ /*
+ * In Gen11, only even numbered logical VDBOXes are
+ * hooked up to an SFC (Scaler & Format Converter) unit.
+ * In TGL each VDBOX has access to an SFC.
+ */
+ if (INTEL_GEN(i915) >= 12 || logical_vdbox++ % 2 == 0)
+ gt->info.vdbox_sfc_access |= BIT(i);
+ }
+ drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
+ vdbox_mask, VDBOX_MASK(gt));
+ GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
+
+ for (i = 0; i < I915_MAX_VECS; i++) {
+ if (!HAS_ENGINE(gt, _VECS(i))) {
+ vebox_mask &= ~BIT(i);
+ continue;
+ }
+
+ if (!(BIT(i) & vebox_mask)) {
+ info->engine_mask &= ~BIT(_VECS(i));
+ drm_dbg(&i915->drm, "vecs%u fused off\n", i);
+ }
+ }
+ drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
+ vebox_mask, VEBOX_MASK(gt));
+ GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
+
+ return info->engine_mask;
+}
+
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
* @gt: pointer to struct intel_gt
@@ -459,8 +533,7 @@ void intel_engines_free(struct intel_gt *gt)
int intel_engines_init_mmio(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
- struct intel_device_info *device_info = mkwrite_device_info(i915);
- const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
+ const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0;
unsigned int i;
int err;
@@ -473,7 +546,7 @@ int intel_engines_init_mmio(struct intel_gt *gt)
return -ENODEV;
for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
- if (!HAS_ENGINE(i915, i))
+ if (!HAS_ENGINE(gt, i))
continue;
err = intel_engine_setup(gt, i);
@@ -489,14 +562,16 @@ int intel_engines_init_mmio(struct intel_gt *gt)
* engines.
*/
if (drm_WARN_ON(&i915->drm, mask != engine_mask))
- device_info->engine_mask = mask;
+ gt->info.engine_mask = mask;
- RUNTIME_INFO(i915)->num_engines = hweight32(mask);
+ gt->info.num_engines = hweight32(mask);
intel_gt_check_and_clear_faults(gt);
intel_setup_engine_capabilities(gt);
+ intel_uncore_prune_engine_fw_domains(gt->uncore, gt);
+
return 0;
cleanup:
@@ -634,7 +709,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
/* Use the whole device by default */
engine->sseu =
- intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
+ intel_sseu_from_device_info(&engine->gt->info.sseu);
intel_engine_init_workarounds(engine);
intel_engine_init_whitelist(engine);
@@ -661,7 +736,6 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
if (!frame)
return -ENOMEM;
- frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.context = ce;
rcu_assign_pointer(frame->rq.timeline, ce->timeline);
@@ -1001,7 +1075,7 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
struct intel_instdone *instdone)
{
struct drm_i915_private *i915 = engine->i915;
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ const struct sseu_dev_info *sseu = &engine->gt->info.sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -1095,19 +1169,21 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine)
{
struct tasklet_struct *t = &engine->execlists.tasklet;
- if (__tasklet_is_scheduled(t)) {
- local_bh_disable();
- if (tasklet_trylock(t)) {
- /* Must wait for any GPU reset in progress. */
- if (__tasklet_is_enabled(t))
- t->func(t->data);
- tasklet_unlock(t);
- }
- local_bh_enable();
- }
+ if (!t->func)
+ return;
- /* Otherwise flush the tasklet if it was running on another cpu */
- tasklet_unlock_wait(t);
+ /* Synchronise and wait for the tasklet on another CPU */
+ tasklet_kill(t);
+
+ /* Having cancelled the tasklet, ensure that is run */
+ local_bh_disable();
+ if (tasklet_trylock(t)) {
+ /* Must wait for any GPU reset in progress. */
+ if (__tasklet_is_enabled(t))
+ t->func(t->data);
+ tasklet_unlock(t);
+ }
+ local_bh_enable();
}
/**
@@ -1194,8 +1270,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
}
}
-static int print_sched_attr(struct drm_i915_private *i915,
- const struct i915_sched_attr *attr,
+static int print_sched_attr(const struct i915_sched_attr *attr,
char *buf, int x, int len)
{
if (attr->priority == I915_PRIORITY_INVALID)
@@ -1215,7 +1290,7 @@ static void print_request(struct drm_printer *m,
char buf[80] = "";
int x = 0;
- x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
+ x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
prefix,
@@ -1423,9 +1498,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
int len;
len = scnprintf(hdr, sizeof(hdr),
- "\t\tActive[%d]: ccid:%08x, ",
+ "\t\tActive[%d]: ccid:%08x%s%s, ",
(int)(port - execlists->active),
- rq->context->lrc.ccid);
+ rq->context->lrc.ccid,
+ intel_context_is_closed(rq->context) ? "!" : "",
+ intel_context_is_banned(rq->context) ? "*" : "");
len += print_ring(hdr + len, sizeof(hdr) - len, rq);
scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
@@ -1435,9 +1512,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
int len;
len = scnprintf(hdr, sizeof(hdr),
- "\t\tPending[%d]: ccid:%08x, ",
+ "\t\tPending[%d]: ccid:%08x%s%s, ",
(int)(port - execlists->pending),
- rq->context->lrc.ccid);
+ rq->context->lrc.ccid,
+ intel_context_is_closed(rq->context) ? "!" : "",
+ intel_context_is_banned(rq->context) ? "*" : "");
len += print_ring(hdr + len, sizeof(hdr) - len, rq);
scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
@@ -1506,6 +1585,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct i915_request *rq;
intel_wakeref_t wakeref;
unsigned long flags;
+ ktime_t dummy;
if (header) {
va_list ap;
@@ -1523,6 +1603,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
yesno(!llist_empty(&engine->barrier_tasks)));
drm_printf(m, "\tLatency: %luus\n",
ewma__engine_latency_read(&engine->latency));
+ if (intel_engine_supports_stats(engine))
+ drm_printf(m, "\tRuntime: %llums\n",
+ ktime_to_ms(intel_engine_get_busy_time(engine,
+ &dummy)));
+ drm_printf(m, "\tForcewake: %x domains, %d active\n",
+ engine->fw_domain, atomic_read(&engine->fw_active));
rcu_read_lock();
rq = READ_ONCE(engine->heartbeat.systole);
@@ -1589,7 +1675,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
+static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
+ ktime_t *now)
{
ktime_t total = engine->stats.total;
@@ -1597,9 +1684,9 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
* If the engine is executing something at the moment
* add it to the total.
*/
+ *now = ktime_get();
if (atomic_read(&engine->stats.active))
- total = ktime_add(total,
- ktime_sub(ktime_get(), engine->stats.start));
+ total = ktime_add(total, ktime_sub(*now, engine->stats.start));
return total;
}
@@ -1607,17 +1694,18 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
/**
* intel_engine_get_busy_time() - Return current accumulated engine busyness
* @engine: engine to report on
+ * @now: monotonic timestamp of sampling
*
* Returns accumulated time @engine was busy since engine stats were enabled.
*/
-ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
{
unsigned int seq;
ktime_t total;
do {
seq = read_seqbegin(&engine->stats.lock);
- total = __intel_engine_get_busy_time(engine);
+ total = __intel_engine_get_busy_time(engine, now);
} while (read_seqretry(&engine->stats.lock, seq));
return total;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5136c8bf112d..8ffdf676c0a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -4,6 +4,7 @@
* Copyright © 2019 Intel Corporation
*/
+#include "i915_drv.h"
#include "i915_request.h"
#include "intel_context.h"
@@ -31,7 +32,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
delay = msecs_to_jiffies_timeout(delay);
if (delay >= HZ)
delay = round_jiffies_up_relative(delay);
- mod_delayed_work(system_wq, &engine->heartbeat.work, delay);
+ mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay);
return true;
}
@@ -48,8 +49,10 @@ static void show_heartbeat(const struct i915_request *rq,
struct drm_printer p = drm_debug_printer("heartbeat");
intel_engine_dump(engine, &p,
- "%s heartbeat {prio:%d} not ticking\n",
+ "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
engine->name,
+ rq->fence.context,
+ rq->fence.seqno,
rq->sched.attr.priority);
}
@@ -62,6 +65,10 @@ static void heartbeat(struct work_struct *wrk)
container_of(wrk, typeof(*engine), heartbeat.work.work);
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
+ unsigned long serial;
+
+ /* Just in case everything has gone horribly wrong, give it a kick */
+ intel_engine_flush_submission(engine);
rq = engine->heartbeat.systole;
if (rq && i915_request_completed(rq)) {
@@ -76,8 +83,19 @@ static void heartbeat(struct work_struct *wrk)
goto out;
if (engine->heartbeat.systole) {
- if (engine->schedule &&
- rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+ if (!i915_sw_fence_signaled(&rq->submit)) {
+ /*
+ * Not yet submitted, system is stalled.
+ *
+ * This more often happens for ring submission,
+ * where all contexts are funnelled into a common
+ * ringbuffer. If one context is blocked on an
+ * external fence, not only is it not submitted,
+ * but all other contexts, including the kernel
+ * context are stuck waiting for the signal.
+ */
+ } else if (engine->schedule &&
+ rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
/*
* Gradually raise the priority of the heartbeat to
* give high priority work [which presumably desires
@@ -105,10 +123,19 @@ static void heartbeat(struct work_struct *wrk)
goto out;
}
- if (engine->wakeref_serial == engine->serial)
+ serial = READ_ONCE(engine->serial);
+ if (engine->wakeref_serial == serial)
goto out;
- mutex_lock(&ce->timeline->mutex);
+ if (!mutex_trylock(&ce->timeline->mutex)) {
+ /* Unable to lock the kernel timeline, is the engine stuck? */
+ if (xchg(&engine->heartbeat.blocked, serial) == serial)
+ intel_gt_handle_error(engine->gt, engine->mask,
+ I915_ERROR_CAPTURE,
+ "no heartbeat on %s",
+ engine->name);
+ goto out;
+ }
intel_context_enter(ce);
rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
@@ -117,7 +144,7 @@ static void heartbeat(struct work_struct *wrk)
goto unlock;
idle_pulse(engine, rq);
- if (i915_modparams.enable_hangcheck)
+ if (engine->i915->params.enable_hangcheck)
engine->heartbeat.systole = i915_request_get(rq);
__i915_request_commit(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index d0a1078ef632..8ec3eecf3e39 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -142,6 +142,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
return true;
GEM_BUG_ON(!intel_context_is_barrier(ce));
+ GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma);
/* Already inside the kernel context, safe to power down. */
if (engine->wakeref_serial == engine->serial)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 2b6cdf47d428..8de92fd7d392 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -24,6 +24,7 @@
#include "i915_selftest.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
+#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"
@@ -176,8 +177,12 @@ struct intel_engine_execlists {
* the first error interrupt, record the EIR and schedule the tasklet.
* In the tasklet, we process the pending CS events to ensure we have
* the guilty request, and then reset the engine.
+ *
+ * Low 16b are used by HW, with the upper 16b used as the enabling mask.
+ * Reserve the upper 16b for tracking internal errors.
*/
u32 error_interrupt;
+#define ERROR_CSB BIT(31)
/**
* @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
@@ -313,6 +318,16 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
+ /*
+ * Some w/a require forcewake to be held (which prevents RC6) while
+ * a particular engine is active. If so, we set fw_domain to which
+ * domains need to be held for the duration of request activity,
+ * and 0 if none. We try to limit the duration of the hold as much
+ * as possible.
+ */
+ enum forcewake_domains fw_domain;
+ atomic_t fw_active;
+
unsigned long context_tag;
struct rb_node uabi_node;
@@ -337,6 +352,7 @@ struct intel_engine_cs {
struct {
struct delayed_work work;
struct i915_request *systole;
+ unsigned long blocked;
} heartbeat;
unsigned long serial;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 848decee9066..34e6096f196e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -201,7 +201,7 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
uabi_node);
char old[sizeof(engine->name)];
- if (intel_gt_has_init_error(engine->gt))
+ if (intel_gt_has_unrecoverable_error(engine->gt))
continue; /* ignore incomplete engines */
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 66165b10256e..62979ea591f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -108,13 +108,32 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
- struct i915_vma *vma;
+ struct i915_vma *vma, *vn;
+ int open;
+
+ mutex_lock(&ggtt->vm.mutex);
- list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
+ /* Skip rewriting PTE on VMA unbind. */
+ open = atomic_xchg(&ggtt->vm.open, 0);
+
+ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
i915_vma_wait_for_bind(vma);
+ if (i915_vma_is_pinned(vma))
+ continue;
+
+ if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+ __i915_vma_evict(vma);
+ drm_mm_remove_node(&vma->node);
+ }
+ }
+
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
ggtt->invalidate(ggtt);
+ atomic_set(&ggtt->vm.open, open);
+
+ mutex_unlock(&ggtt->vm.mutex);
intel_gt_check_and_clear_faults(ggtt->vm.gt);
}
@@ -417,35 +436,31 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}
-static int ggtt_bind_vma(struct i915_vma *vma,
+static int ggtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
+ if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
+ return 0;
+
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
pte_flags = 0;
if (i915_gem_object_is_readonly(obj))
pte_flags |= PTE_READ_ONLY;
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
-
+ vm->insert_entries(vm, vma, cache_level, pte_flags);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
- /*
- * Without aliasing PPGTT there's no difference between
- * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
- * upgrade to both bound if we bind either to avoid double-binding.
- */
- atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
-
return 0;
}
-static void ggtt_unbind_vma(struct i915_vma *vma)
+static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
{
- vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+ vm->clear_range(vm, vma->node.start, vma->size);
}
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
@@ -553,7 +568,8 @@ err:
return ret;
}
-static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+static int aliasing_gtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
@@ -566,44 +582,27 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
pte_flags |= PTE_READ_ONLY;
if (flags & I915_VMA_LOCAL_BIND) {
- struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
+ struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias;
- if (flags & I915_VMA_ALLOC) {
- ret = alias->vm.allocate_va_range(&alias->vm,
- vma->node.start,
- vma->size);
- if (ret)
- return ret;
-
- set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
- }
-
- GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
- __i915_vma_flags(vma)));
- alias->vm.insert_entries(&alias->vm, vma,
- cache_level, pte_flags);
+ ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags);
+ if (ret)
+ return ret;
}
if (flags & I915_VMA_GLOBAL_BIND)
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+ vm->insert_entries(vm, vma, cache_level, pte_flags);
return 0;
}
-static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
+static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
+ struct i915_vma *vma)
{
- if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
- struct i915_address_space *vm = vma->vm;
-
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
vm->clear_range(vm, vma->node.start, vma->size);
- }
-
- if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
- struct i915_address_space *vm =
- &i915_vm_to_ggtt(vma->vm)->alias->vm;
- vm->clear_range(vm, vma->node.start, vma->size);
- }
+ if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
+ ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
}
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
@@ -1166,6 +1165,11 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
ggtt->invalidate(ggtt);
}
+static unsigned int clear_bind(struct i915_vma *vma)
+{
+ return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags);
+}
+
void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
struct i915_vma *vma;
@@ -1183,14 +1187,11 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
+ unsigned int was_bound = clear_bind(vma);
- if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
- continue;
-
- clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
WARN_ON(i915_vma_bind(vma,
obj ? obj->cache_level : 0,
- PIN_GLOBAL, NULL));
+ was_bound, NULL));
if (obj) { /* only used during resume => exclusive access */
flush |= fetch_and_zero(&obj->write_domain);
obj->read_domains |= I915_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f069551e412f..e0755f1a904b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -44,6 +44,14 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
gt->ggtt = ggtt;
}
+int intel_gt_init_mmio(struct intel_gt *gt)
+{
+ intel_uc_init_mmio(&gt->uc);
+ intel_sseu_info_init(gt);
+
+ return intel_engines_init_mmio(gt);
+}
+
static void init_unused_ring(struct intel_gt *gt, u32 base)
{
struct intel_uncore *uncore = gt->uncore;
@@ -510,7 +518,7 @@ static int __engines_verify_workarounds(struct intel_gt *gt)
static void __intel_gt_disable(struct intel_gt *gt)
{
- intel_gt_set_wedged_on_init(gt);
+ intel_gt_set_wedged_on_fini(gt);
intel_gt_suspend_prepare(gt);
intel_gt_suspend_late(gt);
@@ -616,6 +624,11 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
void intel_gt_driver_release(struct intel_gt *gt)
{
struct i915_address_space *vm;
+ intel_wakeref_t wakeref;
+
+ /* Scrub all HW state upon release */
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ __intel_gt_reset(gt, ALL_ENGINES);
vm = fetch_and_zero(&gt->vm);
if (vm) /* FIXME being called twice on error paths :( */
@@ -637,3 +650,11 @@ void intel_gt_driver_late_release(struct intel_gt *gt)
intel_gt_fini_timelines(gt);
intel_engines_free(gt);
}
+
+void intel_gt_info_print(const struct intel_gt_info *info,
+ struct drm_printer *p)
+{
+ drm_printf(p, "available engines: %x\n", info->engine_mask);
+
+ intel_sseu_dump(&info->sseu, p);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4fac043750aa..9157c7411f60 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -11,6 +11,7 @@
#include "intel_reset.h"
struct drm_i915_private;
+struct drm_printer;
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
@@ -35,6 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
+int intel_gt_init_mmio(struct intel_gt *gt);
int __must_check intel_gt_init_hw(struct intel_gt *gt);
int intel_gt_init(struct intel_gt *gt);
void intel_gt_driver_register(struct intel_gt *gt);
@@ -58,14 +60,21 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
return i915_ggtt_offset(gt->scratch) + field;
}
-static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
+static inline bool intel_gt_has_unrecoverable_error(const struct intel_gt *gt)
{
- return __intel_reset_failed(&gt->reset);
+ return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags) ||
+ test_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
}
-static inline bool intel_gt_has_init_error(const struct intel_gt *gt)
+static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
{
- return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+ GEM_BUG_ON(intel_gt_has_unrecoverable_error(gt) &&
+ !test_bit(I915_WEDGED, &gt->reset.flags));
+
+ return unlikely(test_bit(I915_WEDGED, &gt->reset.flags));
}
+void intel_gt_info_print(const struct intel_gt_info *info,
+ struct drm_printer *p);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 1495054a4305..418ae184cecf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -212,8 +212,9 @@ void intel_gt_flush_buffer_pool(struct intel_gt *gt)
{
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
- if (cancel_delayed_work_sync(&pool->work))
+ do {
pool_free_imm(pool);
+ } while (cancel_delayed_work_sync(&pool->work));
}
void intel_gt_fini_buffer_pool(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 0cc7dd54f4f9..b05da68e52f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -27,7 +27,8 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
u32 eir;
- eir = ENGINE_READ(engine, RING_EIR);
+ /* Upper 16b are the enabling mask, rsvd for internal errors */
+ eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
ENGINE_TRACE(engine, "CS error: %x\n", eir);
/* Disable the error interrupt until after the reset */
@@ -457,7 +458,7 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
* RPS interrupts will get enabled/disabled on demand when RPS
* itself is enabled/disabled.
*/
- if (HAS_ENGINE(gt->i915, VECS0)) {
+ if (HAS_ENGINE(gt, VECS0)) {
pm_irqs |= PM_VEBOX_USER_INTERRUPT;
gt->pm_ier |= PM_VEBOX_USER_INTERRUPT;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 6bdb434a442d..274aa0dd7050 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -188,7 +188,7 @@ int intel_gt_resume(struct intel_gt *gt)
enum intel_engine_id id;
int err;
- err = intel_gt_has_init_error(gt);
+ err = intel_gt_has_unrecoverable_error(gt);
if (err)
return err;
@@ -214,8 +214,8 @@ int intel_gt_resume(struct intel_gt *gt)
/* Only when the HW is re-initialised, can we replay the requests */
err = intel_gt_init_hw(gt);
if (err) {
- drm_err(&gt->i915->drm,
- "Failed to initialize GPU, declaring it wedged!\n");
+ i915_probe_error(gt->i915,
+ "Failed to initialize GPU, declaring it wedged!\n");
goto err_wedged;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 16ff47c83bd5..66fcbf9d0fdd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -31,12 +31,15 @@ static bool engine_active(const struct intel_engine_cs *engine)
return !list_empty(&engine->kernel_context->timeline->requests);
}
-static bool flush_submission(struct intel_gt *gt)
+static bool flush_submission(struct intel_gt *gt, long timeout)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
bool active = false;
+ if (!timeout)
+ return false;
+
if (!intel_gt_pm_is_awake(gt))
return false;
@@ -139,7 +142,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
if (unlikely(timeout < 0))
timeout = -timeout, interruptible = false;
- flush_submission(gt); /* kick the ksoftirqd tasklets */
+ flush_submission(gt, timeout); /* kick the ksoftirqd tasklets */
spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
if (!mutex_trylock(&tl->mutex)) {
@@ -194,7 +197,7 @@ out_active: spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
- if (flush_submission(gt)) /* Wait, there's more! */
+ if (flush_submission(gt, timeout)) /* Wait, there's more! */
active_count++;
return active_count ? timeout : 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 0cc1d6b185dc..6d39a4a11bf3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,17 @@ struct intel_gt {
struct intel_gt_buffer_pool buffer_pool;
struct i915_vma *scratch;
+
+ struct intel_gt_info {
+ intel_engine_mask_t engine_mask;
+ u8 num_engines;
+
+ /* Media engine access to SFC per instance */
+ u8 vdbox_sfc_access;
+
+ /* Slice/subslice/EU info */
+ struct sseu_dev_info sseu;
+ } info;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index d93ebdf3fa0e..f2b75078e05f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -198,14 +198,16 @@ struct intel_gt;
struct i915_vma_ops {
/* Map an object into an address space with the given cache flags. */
- int (*bind_vma)(struct i915_vma *vma,
+ int (*bind_vma)(struct i915_address_space *vm,
+ struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
/*
* Unmap an object from an address space. This usually consists of
* setting the valid PTE entries to a reserved scratch page.
*/
- void (*unbind_vma)(struct i915_vma *vma);
+ void (*unbind_vma)(struct i915_address_space *vm,
+ struct i915_vma *vma);
int (*set_pages)(struct i915_vma *vma);
void (*clear_pages)(struct i915_vma *vma);
@@ -566,6 +568,13 @@ int ggtt_set_pages(struct i915_vma *vma);
int ppgtt_set_pages(struct i915_vma *vma);
void clear_pages(struct i915_vma *vma);
+int ppgtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
+void ppgtt_unbind_vma(struct i915_address_space *vm,
+ struct i915_vma *vma);
+
void gtt_write_workarounds(struct intel_gt *gt);
void setup_private_pat(struct intel_uncore *uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index cb07e1d2a353..24322ef08aa4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -446,6 +446,9 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
* we have to flip the index value to become priority.
*/
p = to_priolist(rb);
+ if (!I915_USER_PRIORITY_SHIFT)
+ return p->priority;
+
return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
}
@@ -1103,7 +1106,7 @@ static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
struct i915_request *rq, *rn, *active = NULL;
- struct list_head *uninitialized_var(pl);
+ struct list_head *pl;
int prio = I915_PRIORITY_INVALID;
lockdep_assert_held(&engine->active.lock);
@@ -1377,6 +1380,8 @@ __execlists_schedule_in(struct i915_request *rq)
ce->lrc.ccid |= engine->execlists.ccid;
__intel_gt_pm_get(engine->gt);
+ if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
+ intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1409,8 +1414,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct i915_request *next = READ_ONCE(ve->request);
- if (next && next->execution_mask & ~rq->execution_mask)
- tasklet_schedule(&ve->base.execlists.tasklet);
+ if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
+ tasklet_hi_schedule(&ve->base.execlists.tasklet);
}
static inline void
@@ -1445,6 +1450,8 @@ __execlists_schedule_out(struct i915_request *rq,
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
+ intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
intel_gt_pm_put_async(engine->gt);
/*
@@ -1636,9 +1643,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
ccid = ce->lrc.ccid;
/*
- * Sentinels are supposed to be lonely so they flush the
- * current exection off the HW. Check that they are the
- * only request in the pending submission.
+ * Sentinels are supposed to be the last request so they flush
+ * the current execution off the HW. Check that they are the only
+ * request in the pending submission.
*/
if (sentinel) {
GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
@@ -1647,15 +1654,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
port - execlists->pending);
return false;
}
-
sentinel = i915_request_has_sentinel(rq);
- if (sentinel && port != execlists->pending) {
- GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
- engine->name,
- ce->timeline->fence_context,
- port - execlists->pending);
- return false;
- }
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
@@ -1967,7 +1966,7 @@ static int
switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
{
if (list_is_last(&rq->sched.link, &engine->active.requests))
- return INT_MIN;
+ return engine->execlists.queue_priority_hint;
return rq_prio(list_next_entry(rq, sched.link));
}
@@ -2445,6 +2444,7 @@ done:
set_preempt_timeout(engine, *active);
execlists_submit_ports(engine);
} else {
+ start_timeslice(engine, execlists->queue_priority_hint);
skip_submit:
ring_set_paused(engine, 0);
}
@@ -2569,6 +2569,25 @@ static void process_csb(struct intel_engine_cs *engine)
return;
/*
+ * We will consume all events from HW, or at least pretend to.
+ *
+ * The sequence of events from the HW is deterministic, and derived
+ * from our writes to the ELSP, with a smidgen of variability for
+ * the arrival of the asynchronous requests wrt to the inflight
+ * execution. If the HW sends an event that does not correspond with
+ * the one we are expecting, we have to abandon all hope as we lose
+ * all tracking of what the engine is actually executing. We will
+ * only detect we are out of sequence with the HW when we get an
+ * 'impossible' event because we have already drained our own
+ * preemption/promotion queue. If this occurs, we know that we likely
+ * lost track of execution earlier and must unwind and restart, the
+ * simplest way is by stop processing the event queue and force the
+ * engine to reset.
+ */
+ execlists->csb_head = tail;
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
+
+ /*
* Hopefully paired with a wmb() in HW!
*
* We must complete the read of the write pointer before any reads
@@ -2577,8 +2596,6 @@ static void process_csb(struct intel_engine_cs *engine)
* we perform the READ_ONCE(*csb_write).
*/
rmb();
-
- ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
do {
bool promote;
@@ -2613,6 +2630,11 @@ static void process_csb(struct intel_engine_cs *engine)
if (promote) {
struct i915_request * const *old = execlists->active;
+ if (GEM_WARN_ON(!*execlists->pending)) {
+ execlists->error_interrupt |= ERROR_CSB;
+ break;
+ }
+
ring_set_paused(engine, 0);
/* Point active to the new ELSP; prevent overwriting */
@@ -2635,7 +2657,10 @@ static void process_csb(struct intel_engine_cs *engine)
WRITE_ONCE(execlists->pending[0], NULL);
} else {
- GEM_BUG_ON(!*execlists->active);
+ if (GEM_WARN_ON(!*execlists->active)) {
+ execlists->error_interrupt |= ERROR_CSB;
+ break;
+ }
/* port0 completed, advanced to port1 */
trace_ports(execlists, "completed", execlists->active);
@@ -2686,7 +2711,6 @@ static void process_csb(struct intel_engine_cs *engine)
}
} while (head != tail);
- execlists->csb_head = head;
set_timeslice(engine);
/*
@@ -3005,12 +3029,12 @@ static u32 active_ccid(struct intel_engine_cs *engine)
return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
}
-static bool execlists_capture(struct intel_engine_cs *engine)
+static void execlists_capture(struct intel_engine_cs *engine)
{
struct execlists_capture *cap;
if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
- return true;
+ return;
/*
* We need to _quickly_ capture the engine state before we reset.
@@ -3019,7 +3043,7 @@ static bool execlists_capture(struct intel_engine_cs *engine)
*/
cap = capture_regs(engine);
if (!cap)
- return true;
+ return;
spin_lock_irq(&engine->active.lock);
cap->rq = active_context(engine, active_ccid(engine));
@@ -3056,14 +3080,13 @@ static bool execlists_capture(struct intel_engine_cs *engine)
INIT_WORK(&cap->work, execlists_capture_work);
schedule_work(&cap->work);
- return true;
+ return;
err_rq:
i915_request_put(cap->rq);
err_free:
i915_gpu_coredump_put(cap->error);
kfree(cap);
- return false;
}
static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
@@ -3083,10 +3106,8 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
tasklet_disable_nosync(&engine->execlists.tasklet);
ring_set_paused(engine, 1); /* Freeze the current request in place */
- if (execlists_capture(engine))
- intel_engine_reset(engine, msg);
- else
- ring_set_paused(engine, 0);
+ execlists_capture(engine);
+ intel_engine_reset(engine, msg);
tasklet_enable(&engine->execlists.tasklet);
clear_and_wake_up_bit(bit, lock);
@@ -3117,9 +3138,18 @@ static void execlists_submission_tasklet(unsigned long data)
process_csb(engine);
if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
+ const char *msg;
+
+ /* Generate the error message in priority wrt to the user! */
+ if (engine->execlists.error_interrupt & GENMASK(15, 0))
+ msg = "CS error"; /* thrown by a user payload */
+ else if (engine->execlists.error_interrupt & ERROR_CSB)
+ msg = "invalid CSB event";
+ else
+ msg = "internal error";
+
engine->execlists.error_interrupt = 0;
- if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
- execlists_reset(engine, "CS error");
+ execlists_reset(engine, msg);
}
if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
@@ -3170,13 +3200,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
if (reset_in_progress(execlists))
return; /* defer until we restart the engine following reset */
- /* Hopefully we clear execlists->pending[] to let us through */
- if (READ_ONCE(execlists->pending[0]) &&
- tasklet_trylock(&execlists->tasklet)) {
- process_csb(engine);
- tasklet_unlock(&execlists->tasklet);
- }
-
__execlists_submission_tasklet(engine);
}
@@ -3199,11 +3222,25 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine,
return !list_empty(&engine->active.hold) && hold_request(rq);
}
+static void flush_csb(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *el = &engine->execlists;
+
+ if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
+ if (!reset_in_progress(el))
+ process_csb(engine);
+ tasklet_unlock(&el->tasklet);
+ }
+}
+
static void execlists_submit_request(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
+ /* Hopefully we clear execlists->pending[] to let us through */
+ flush_csb(engine);
+
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->active.lock, flags);
@@ -3415,7 +3452,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
/* RPCS */
if (engine->class == RENDER_CLASS) {
regs[CTX_R_PWR_CLK_STATE] =
- intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+ intel_sseu_make_rpcs(engine->gt, &ce->sseu);
i915_oa_init_reg_state(ce, engine);
}
@@ -3536,7 +3573,7 @@ static int emit_pdps(struct i915_request *rq)
int err, i;
u32 *cs;
- GEM_BUG_ON(intel_vgpu_active(rq->i915));
+ GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
/*
* Beware ye of the dragons, this sequence is magic!
@@ -3873,7 +3910,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
&wa_ctx->per_ctx };
wa_bb_func_t wa_bb_fn[2];
- struct page *page;
void *batch, *batch_ptr;
unsigned int i;
int ret;
@@ -3909,14 +3945,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
- page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
- batch = batch_ptr = kmap_atomic(page);
+ batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
/*
* Emit the two workaround batch buffers, recording the offset from the
* start of the workaround batch buffer object for each and their
* respective sizes.
*/
+ batch_ptr = batch;
for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
wa_bb[i]->offset = batch_ptr - batch;
if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
@@ -3928,10 +3964,10 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
batch_ptr = wa_bb_fn[i](engine, batch_ptr);
wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
}
+ GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
- BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
- kunmap_atomic(batch);
+ __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
+ __i915_gem_object_release_map(wa_ctx->vma->obj);
if (ret)
lrc_destroy_wa_ctx(engine);
@@ -4515,11 +4551,11 @@ static int gen8_emit_flush_render(struct i915_request *request,
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
* pipe control.
*/
- if (IS_GEN(request->i915, 9))
+ if (IS_GEN(request->engine->i915, 9))
vf_flush_wa = true;
/* WaForGAMHang:kbl */
- if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
+ if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
}
@@ -5587,7 +5623,7 @@ static void virtual_submit_request(struct i915_request *rq)
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
list_move_tail(&rq->sched.link, virtual_queue(ve));
- tasklet_schedule(&ve->base.execlists.tasklet);
+ tasklet_hi_schedule(&ve->base.execlists.tasklet);
}
spin_unlock_irqrestore(&ve->base.active.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index f86f7e68ce5e..f0862e924d11 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -155,16 +155,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
return ppgtt;
}
-static int ppgtt_bind_vma(struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+int ppgtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
u32 pte_flags;
int err;
- if (flags & I915_VMA_ALLOC) {
- err = vma->vm->allocate_va_range(vma->vm,
- vma->node.start, vma->size);
+ if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
+ err = vm->allocate_va_range(vm, vma->node.start, vma->size);
if (err)
return err;
@@ -176,17 +176,16 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
if (i915_gem_object_is_readonly(vma->obj))
pte_flags |= PTE_READ_ONLY;
- GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+ vm->insert_entries(vm, vma, cache_level, pte_flags);
wmb();
return 0;
}
-static void ppgtt_unbind_vma(struct i915_vma *vma)
+void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
{
if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
- vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+ vm->clear_range(vm, vma->node.start, vma->size);
}
int ppgtt_set_pages(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index f59e7875cc5e..1bfad589c63b 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -61,7 +61,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine)
#define OUT_BATCH(batch, i, val) \
do { \
if ((i) >= PAGE_SIZE / sizeof(u32)) \
- goto err; \
+ goto out; \
(batch)[(i)++] = (val); \
} while(0)
@@ -70,15 +70,12 @@ static int render_state_setup(struct intel_renderstate *so,
{
const struct intel_renderstate_rodata *rodata = so->rodata;
unsigned int i = 0, reloc_index = 0;
- unsigned int needs_clflush;
+ int ret = -EINVAL;
u32 *d;
- int ret;
- ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
- if (ret)
- return ret;
-
- d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
+ d = i915_gem_object_pin_map(so->vma->obj, I915_MAP_WB);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
@@ -89,7 +86,7 @@ static int render_state_setup(struct intel_renderstate *so,
if (HAS_64BIT_RELOC(i915)) {
if (i + 1 >= rodata->batch_items ||
rodata->batch[i + 1] != 0)
- goto err;
+ goto out;
d[i++] = s;
s = upper_32_bits(r);
@@ -103,7 +100,7 @@ static int render_state_setup(struct intel_renderstate *so,
if (rodata->reloc[reloc_index] != -1) {
drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index);
- goto err;
+ goto out;
}
so->batch_offset = i915_ggtt_offset(so->vma);
@@ -150,19 +147,11 @@ static int render_state_setup(struct intel_renderstate *so,
*/
so->aux_size = ALIGN(so->aux_size, 8);
- if (needs_clflush)
- drm_clflush_virt_range(d, i * sizeof(u32));
- kunmap_atomic(d);
-
ret = 0;
out:
- i915_gem_object_finish_access(so->vma->obj);
+ __i915_gem_object_flush_map(so->vma->obj, 0, i * sizeof(u32));
+ __i915_gem_object_release_map(so->vma->obj);
return ret;
-
-err:
- kunmap_atomic(d);
- ret = -EINVAL;
- goto out;
}
#undef OUT_BATCH
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 39070b514e65..46a5ceffc22f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -342,7 +342,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
struct intel_uncore *uncore = engine->uncore;
- u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
+ u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access;
i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
i915_reg_t sfc_usage;
@@ -417,7 +417,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
static void gen11_unlock_sfc(struct intel_engine_cs *engine)
{
struct intel_uncore *uncore = engine->uncore;
- u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
+ u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access;
i915_reg_t sfc_forced_lock;
u32 sfc_forced_lock_bit;
@@ -638,7 +638,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
bool intel_has_gpu_reset(const struct intel_gt *gt)
{
- if (!i915_modparams.reset)
+ if (!gt->i915->params.reset)
return NULL;
return intel_get_gpu_reset(gt);
@@ -646,7 +646,7 @@ bool intel_has_gpu_reset(const struct intel_gt *gt)
bool intel_has_reset_engine(const struct intel_gt *gt)
{
- if (i915_modparams.reset < 2)
+ if (gt->i915->params.reset < 2)
return false;
return INTEL_INFO(gt->i915)->has_reset_engine;
@@ -880,7 +880,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
return true;
/* Never fully initialised, recovery impossible */
- if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
+ if (intel_gt_has_unrecoverable_error(gt))
return false;
GT_TRACE(gt, "start\n");
@@ -930,7 +930,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
* Warn CI about the unrecoverable wedged condition.
* Time for a reboot.
*/
- add_taint_for_CI(TAINT_WARN);
+ add_taint_for_CI(gt->i915, TAINT_WARN);
return false;
}
@@ -1038,7 +1038,7 @@ void intel_gt_reset(struct intel_gt *gt,
awake = reset_prepare(gt);
if (!intel_has_gpu_reset(gt)) {
- if (i915_modparams.reset)
+ if (gt->i915->params.reset)
drm_err(&gt->i915->drm, "GPU reset not supported\n");
else
drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
@@ -1097,7 +1097,7 @@ taint:
* rather than continue on into oblivion. For everyone else,
* the system should still plod along, but they have been warned!
*/
- add_taint_for_CI(TAINT_WARN);
+ add_taint_for_CI(gt->i915, TAINT_WARN);
error:
__intel_gt_set_wedged(gt);
goto finish;
@@ -1246,7 +1246,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
*/
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
+ engine_mask &= gt->info.engine_mask;
if (flags & I915_ERROR_CAPTURE) {
i915_capture_error_state(gt->i915);
@@ -1342,7 +1342,7 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
if (!intel_gt_is_wedged(gt))
return 0;
- if (intel_gt_has_init_error(gt))
+ if (intel_gt_has_unrecoverable_error(gt))
return -EIO;
/* Reset still in progress? Maybe we will recover? */
@@ -1360,6 +1360,15 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
I915_WEDGED_ON_INIT);
intel_gt_set_wedged(gt);
set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+
+ /* Wedged on init is non-recoverable */
+ add_taint_for_CI(gt->i915, TAINT_WARN);
+}
+
+void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
+{
+ intel_gt_set_wedged(gt);
+ set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
}
void intel_gt_init_reset(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 8e8d5f761166..a0eec7c11c0c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -47,8 +47,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
/*
* There's no unset_wedged_on_init paired with this one.
* Once we're wedged on init, there's no going back.
+ * Same thing for unset_wedged_on_fini.
*/
void intel_gt_set_wedged_on_init(struct intel_gt *gt);
+void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
@@ -71,14 +73,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
(W)->gt; \
__intel_fini_wedge((W)))
-static inline bool __intel_reset_failed(const struct intel_reset *reset)
-{
- GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ?
- !test_bit(I915_WEDGED, &reset->flags) : false);
-
- return unlikely(test_bit(I915_WEDGED, &reset->flags));
-}
-
bool intel_has_gpu_reset(const struct intel_gt *gt);
bool intel_has_reset_engine(const struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index f43bc3a0fe4f..add6b86d9d03 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -34,12 +34,17 @@ struct intel_reset {
* longer use the GPU - similar to #I915_WEDGED bit. The difference in
* in the way we're handling "forced" unwedged (e.g. through debugfs),
* which is not allowed in case we failed to initialize.
+ *
+ * #I915_WEDGED_ON_FINI - Similar to #I915_WEDGED_ON_INIT, except we
+ * use it to mark that the GPU is no longer available (and prevent
+ * users from using it).
*/
unsigned long flags;
#define I915_RESET_BACKOFF 0
#define I915_RESET_MODESET 1
#define I915_RESET_ENGINE 2
-#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2)
+#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 3)
+#define I915_WEDGED_ON_FINI (BITS_PER_LONG - 2)
#define I915_WEDGED (BITS_PER_LONG - 1)
struct mutex mutex; /* serialises wedging/unwedging */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index ca7286e58409..94915f668715 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -27,21 +27,15 @@
*
*/
-#include <linux/log2.h>
-
-#include "gem/i915_gem_context.h"
-
+#include "gen2_engine_cs.h"
+#include "gen6_engine_cs.h"
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
-#include "i915_trace.h"
#include "intel_context.h"
#include "intel_gt.h"
-#include "intel_gt_irq.h"
-#include "intel_gt_pm_irq.h"
#include "intel_reset.h"
#include "intel_ring.h"
-#include "intel_workarounds.h"
#include "shmem_utils.h"
/* Rough estimate of the typical request size, performing a flush,
@@ -49,436 +43,6 @@
*/
#define LEGACY_REQUEST_SIZE 200
-static int
-gen2_render_ring_flush(struct i915_request *rq, u32 mode)
-{
- unsigned int num_store_dw;
- u32 cmd, *cs;
-
- cmd = MI_FLUSH;
- num_store_dw = 0;
- if (mode & EMIT_INVALIDATE)
- cmd |= MI_READ_FLUSH;
- if (mode & EMIT_FLUSH)
- num_store_dw = 4;
-
- cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = cmd;
- while (num_store_dw--) {
- *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_DEFAULT);
- *cs++ = 0;
- }
- *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int
-gen4_render_ring_flush(struct i915_request *rq, u32 mode)
-{
- u32 cmd, *cs;
- int i;
-
- /*
- * read/write caches:
- *
- * I915_GEM_DOMAIN_RENDER is always invalidated, but is
- * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
- * also flushed at 2d versus 3d pipeline switches.
- *
- * read-only caches:
- *
- * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
- * MI_READ_FLUSH is set, and is always flushed on 965.
- *
- * I915_GEM_DOMAIN_COMMAND may not exist?
- *
- * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
- * invalidated when MI_EXE_FLUSH is set.
- *
- * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
- * invalidated with every MI_FLUSH.
- *
- * TLBs:
- *
- * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
- * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
- * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
- * are flushed at any MI_FLUSH.
- */
-
- cmd = MI_FLUSH;
- if (mode & EMIT_INVALIDATE) {
- cmd |= MI_EXE_FLUSH;
- if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
- cmd |= MI_INVALIDATE_ISP;
- }
-
- i = 2;
- if (mode & EMIT_INVALIDATE)
- i += 20;
-
- cs = intel_ring_begin(rq, i);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = cmd;
-
- /*
- * A random delay to let the CS invalidate take effect? Without this
- * delay, the GPU relocation path fails as the CS does not see
- * the updated contents. Just as important, if we apply the flushes
- * to the EMIT_FLUSH branch (i.e. immediately after the relocation
- * write and before the invalidate on the next batch), the relocations
- * still fail. This implies that is a delay following invalidation
- * that is required to reset the caches as opposed to a delay to
- * ensure the memory is written.
- */
- if (mode & EMIT_INVALIDATE) {
- *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_DEFAULT) |
- PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- *cs++ = 0;
-
- for (i = 0; i < 12; i++)
- *cs++ = MI_FLUSH;
-
- *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_DEFAULT) |
- PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- *cs++ = 0;
- }
-
- *cs++ = cmd;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/*
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gen6. From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- * "1 of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- * - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it. Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either. Notify enable is IRQs, which aren't
- * really our business. That leaves only stall at scoreboard.
- */
-static int
-gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
-{
- u32 scratch_addr =
- intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
- u32 *cs;
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(5);
- *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0; /* low dword */
- *cs++ = 0; /* high dword */
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(5);
- *cs++ = PIPE_CONTROL_QW_WRITE;
- *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- *cs++ = 0;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int
-gen6_render_ring_flush(struct i915_request *rq, u32 mode)
-{
- u32 scratch_addr =
- intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
- u32 *cs, flags = 0;
- int ret;
-
- /* Force SNB workarounds for PIPE_CONTROL flushes */
- ret = gen6_emit_post_sync_nonzero_flush(rq);
- if (ret)
- return ret;
-
- /* Just flush everything. Experiments have shown that reducing the
- * number of bits based on the write domains has little performance
- * impact.
- */
- if (mode & EMIT_FLUSH) {
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- /*
- * Ensure that any following seqno writes only happen
- * when the render cache is indeed flushed.
- */
- flags |= PIPE_CONTROL_CS_STALL;
- }
- if (mode & EMIT_INVALIDATE) {
- flags |= PIPE_CONTROL_TLB_INVALIDATE;
- flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- /*
- * TLB invalidate requires a post-sync write.
- */
- flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
- }
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = flags;
- *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- *cs++ = 0;
- *cs++ = 0;
-
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = PIPE_CONTROL_QW_WRITE;
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_DEFAULT) |
- PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
-
- /* Finally we can flush and with it emit the breadcrumb */
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_CS_STALL);
- *cs++ = i915_request_active_timeline(rq)->hwsp_offset |
- PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-static int
-gen7_render_ring_cs_stall_wa(struct i915_request *rq)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- *cs++ = 0;
- *cs++ = 0;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int
-gen7_render_ring_flush(struct i915_request *rq, u32 mode)
-{
- u32 scratch_addr =
- intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
- u32 *cs, flags = 0;
-
- /*
- * Ensure that any following seqno writes only happen when the render
- * cache is indeed flushed.
- *
- * Workaround: 4th PIPE_CONTROL command (except the ones with only
- * read-cache invalidate bits set) must have the CS_STALL bit set. We
- * don't try to be clever and just set it unconditionally.
- */
- flags |= PIPE_CONTROL_CS_STALL;
-
- /*
- * CS_STALL suggests at least a post-sync write.
- */
- flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
- /* Just flush everything. Experiments have shown that reducing the
- * number of bits based on the write domains has little performance
- * impact.
- */
- if (mode & EMIT_FLUSH) {
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
- }
- if (mode & EMIT_INVALIDATE) {
- flags |= PIPE_CONTROL_TLB_INVALIDATE;
- flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
-
- /* Workaround: we must issue a pipe_control with CS-stall bit
- * set before a pipe_control command that has the state cache
- * invalidate bit set. */
- gen7_render_ring_cs_stall_wa(rq);
- }
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = flags;
- *cs++ = scratch_addr;
- *cs++ = 0;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_CS_STALL);
- *cs++ = i915_request_active_timeline(rq)->hwsp_offset;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_USER_INTERRUPT;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-#define GEN7_XCS_WA 32
-static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- int i;
-
- GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
- MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = rq->fence.seqno;
-
- for (i = 0; i < GEN7_XCS_WA; i++) {
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR;
- *cs++ = rq->fence.seqno;
- }
-
- *cs++ = MI_FLUSH_DW;
- *cs++ = 0;
- *cs++ = 0;
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-#undef GEN7_XCS_WA
-
static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
/*
@@ -865,32 +429,6 @@ static void reset_finish(struct intel_engine_cs *engine)
{
}
-static int rcs_resume(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct intel_uncore *uncore = engine->uncore;
-
- /*
- * Disable CONSTANT_BUFFER before it is loaded from the context
- * image. For as it is loaded, it is executed and the stored
- * address may no longer be valid, leading to a GPU hang.
- *
- * This imposes the requirement that userspace reload their
- * CONSTANT_BUFFER on every batch, fortunately a requirement
- * they are already accustomed to from before contexts were
- * enabled.
- */
- if (IS_GEN(i915, 4))
- intel_uncore_write(uncore, ECOSKPD,
- _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
-
- if (IS_GEN_RANGE(i915, 6, 7))
- intel_uncore_write(uncore, INSTPM,
- _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
- return xcs_resume(engine);
-}
-
static void reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
@@ -918,255 +456,6 @@ static void i9xx_submit_request(struct i915_request *request)
intel_ring_set_tail(request->ring, request->tail));
}
-static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH;
-
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-#define GEN5_WA_STORES 8 /* must be at least 1! */
-static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- int i;
-
- GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH;
-
- BUILD_BUG_ON(GEN5_WA_STORES < 1);
- for (i = 0; i < GEN5_WA_STORES; i++) {
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR;
- *cs++ = rq->fence.seqno;
- }
-
- *cs++ = MI_USER_INTERRUPT;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-#undef GEN5_WA_STORES
-
-static void
-gen5_irq_enable(struct intel_engine_cs *engine)
-{
- gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-gen5_irq_disable(struct intel_engine_cs *engine)
-{
- gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-i9xx_irq_enable(struct intel_engine_cs *engine)
-{
- engine->i915->irq_mask &= ~engine->irq_enable_mask;
- intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
- intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
-}
-
-static void
-i9xx_irq_disable(struct intel_engine_cs *engine)
-{
- engine->i915->irq_mask |= engine->irq_enable_mask;
- intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
-}
-
-static void
-i8xx_irq_enable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask &= ~engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
- ENGINE_POSTING_READ16(engine, RING_IMR);
-}
-
-static void
-i8xx_irq_disable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask |= engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
-}
-
-static int
-bsd_ring_flush(struct i915_request *rq, u32 mode)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_FLUSH;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
- return 0;
-}
-
-static void
-gen6_irq_enable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR,
- ~(engine->irq_enable_mask | engine->irq_keep_mask));
-
- /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
- ENGINE_POSTING_READ(engine, RING_IMR);
-
- gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-gen6_irq_disable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
- gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-hsw_vebox_irq_enable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);
-
- /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
- ENGINE_POSTING_READ(engine, RING_IMR);
-
- gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static void
-hsw_vebox_irq_disable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~0);
- gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
-}
-
-static int
-i965_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 length,
- unsigned int dispatch_flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
- I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
- *cs++ = offset;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT SZ_256K
-#define I830_TLB_ENTRIES (2)
-#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
-static int
-i830_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs, cs_offset =
- intel_gt_scratch_offset(rq->engine->gt,
- INTEL_GT_SCRATCH_FIELD_DEFAULT);
-
- GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Evict the invalid PTE TLBs */
- *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
- *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
- *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
- *cs++ = cs_offset;
- *cs++ = 0xdeadbeef;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
- if (len > I830_BATCH_LIMIT)
- return -ENOSPC;
-
- cs = intel_ring_begin(rq, 6 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Blit the batch (which has now all relocs applied) to the
- * stable batch scratch bo area (so that the CS never
- * stumbles over its tlb invalidation bug) ...
- */
- *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
- *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
- *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
- *cs++ = cs_offset;
- *cs++ = 4096;
- *cs++ = offset;
-
- *cs++ = MI_FLUSH;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- /* ... and execute it. */
- offset = cs_offset;
- }
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
- *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
- MI_BATCH_NON_SECURE);
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int
-i915_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
- *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
- MI_BATCH_NON_SECURE);
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
static void __ring_context_fini(struct intel_context *ce)
{
i915_vma_put(ce->state);
@@ -1254,7 +543,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
vaddr, engine->context_size);
i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
+ __i915_gem_object_release_map(obj);
}
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
@@ -1356,11 +645,11 @@ static inline int mi_set_context(struct i915_request *rq,
struct intel_context *ce,
u32 flags)
{
- struct drm_i915_private *i915 = rq->i915;
struct intel_engine_cs *engine = rq->engine;
+ struct drm_i915_private *i915 = engine->i915;
enum intel_engine_id id;
const int num_engines =
- IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
@@ -1471,7 +760,7 @@ static inline int mi_set_context(struct i915_request *rq,
static int remap_l3_slice(struct i915_request *rq, int slice)
{
- u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
+ u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice];
int i;
if (!remap_info)
@@ -1582,7 +871,7 @@ static int switch_context(struct i915_request *rq)
void **residuals = NULL;
int ret;
- GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+ GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
if (engine->wa_ctx.vma && ce != engine->kernel_context) {
if (engine->wa_ctx.vma->private != ce) {
@@ -1704,99 +993,6 @@ static void gen6_bsd_submit_request(struct i915_request *request)
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}
-static int mi_flush_dw(struct i915_request *rq, u32 flags)
-{
- u32 cmd, *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cmd = MI_FLUSH_DW;
-
- /*
- * We always require a command barrier so that subsequent
- * commands, such as breadcrumb interrupts, are strictly ordered
- * wrt the contents of the write cache being flushed to memory
- * (and thus being coherent from the CPU).
- */
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
- /*
- * Bspec vol 1c.3 - blitter engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- cmd |= flags;
-
- *cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
-{
- return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
-}
-
-static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
-{
- return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
-}
-
-static int
-hsw_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
- 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
- /* bit0-7 is the length on GEN6+ */
- *cs++ = offset;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int
-gen6_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
- 0 : MI_BATCH_NON_SECURE_I965);
- /* bit0-7 is the length on GEN6+ */
- *cs++ = offset;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/* Blitter support (SandyBridge+) */
-
-static int gen6_ring_flush(struct i915_request *rq, u32 mode)
-{
- return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
-}
-
static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = i9xx_submit_request;
@@ -1843,11 +1039,11 @@ static void setup_irq(struct intel_engine_cs *engine)
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
} else if (INTEL_GEN(i915) >= 3) {
- engine->irq_enable = i9xx_irq_enable;
- engine->irq_disable = i9xx_irq_disable;
+ engine->irq_enable = gen3_irq_enable;
+ engine->irq_disable = gen3_irq_disable;
} else {
- engine->irq_enable = i8xx_irq_enable;
- engine->irq_disable = i8xx_irq_disable;
+ engine->irq_enable = gen2_irq_enable;
+ engine->irq_disable = gen2_irq_disable;
}
}
@@ -1874,7 +1070,7 @@ static void setup_common(struct intel_engine_cs *engine)
* equivalent to our next initial bread so we can elide
* engine->emit_init_breadcrumb().
*/
- engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
if (IS_GEN(i915, 5))
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
@@ -1883,11 +1079,11 @@ static void setup_common(struct intel_engine_cs *engine)
if (INTEL_GEN(i915) >= 6)
engine->emit_bb_start = gen6_emit_bb_start;
else if (INTEL_GEN(i915) >= 4)
- engine->emit_bb_start = i965_emit_bb_start;
+ engine->emit_bb_start = gen4_emit_bb_start;
else if (IS_I830(i915) || IS_I845G(i915))
engine->emit_bb_start = i830_emit_bb_start;
else
- engine->emit_bb_start = i915_emit_bb_start;
+ engine->emit_bb_start = gen3_emit_bb_start;
}
static void setup_rcs(struct intel_engine_cs *engine)
@@ -1900,25 +1096,23 @@ static void setup_rcs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
if (INTEL_GEN(i915) >= 7) {
- engine->emit_flush = gen7_render_ring_flush;
- engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
+ engine->emit_flush = gen7_emit_flush_rcs;
+ engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
} else if (IS_GEN(i915, 6)) {
- engine->emit_flush = gen6_render_ring_flush;
- engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
+ engine->emit_flush = gen6_emit_flush_rcs;
+ engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
} else if (IS_GEN(i915, 5)) {
- engine->emit_flush = gen4_render_ring_flush;
+ engine->emit_flush = gen4_emit_flush_rcs;
} else {
if (INTEL_GEN(i915) < 4)
- engine->emit_flush = gen2_render_ring_flush;
+ engine->emit_flush = gen2_emit_flush;
else
- engine->emit_flush = gen4_render_ring_flush;
+ engine->emit_flush = gen4_emit_flush_rcs;
engine->irq_enable_mask = I915_USER_INTERRUPT;
}
if (IS_HASWELL(i915))
engine->emit_bb_start = hsw_emit_bb_start;
-
- engine->resume = rcs_resume;
}
static void setup_vcs(struct intel_engine_cs *engine)
@@ -1929,15 +1123,15 @@ static void setup_vcs(struct intel_engine_cs *engine)
/* gen6 bsd needs a special wa for tail updates */
if (IS_GEN(i915, 6))
engine->set_default_submission = gen6_bsd_set_default_submission;
- engine->emit_flush = gen6_bsd_ring_flush;
+ engine->emit_flush = gen6_emit_flush_vcs;
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
if (IS_GEN(i915, 6))
- engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
else
- engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
} else {
- engine->emit_flush = bsd_ring_flush;
+ engine->emit_flush = gen4_emit_flush_vcs;
if (IS_GEN(i915, 5))
engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
else
@@ -1949,13 +1143,13 @@ static void setup_bcs(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- engine->emit_flush = gen6_ring_flush;
+ engine->emit_flush = gen6_emit_flush_xcs;
engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
if (IS_GEN(i915, 6))
- engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
else
- engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}
static void setup_vecs(struct intel_engine_cs *engine)
@@ -1964,12 +1158,12 @@ static void setup_vecs(struct intel_engine_cs *engine)
GEM_BUG_ON(INTEL_GEN(i915) < 7);
- engine->emit_flush = gen6_ring_flush;
+ engine->emit_flush = gen6_emit_flush_xcs;
engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
- engine->irq_enable = hsw_vebox_irq_enable;
- engine->irq_disable = hsw_vebox_irq_disable;
+ engine->irq_enable = hsw_irq_enable_vecs;
+ engine->irq_disable = hsw_irq_disable_vecs;
- engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}
static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 2f59fc6df3c2..97ba14ad52e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -51,15 +51,16 @@ static void rps_timer(struct timer_list *t)
{
struct intel_rps *rps = from_timer(rps, t, timer);
struct intel_engine_cs *engine;
+ ktime_t dt, last, timestamp;
enum intel_engine_id id;
s64 max_busy[3] = {};
- ktime_t dt, last;
+ timestamp = 0;
for_each_engine(engine, rps_to_gt(rps), id) {
s64 busy;
int i;
- dt = intel_engine_get_busy_time(engine);
+ dt = intel_engine_get_busy_time(engine, &timestamp);
last = engine->stats.rps;
engine->stats.rps = dt;
@@ -69,16 +70,14 @@ static void rps_timer(struct timer_list *t)
swap(busy, max_busy[i]);
}
}
-
- dt = ktime_get();
last = rps->pm_timestamp;
- rps->pm_timestamp = dt;
+ rps->pm_timestamp = timestamp;
if (intel_rps_is_active(rps)) {
s64 busy;
int i;
- dt = ktime_sub(dt, last);
+ dt = ktime_sub(timestamp, last);
/*
* Our goal is to evaluate each engine independently, so we run
@@ -1063,11 +1062,12 @@ static bool gen6_rps_enable(struct intel_rps *rps)
static int chv_rps_max_freq(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
u32 val;
val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
- switch (RUNTIME_INFO(i915)->sseu.eu_total) {
+ switch (gt->info.sseu.eu_total) {
case 8:
/* (2 * 4) config */
val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index d173271c7397..f1c039e1b5ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -60,10 +60,552 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
return hweight32(intel_sseu_get_subslices(sseu, slice));
}
-u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
+ int subslice)
+{
+ int slice_stride = sseu->max_subslices * sseu->eu_stride;
+
+ return slice * slice_stride + subslice * sseu->eu_stride;
+}
+
+static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
+ int subslice)
+{
+ int i, offset = sseu_eu_idx(sseu, slice, subslice);
+ u16 eu_mask = 0;
+
+ for (i = 0; i < sseu->eu_stride; i++)
+ eu_mask |=
+ ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+
+ return eu_mask;
+}
+
+static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
+ u16 eu_mask)
+{
+ int i, offset = sseu_eu_idx(sseu, slice, subslice);
+
+ for (i = 0; i < sseu->eu_stride; i++)
+ sseu->eu_mask[offset + i] =
+ (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+}
+
+static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+{
+ u16 i, total = 0;
+
+ for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
+ total += hweight8(sseu->eu_mask[i]);
+
+ return total;
+}
+
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
+ u8 s_en, u32 ss_en, u16 eu_en)
+{
+ int s, ss;
+
+ /* ss_en represents entire subslice mask across all slices */
+ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
+ sizeof(ss_en) * BITS_PER_BYTE);
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ if ((s_en & BIT(s)) == 0)
+ continue;
+
+ sseu->slice_mask |= BIT(s);
+
+ intel_sseu_set_subslices(sseu, s, ss_en);
+
+ for (ss = 0; ss < sseu->max_subslices; ss++)
+ if (intel_sseu_has_subslice(sseu, s, ss))
+ sseu_set_eus(sseu, s, ss, eu_en);
+ }
+ sseu->eu_per_subslice = hweight16(eu_en);
+ sseu->eu_total = compute_eu_total(sseu);
+}
+
+static void gen12_sseu_info_init(struct intel_gt *gt)
+{
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 dss_en;
+ u16 eu_en = 0;
+ u8 eu_en_fuse;
+ u8 s_en;
+ int eu;
+
+ /*
+ * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
+ * Instead of splitting these, provide userspace with an array
+ * of DSS to more closely represent the hardware resource.
+ */
+ intel_sseu_set_info(sseu, 1, 6, 16);
+
+ s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
+ GEN11_GT_S_ENA_MASK;
+
+ dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE);
+
+ /* one bit per pair of EUs */
+ eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
+ GEN11_EU_DIS_MASK);
+ for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+ if (eu_en_fuse & BIT(eu))
+ eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+ gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
+
+ /* TGL only supports slice-level power gating */
+ sseu->has_slice_pg = 1;
+}
+
+static void gen11_sseu_info_init(struct intel_gt *gt)
+{
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 ss_en;
+ u8 eu_en;
+ u8 s_en;
+
+ if (IS_ELKHARTLAKE(gt->i915))
+ intel_sseu_set_info(sseu, 1, 4, 8);
+ else
+ intel_sseu_set_info(sseu, 1, 8, 8);
+
+ s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
+ GEN11_GT_S_ENA_MASK;
+ ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);
+
+ eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
+ GEN11_EU_DIS_MASK);
+
+ gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
+
+ /* ICL has no power gating restrictions. */
+ sseu->has_slice_pg = 1;
+ sseu->has_subslice_pg = 1;
+ sseu->has_eu_pg = 1;
+}
+
+static void gen10_sseu_info_init(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ const u32 fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+ const int eu_mask = 0xff;
+ u32 subslice_mask, eu_en;
+ int s, ss;
+
+ intel_sseu_set_info(sseu, 6, 4, 8);
+
+ sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
+ GEN10_F2_S_ENA_SHIFT;
+
+ /* Slice0 */
+ eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE0);
+ for (ss = 0; ss < sseu->max_subslices; ss++)
+ sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
+ /* Slice1 */
+ sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
+ eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE1);
+ sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
+ /* Slice2 */
+ sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
+ sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
+ /* Slice3 */
+ sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
+ eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE2);
+ sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
+ /* Slice4 */
+ sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
+ sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
+ /* Slice5 */
+ sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
+ eu_en = ~intel_uncore_read(uncore, GEN10_EU_DISABLE3);
+ sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
+
+ subslice_mask = (1 << 4) - 1;
+ subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+ GEN10_F2_SS_DIS_SHIFT);
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ u32 subslice_mask_with_eus = subslice_mask;
+
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ if (sseu_get_eus(sseu, s, ss) == 0)
+ subslice_mask_with_eus &= ~BIT(ss);
+ }
+
+ /*
+ * Slice0 can have up to 3 subslices, but there are only 2 in
+ * slice1/2.
+ */
+ intel_sseu_set_subslices(sseu, s, s == 0 ?
+ subslice_mask_with_eus :
+ subslice_mask_with_eus & 0x3);
+ }
+
+ sseu->eu_total = compute_eu_total(sseu);
+
+ /*
+ * CNL is expected to always have a uniform distribution
+ * of EU across subslices with the exception that any one
+ * EU in any one subslice may be fused off for die
+ * recovery.
+ */
+ sseu->eu_per_subslice =
+ intel_sseu_subslice_total(sseu) ?
+ DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
+ 0;
+
+ /* No restrictions on Power Gating */
+ sseu->has_slice_pg = 1;
+ sseu->has_subslice_pg = 1;
+ sseu->has_eu_pg = 1;
+}
+
+static void cherryview_sseu_info_init(struct intel_gt *gt)
+{
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ u32 fuse;
+ u8 subslice_mask = 0;
+
+ fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
+
+ sseu->slice_mask = BIT(0);
+ intel_sseu_set_info(sseu, 1, 2, 8);
+
+ if (!(fuse & CHV_FGT_DISABLE_SS0)) {
+ u8 disabled_mask =
+ ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
+ CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
+ (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
+ CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+
+ subslice_mask |= BIT(0);
+ sseu_set_eus(sseu, 0, 0, ~disabled_mask);
+ }
+
+ if (!(fuse & CHV_FGT_DISABLE_SS1)) {
+ u8 disabled_mask =
+ ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
+ CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
+ (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
+ CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
+
+ subslice_mask |= BIT(1);
+ sseu_set_eus(sseu, 0, 1, ~disabled_mask);
+ }
+
+ intel_sseu_set_subslices(sseu, 0, subslice_mask);
+
+ sseu->eu_total = compute_eu_total(sseu);
+
+ /*
+ * CHV expected to always have a uniform distribution of EU
+ * across subslices.
+ */
+ sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
+ sseu->eu_total /
+ intel_sseu_subslice_total(sseu) :
+ 0;
+ /*
+ * CHV supports subslice power gating on devices with more than
+ * one subslice, and supports EU power gating on devices with
+ * more than one EU pair per subslice.
+ */
+ sseu->has_slice_pg = 0;
+ sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
+ sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
+}
+
+static void gen9_sseu_info_init(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_device_info *info = mkwrite_device_info(i915);
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 fuse2, eu_disable, subslice_mask;
+ const u8 eu_mask = 0xff;
+ int s, ss;
+
+ fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+ sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+
+ /* BXT has a single slice and at most 3 subslices. */
+ intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
+ IS_GEN9_LP(i915) ? 3 : 4, 8);
+
+ /*
+ * The subslice disable field is global, i.e. it applies
+ * to each of the enabled slices.
+ */
+ subslice_mask = (1 << sseu->max_subslices) - 1;
+ subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
+ GEN9_F2_SS_DIS_SHIFT);
+
+ /*
+ * Iterate through enabled slices and subslices to
+ * count the total enabled EU.
+ */
+ for (s = 0; s < sseu->max_slices; s++) {
+ if (!(sseu->slice_mask & BIT(s)))
+ /* skip disabled slice */
+ continue;
+
+ intel_sseu_set_subslices(sseu, s, subslice_mask);
+
+ eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ int eu_per_ss;
+ u8 eu_disabled_mask;
+
+ if (!intel_sseu_has_subslice(sseu, s, ss))
+ /* skip disabled subslice */
+ continue;
+
+ eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;
+
+ sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+ eu_per_ss = sseu->max_eus_per_subslice -
+ hweight8(eu_disabled_mask);
+
+ /*
+ * Record which subslice(s) has(have) 7 EUs. we
+ * can tune the hash used to spread work among
+ * subslices if they are unbalanced.
+ */
+ if (eu_per_ss == 7)
+ sseu->subslice_7eu[s] |= BIT(ss);
+ }
+ }
+
+ sseu->eu_total = compute_eu_total(sseu);
+
+ /*
+ * SKL is expected to always have a uniform distribution
+ * of EU across subslices with the exception that any one
+ * EU in any one subslice may be fused off for die
+ * recovery. BXT is expected to be perfectly uniform in EU
+ * distribution.
+ */
+ sseu->eu_per_subslice =
+ intel_sseu_subslice_total(sseu) ?
+ DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
+ 0;
+
+ /*
+ * SKL+ supports slice power gating on devices with more than
+ * one slice, and supports EU power gating on devices with
+ * more than one EU pair per subslice. BXT+ supports subslice
+ * power gating on devices with more than one subslice, and
+ * supports EU power gating on devices with more than one EU
+ * pair per subslice.
+ */
+ sseu->has_slice_pg =
+ !IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
+ sseu->has_subslice_pg =
+ IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
+ sseu->has_eu_pg = sseu->eu_per_subslice > 2;
+
+ if (IS_GEN9_LP(i915)) {
+#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss)))
+ info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
+
+ sseu->min_eu_in_pool = 0;
+ if (info->has_pooled_eu) {
+ if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
+ sseu->min_eu_in_pool = 3;
+ else if (IS_SS_DISABLED(1))
+ sseu->min_eu_in_pool = 6;
+ else
+ sseu->min_eu_in_pool = 9;
+ }
+#undef IS_SS_DISABLED
+ }
+}
+
+static void bdw_sseu_info_init(struct intel_gt *gt)
+{
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ struct intel_uncore *uncore = gt->uncore;
+ int s, ss;
+ u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
+ u32 eu_disable0, eu_disable1, eu_disable2;
+
+ fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+ sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ intel_sseu_set_info(sseu, 3, 3, 8);
+
+ /*
+ * The subslice disable field is global, i.e. it applies
+ * to each of the enabled slices.
+ */
+ subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+ subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+ GEN8_F2_SS_DIS_SHIFT);
+ eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
+ eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
+ eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
+ eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
+ eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
+ ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
+ (32 - GEN8_EU_DIS0_S1_SHIFT));
+ eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
+ ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
+ (32 - GEN8_EU_DIS1_S2_SHIFT));
+
+ /*
+ * Iterate through enabled slices and subslices to
+ * count the total enabled EU.
+ */
+ for (s = 0; s < sseu->max_slices; s++) {
+ if (!(sseu->slice_mask & BIT(s)))
+ /* skip disabled slice */
+ continue;
+
+ intel_sseu_set_subslices(sseu, s, subslice_mask);
+
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ u8 eu_disabled_mask;
+ u32 n_disabled;
+
+ if (!intel_sseu_has_subslice(sseu, s, ss))
+ /* skip disabled subslice */
+ continue;
+
+ eu_disabled_mask =
+ eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
+
+ sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+ n_disabled = hweight8(eu_disabled_mask);
+
+ /*
+ * Record which subslices have 7 EUs.
+ */
+ if (sseu->max_eus_per_subslice - n_disabled == 7)
+ sseu->subslice_7eu[s] |= 1 << ss;
+ }
+ }
+
+ sseu->eu_total = compute_eu_total(sseu);
+
+ /*
+ * BDW is expected to always have a uniform distribution of EU across
+ * subslices with the exception that any one EU in any one subslice may
+ * be fused off for die recovery.
+ */
+ sseu->eu_per_subslice =
+ intel_sseu_subslice_total(sseu) ?
+ DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
+ 0;
+
+ /*
+ * BDW supports slice power gating on devices with more than
+ * one slice.
+ */
+ sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
+ sseu->has_subslice_pg = 0;
+ sseu->has_eu_pg = 0;
+}
+
+static void hsw_sseu_info_init(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct sseu_dev_info *sseu = &gt->info.sseu;
+ u32 fuse1;
+ u8 subslice_mask = 0;
+ int s, ss;
+
+ /*
+ * There isn't a register to tell us how many slices/subslices. We
+ * work off the PCI-ids here.
+ */
+ switch (INTEL_INFO(i915)->gt) {
+ default:
+ MISSING_CASE(INTEL_INFO(i915)->gt);
+ fallthrough;
+ case 1:
+ sseu->slice_mask = BIT(0);
+ subslice_mask = BIT(0);
+ break;
+ case 2:
+ sseu->slice_mask = BIT(0);
+ subslice_mask = BIT(0) | BIT(1);
+ break;
+ case 3:
+ sseu->slice_mask = BIT(0) | BIT(1);
+ subslice_mask = BIT(0) | BIT(1);
+ break;
+ }
+
+ fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
+ switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
+ default:
+ MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
+ HSW_F1_EU_DIS_SHIFT);
+ fallthrough;
+ case HSW_F1_EU_DIS_10EUS:
+ sseu->eu_per_subslice = 10;
+ break;
+ case HSW_F1_EU_DIS_8EUS:
+ sseu->eu_per_subslice = 8;
+ break;
+ case HSW_F1_EU_DIS_6EUS:
+ sseu->eu_per_subslice = 6;
+ break;
+ }
+
+ intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
+ hweight8(subslice_mask),
+ sseu->eu_per_subslice);
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ intel_sseu_set_subslices(sseu, s, subslice_mask);
+
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ sseu_set_eus(sseu, s, ss,
+ (1UL << sseu->eu_per_subslice) - 1);
+ }
+ }
+
+ sseu->eu_total = compute_eu_total(sseu);
+
+ /* No powergating for you. */
+ sseu->has_slice_pg = 0;
+ sseu->has_subslice_pg = 0;
+ sseu->has_eu_pg = 0;
+}
+
+void intel_sseu_info_init(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_HASWELL(i915))
+ hsw_sseu_info_init(gt);
+ else if (IS_CHERRYVIEW(i915))
+ cherryview_sseu_info_init(gt);
+ else if (IS_BROADWELL(i915))
+ bdw_sseu_info_init(gt);
+ else if (IS_GEN(i915, 9))
+ gen9_sseu_info_init(gt);
+ else if (IS_GEN(i915, 10))
+ gen10_sseu_info_init(gt);
+ else if (IS_GEN(i915, 11))
+ gen11_sseu_info_init(gt);
+ else if (INTEL_GEN(i915) >= 12)
+ gen12_sseu_info_init(gt);
+}
+
+u32 intel_sseu_make_rpcs(struct intel_gt *gt,
const struct intel_sseu *req_sseu)
{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ struct drm_i915_private *i915 = gt->i915;
+ const struct sseu_dev_info *sseu = &gt->info.sseu;
bool subslice_pg = sseu->has_subslice_pg;
u8 slices, subslices;
u32 rpcs = 0;
@@ -173,3 +715,48 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
return rpcs;
}
+
+void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
+{
+ int s;
+
+ drm_printf(p, "slice total: %u, mask=%04x\n",
+ hweight8(sseu->slice_mask), sseu->slice_mask);
+ drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
+ for (s = 0; s < sseu->max_slices; s++) {
+ drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
+ s, intel_sseu_subslices_per_slice(sseu, s),
+ intel_sseu_get_subslices(sseu, s));
+ }
+ drm_printf(p, "EU total: %u\n", sseu->eu_total);
+ drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
+ drm_printf(p, "has slice power gating: %s\n",
+ yesno(sseu->has_slice_pg));
+ drm_printf(p, "has subslice power gating: %s\n",
+ yesno(sseu->has_subslice_pg));
+ drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
+}
+
+void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
+ struct drm_printer *p)
+{
+ int s, ss;
+
+ if (sseu->max_slices == 0) {
+ drm_printf(p, "Unavailable\n");
+ return;
+ }
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
+ s, intel_sseu_subslices_per_slice(sseu, s),
+ intel_sseu_get_subslices(sseu, s));
+
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ u16 enabled_eus = sseu_get_eus(sseu, s, ss);
+
+ drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
+ ss, hweight16(enabled_eus), enabled_eus);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index d1d225204f09..23ba6c2ebe70 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -13,6 +13,8 @@
#include "i915_gem.h"
struct drm_i915_private;
+struct intel_gt;
+struct drm_printer;
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
@@ -94,7 +96,13 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
u32 ss_mask);
-u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+void intel_sseu_info_init(struct intel_gt *gt);
+
+u32 intel_sseu_make_rpcs(struct intel_gt *gt,
const struct intel_sseu *req_sseu);
+void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p);
+void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
+ struct drm_printer *p);
+
#endif /* __INTEL_SSEU_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
new file mode 100644
index 000000000000..51780282d872
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "debugfs_gt.h"
+#include "intel_sseu_debugfs.h"
+#include "i915_drv.h"
+
+static void sseu_copy_subslices(const struct sseu_dev_info *sseu,
+ int slice, u8 *to_mask)
+{
+ int offset = slice * sseu->ss_stride;
+
+ memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
+}
+
+static void cherryview_sseu_device_status(struct intel_gt *gt,
+ struct sseu_dev_info *sseu)
+{
+#define SS_MAX 2
+ struct intel_uncore *uncore = gt->uncore;
+ const int ss_max = SS_MAX;
+ u32 sig1[SS_MAX], sig2[SS_MAX];
+ int ss;
+
+ sig1[0] = intel_uncore_read(uncore, CHV_POWER_SS0_SIG1);
+ sig1[1] = intel_uncore_read(uncore, CHV_POWER_SS1_SIG1);
+ sig2[0] = intel_uncore_read(uncore, CHV_POWER_SS0_SIG2);
+ sig2[1] = intel_uncore_read(uncore, CHV_POWER_SS1_SIG2);
+
+ for (ss = 0; ss < ss_max; ss++) {
+ unsigned int eu_cnt;
+
+ if (sig1[ss] & CHV_SS_PG_ENABLE)
+ /* skip disabled subslice */
+ continue;
+
+ sseu->slice_mask = BIT(0);
+ sseu->subslice_mask[0] |= BIT(ss);
+ eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
+ ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
+ ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
+ ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
+ sseu->eu_total += eu_cnt;
+ sseu->eu_per_subslice = max_t(unsigned int,
+ sseu->eu_per_subslice, eu_cnt);
+ }
+#undef SS_MAX
+}
+
+static void gen10_sseu_device_status(struct intel_gt *gt,
+ struct sseu_dev_info *sseu)
+{
+#define SS_MAX 6
+ struct intel_uncore *uncore = gt->uncore;
+ const struct intel_gt_info *info = &gt->info;
+ u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
+ int s, ss;
+
+ for (s = 0; s < info->sseu.max_slices; s++) {
+ /*
+ * FIXME: Valid SS Mask respects the spec and read
+ * only valid bits for those registers, excluding reserved
+ * although this seems wrong because it would leave many
+ * subslices without ACK.
+ */
+ s_reg[s] = intel_uncore_read(uncore, GEN10_SLICE_PGCTL_ACK(s)) &
+ GEN10_PGCTL_VALID_SS_MASK(s);
+ eu_reg[2 * s] = intel_uncore_read(uncore,
+ GEN10_SS01_EU_PGCTL_ACK(s));
+ eu_reg[2 * s + 1] = intel_uncore_read(uncore,
+ GEN10_SS23_EU_PGCTL_ACK(s));
+ }
+
+ eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
+ GEN9_PGCTL_SSA_EU19_ACK |
+ GEN9_PGCTL_SSA_EU210_ACK |
+ GEN9_PGCTL_SSA_EU311_ACK;
+ eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
+ GEN9_PGCTL_SSB_EU19_ACK |
+ GEN9_PGCTL_SSB_EU210_ACK |
+ GEN9_PGCTL_SSB_EU311_ACK;
+
+ for (s = 0; s < info->sseu.max_slices; s++) {
+ if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
+ /* skip disabled slice */
+ continue;
+
+ sseu->slice_mask |= BIT(s);
+ sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask);
+
+ for (ss = 0; ss < info->sseu.max_subslices; ss++) {
+ unsigned int eu_cnt;
+
+ if (info->sseu.has_subslice_pg &&
+ !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+ /* skip disabled subslice */
+ continue;
+
+ eu_cnt = 2 * hweight32(eu_reg[2 * s + ss / 2] &
+ eu_mask[ss % 2]);
+ sseu->eu_total += eu_cnt;
+ sseu->eu_per_subslice = max_t(unsigned int,
+ sseu->eu_per_subslice,
+ eu_cnt);
+ }
+ }
+#undef SS_MAX
+}
+
+static void gen9_sseu_device_status(struct intel_gt *gt,
+ struct sseu_dev_info *sseu)
+{
+#define SS_MAX 3
+ struct intel_uncore *uncore = gt->uncore;
+ const struct intel_gt_info *info = &gt->info;
+ u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
+ int s, ss;
+
+ for (s = 0; s < info->sseu.max_slices; s++) {
+ s_reg[s] = intel_uncore_read(uncore, GEN9_SLICE_PGCTL_ACK(s));
+ eu_reg[2 * s] =
+ intel_uncore_read(uncore, GEN9_SS01_EU_PGCTL_ACK(s));
+ eu_reg[2 * s + 1] =
+ intel_uncore_read(uncore, GEN9_SS23_EU_PGCTL_ACK(s));
+ }
+
+ eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
+ GEN9_PGCTL_SSA_EU19_ACK |
+ GEN9_PGCTL_SSA_EU210_ACK |
+ GEN9_PGCTL_SSA_EU311_ACK;
+ eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
+ GEN9_PGCTL_SSB_EU19_ACK |
+ GEN9_PGCTL_SSB_EU210_ACK |
+ GEN9_PGCTL_SSB_EU311_ACK;
+
+ for (s = 0; s < info->sseu.max_slices; s++) {
+ if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
+ /* skip disabled slice */
+ continue;
+
+ sseu->slice_mask |= BIT(s);
+
+ if (IS_GEN9_BC(gt->i915))
+ sseu_copy_subslices(&info->sseu, s,
+ sseu->subslice_mask);
+
+ for (ss = 0; ss < info->sseu.max_subslices; ss++) {
+ unsigned int eu_cnt;
+ u8 ss_idx = s * info->sseu.ss_stride +
+ ss / BITS_PER_BYTE;
+
+ if (IS_GEN9_LP(gt->i915)) {
+ if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+ /* skip disabled subslice */
+ continue;
+
+ sseu->subslice_mask[ss_idx] |=
+ BIT(ss % BITS_PER_BYTE);
+ }
+
+ eu_cnt = eu_reg[2 * s + ss / 2] & eu_mask[ss % 2];
+ eu_cnt = 2 * hweight32(eu_cnt);
+
+ sseu->eu_total += eu_cnt;
+ sseu->eu_per_subslice = max_t(unsigned int,
+ sseu->eu_per_subslice,
+ eu_cnt);
+ }
+ }
+#undef SS_MAX
+}
+
+static void bdw_sseu_device_status(struct intel_gt *gt,
+ struct sseu_dev_info *sseu)
+{
+ const struct intel_gt_info *info = &gt->info;
+ u32 slice_info = intel_uncore_read(gt->uncore, GEN8_GT_SLICE_INFO);
+ int s;
+
+ sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
+
+ if (sseu->slice_mask) {
+ sseu->eu_per_subslice = info->sseu.eu_per_subslice;
+ for (s = 0; s < fls(sseu->slice_mask); s++)
+ sseu_copy_subslices(&info->sseu, s,
+ sseu->subslice_mask);
+ sseu->eu_total = sseu->eu_per_subslice *
+ intel_sseu_subslice_total(sseu);
+
+ /* subtract fused off EU(s) from enabled slice(s) */
+ for (s = 0; s < fls(sseu->slice_mask); s++) {
+ u8 subslice_7eu = info->sseu.subslice_7eu[s];
+
+ sseu->eu_total -= hweight8(subslice_7eu);
+ }
+ }
+}
+
+static void i915_print_sseu_info(struct seq_file *m,
+ bool is_available_info,
+ bool has_pooled_eu,
+ const struct sseu_dev_info *sseu)
+{
+ const char *type = is_available_info ? "Available" : "Enabled";
+ int s;
+
+ seq_printf(m, " %s Slice Mask: %04x\n", type,
+ sseu->slice_mask);
+ seq_printf(m, " %s Slice Total: %u\n", type,
+ hweight8(sseu->slice_mask));
+ seq_printf(m, " %s Subslice Total: %u\n", type,
+ intel_sseu_subslice_total(sseu));
+ for (s = 0; s < fls(sseu->slice_mask); s++) {
+ seq_printf(m, " %s Slice%i subslices: %u\n", type,
+ s, intel_sseu_subslices_per_slice(sseu, s));
+ }
+ seq_printf(m, " %s EU Total: %u\n", type,
+ sseu->eu_total);
+ seq_printf(m, " %s EU Per Subslice: %u\n", type,
+ sseu->eu_per_subslice);
+
+ if (!is_available_info)
+ return;
+
+ seq_printf(m, " Has Pooled EU: %s\n", yesno(has_pooled_eu));
+ if (has_pooled_eu)
+ seq_printf(m, " Min EU in pool: %u\n", sseu->min_eu_in_pool);
+
+ seq_printf(m, " Has Slice Power Gating: %s\n",
+ yesno(sseu->has_slice_pg));
+ seq_printf(m, " Has Subslice Power Gating: %s\n",
+ yesno(sseu->has_subslice_pg));
+ seq_printf(m, " Has EU Power Gating: %s\n",
+ yesno(sseu->has_eu_pg));
+}
+
+/*
+ * this is called from top-level debugfs as well, so we can't get the gt from
+ * the seq_file.
+ */
+int intel_sseu_status(struct seq_file *m, struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ const struct intel_gt_info *info = &gt->info;
+ struct sseu_dev_info sseu;
+ intel_wakeref_t wakeref;
+
+ if (INTEL_GEN(i915) < 8)
+ return -ENODEV;
+
+ seq_puts(m, "SSEU Device Info\n");
+ i915_print_sseu_info(m, true, HAS_POOLED_EU(i915), &info->sseu);
+
+ seq_puts(m, "SSEU Device Status\n");
+ memset(&sseu, 0, sizeof(sseu));
+ intel_sseu_set_info(&sseu, info->sseu.max_slices,
+ info->sseu.max_subslices,
+ info->sseu.max_eus_per_subslice);
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ if (IS_CHERRYVIEW(i915))
+ cherryview_sseu_device_status(gt, &sseu);
+ else if (IS_BROADWELL(i915))
+ bdw_sseu_device_status(gt, &sseu);
+ else if (IS_GEN(i915, 9))
+ gen9_sseu_device_status(gt, &sseu);
+ else if (INTEL_GEN(i915) >= 10)
+ gen10_sseu_device_status(gt, &sseu);
+ }
+
+ i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu);
+
+ return 0;
+}
+
+static int sseu_status_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+
+ return intel_sseu_status(m, gt);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(sseu_status);
+
+static int rcs_topology_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ intel_sseu_print_topology(&gt->info.sseu, &p);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
+
+void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "sseu_status", &sseu_status_fops, NULL },
+ { "rcs_topology", &rcs_topology_fops, NULL },
+ };
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h
new file mode 100644
index 000000000000..73f001589e90
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef INTEL_SSEU_DEBUGFS_H
+#define INTEL_SSEU_DEBUGFS_H
+
+struct intel_gt;
+struct dentry;
+struct seq_file;
+
+int intel_sseu_status(struct seq_file *m, struct intel_gt *gt);
+void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* INTEL_SSEU_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 4546284fede1..46d20f5f3ddc 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -73,6 +73,8 @@ hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
return vma;
}
+ GT_TRACE(timeline->gt, "new HWSP allocated\n");
+
vma->private = hwsp;
hwsp->gt = timeline->gt;
hwsp->vma = vma;
@@ -327,6 +329,8 @@ int intel_timeline_pin(struct intel_timeline *tl)
tl->hwsp_offset =
i915_ggtt_offset(tl->hwsp_ggtt) +
offset_in_page(tl->hwsp_offset);
+ GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
+ tl->fence_context, tl->hwsp_offset);
cacheline_acquire(tl->hwsp_cacheline);
if (atomic_fetch_inc(&tl->pin_count)) {
@@ -434,6 +438,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
int err;
might_lock(&tl->gt->ggtt->vm.mutex);
+ GT_TRACE(tl->gt, "timeline:%llx wrapped\n", tl->fence_context);
/*
* If there is an outstanding GPU reference to this cacheline,
@@ -497,6 +502,8 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
tl->hwsp_offset += i915_ggtt_offset(vma);
+ GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
+ tl->fence_context, tl->hwsp_offset);
cacheline_acquire(cl);
tl->hwsp_cacheline = cl;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 85d2bef51524..5726cd0a37e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -205,6 +205,18 @@ wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
#define WA_SET_FIELD_MASKED(addr, mask, value) \
wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
+static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+}
+
+static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+}
+
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
@@ -355,7 +367,10 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
HDC_FORCE_NON_COHERENT);
/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
- if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
+ if (IS_SKYLAKE(i915) ||
+ IS_KABYLAKE(i915) ||
+ IS_COFFEELAKE(i915) ||
+ IS_COMETLAKE(i915))
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
@@ -389,7 +404,7 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
u8 vals[3] = { 0, 0, 0 };
unsigned int i;
@@ -400,7 +415,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
* Only consider slices where one, and only one, subslice has 7
* EUs
*/
- if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
+ if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
continue;
/*
@@ -409,7 +424,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
*
* -> 0 <= ss <= 3;
*/
- ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
+ ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
vals[i] = 3 - ss;
}
@@ -600,11 +615,14 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
* Wa_1604555607:gen12 and Wa_1608008084:gen12
* FF_MODE2 register will return the wrong value when read. The default
* value for this register is zero for all fields and there are no bit
- * masks. So instead of doing a RMW we should just write the TDS timer
- * value for Wa_1604555607.
+ * masks. So instead of doing a RMW we should just write the GS Timer
+ * and TDS timer values for Wa_1604555607 and Wa_16011163337.
*/
- wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128, 0);
+ wa_add(wal,
+ FF_MODE2,
+ FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128,
+ 0);
/* WaDisableGPGPUMidThreadPreemption:tgl */
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
@@ -630,7 +648,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
icl_ctx_workarounds_init(engine, wal);
else if (IS_CANNONLAKE(i915))
cnl_ctx_workarounds_init(engine, wal);
- else if (IS_COFFEELAKE(i915))
+ else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
cfl_ctx_workarounds_init(engine, wal);
else if (IS_GEMINILAKE(i915))
glk_ctx_workarounds_init(engine, wal);
@@ -644,6 +662,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
chv_ctx_workarounds_init(engine, wal);
else if (IS_BROADWELL(i915))
bdw_ctx_workarounds_init(engine, wal);
+ else if (IS_GEN(i915, 7))
+ gen7_ctx_workarounds_init(engine, wal);
+ else if (IS_GEN(i915, 6))
+ gen6_ctx_workarounds_init(engine, wal);
else if (INTEL_GEN(i915) < 8)
return;
else
@@ -917,7 +939,7 @@ static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
/* WaDisableKillLogic:bxt,skl,kbl */
- if (!IS_COFFEELAKE(i915))
+ if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,
GAM_ECOCHK,
ECOCHK_DIS_TLB);
@@ -1014,7 +1036,7 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
unsigned int slice, subslice;
u32 l3_en, mcr, mcr_mask;
@@ -1180,7 +1202,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
icl_gt_workarounds_init(i915, wal);
else if (IS_CANNONLAKE(i915))
cnl_gt_workarounds_init(i915, wal);
- else if (IS_COFFEELAKE(i915))
+ else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
cfl_gt_workarounds_init(i915, wal);
else if (IS_GEMINILAKE(i915))
glk_gt_workarounds_init(i915, wal);
@@ -1428,6 +1450,18 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine)
RING_FORCE_TO_NONPRIV_RANGE_4);
}
+static void cml_whitelist_build(struct intel_engine_cs *engine)
+{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ if (engine->class != RENDER_CLASS)
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
+
+ cfl_whitelist_build(engine);
+}
+
static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
@@ -1478,9 +1512,15 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
/* hucStatus2RegOffset */
whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
RING_FORCE_TO_NONPRIV_ACCESS_RD);
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
default:
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
}
}
@@ -1512,6 +1552,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
whitelist_reg(w, HIZ_CHICKEN);
break;
default:
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
}
}
@@ -1529,6 +1572,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
icl_whitelist_build(engine);
else if (IS_CANNONLAKE(i915))
cnl_whitelist_build(engine);
+ else if (IS_COMETLAKE(i915))
+ cml_whitelist_build(engine);
else if (IS_COFFEELAKE(i915))
cfl_whitelist_build(engine);
else if (IS_GEMINILAKE(i915))
@@ -1604,11 +1649,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_SARCHKMD,
GEN7_DISABLE_SAMPLER_PREFETCH);
- /* Wa_1407928979:tgl */
- wa_write_or(wal,
- GEN7_FF_THREAD_MODE,
- GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
-
/* Wa_1408615072:tgl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
@@ -1632,6 +1672,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_14010229206:tgl
*/
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+
+ /*
+ * Wa_1407928979:tgl A*
+ * Wa_18011464164:tgl B0+
+ * Wa_22010931296:tgl B0+
+ */
+ wa_write_or(wal, GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
}
if (IS_GEN(i915, 11)) {
@@ -1725,6 +1773,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal,
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+
+ /* Wa_22010271021:ehl */
+ if (IS_ELKHARTLAKE(i915))
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
}
if (IS_GEN_RANGE(i915, 9, 12)) {
@@ -1734,7 +1788,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN9_FFSC_PERCTX_PREEMPT_CTRL);
}
- if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
+ if (IS_SKYLAKE(i915) ||
+ IS_KABYLAKE(i915) ||
+ IS_COFFEELAKE(i915) ||
+ IS_COMETLAKE(i915)) {
/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
wa_write_or(wal,
GEN8_GARBCNTL,
@@ -1818,6 +1875,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
/* XXX bit doesn't stick on Broadwater */
IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
+
+ if (IS_GEN(i915, 4))
+ /*
+ * Disable CONSTANT_BUFFER before it is loaded from the context
+ * image. For as it is loaded, it is executed and the stored
+ * address may no longer be valid, leading to a GPU hang.
+ *
+ * This imposes the requirement that userspace reload their
+ * CONSTANT_BUFFER on every batch, fortunately a requirement
+ * they are already accustomed to from before contexts were
+ * enabled.
+ */
+ wa_add(wal, ECOSKPD,
+ 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
+ 0 /* XXX bit doesn't stick on Broadwater */);
}
static void
@@ -1932,7 +2004,7 @@ wa_list_srm(struct i915_request *rq,
const struct i915_wa_list *wal,
struct i915_vma *vma)
{
- struct drm_i915_private *i915 = rq->i915;
+ struct drm_i915_private *i915 = rq->engine->i915;
unsigned int i, count = 0;
const struct i915_wa *wa;
u32 srm, *cs;
@@ -2021,7 +2093,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
err = 0;
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
+ if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
continue;
if (!wa_verify(wa, results[i], wal->name, from))
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index f88e445a1cae..729c3c7b11e2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -49,7 +49,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
return PTR_ERR(cs);
cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
- if (INTEL_GEN(rq->i915) >= 8)
+ if (INTEL_GEN(rq->engine->i915) >= 8)
cmd++;
*cs++ = cmd;
*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 697114dd1f47..73243ba59c7d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -10,6 +10,7 @@
#include "intel_gt_requests.h"
#include "i915_selftest.h"
+#include "selftest_engine_heartbeat.h"
static int timeline_sync(struct intel_timeline *tl)
{
@@ -142,24 +143,6 @@ out:
return err;
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine,
- unsigned long *saved)
-{
- *saved = engine->props.heartbeat_interval_ms;
- engine->props.heartbeat_interval_ms = 0;
-
- intel_engine_pm_get(engine);
- intel_engine_park_heartbeat(engine);
-}
-
-static void engine_heartbeat_enable(struct intel_engine_cs *engine,
- unsigned long saved)
-{
- intel_engine_pm_put(engine);
-
- engine->props.heartbeat_interval_ms = saved;
-}
-
static int live_idle_flush(void *arg)
{
struct intel_gt *gt = arg;
@@ -170,11 +153,9 @@ static int live_idle_flush(void *arg)
/* Check that we can flush the idle barriers */
for_each_engine(engine, gt, id) {
- unsigned long heartbeat;
-
- engine_heartbeat_disable(engine, &heartbeat);
+ st_engine_heartbeat_disable(engine);
err = __live_idle_pulse(engine, intel_engine_flush_barriers);
- engine_heartbeat_enable(engine, heartbeat);
+ st_engine_heartbeat_enable(engine);
if (err)
break;
}
@@ -192,11 +173,9 @@ static int live_idle_pulse(void *arg)
/* Check that heartbeat pulses flush the idle barriers */
for_each_engine(engine, gt, id) {
- unsigned long heartbeat;
-
- engine_heartbeat_disable(engine, &heartbeat);
+ st_engine_heartbeat_disable(engine);
err = __live_idle_pulse(engine, intel_engine_pulse);
- engine_heartbeat_enable(engine, heartbeat);
+ st_engine_heartbeat_enable(engine);
if (err && err != -ENODEV)
break;
@@ -386,11 +365,27 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- saved_hangcheck = i915_modparams.enable_hangcheck;
- i915_modparams.enable_hangcheck = INT_MAX;
+ saved_hangcheck = i915->params.enable_hangcheck;
+ i915->params.enable_hangcheck = INT_MAX;
err = intel_gt_live_subtests(tests, &i915->gt);
- i915_modparams.enable_hangcheck = saved_hangcheck;
+ i915->params.enable_hangcheck = saved_hangcheck;
return err;
}
+
+void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
+{
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms =
+ engine->defaults.heartbeat_interval_ms;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
new file mode 100644
index 000000000000..cd27113d5400
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SELFTEST_ENGINE_HEARTBEAT_H
+#define SELFTEST_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+void st_engine_heartbeat_disable(struct intel_engine_cs *engine);
+void st_engine_heartbeat_enable(struct intel_engine_cs *engine);
+
+#endif /* SELFTEST_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index cbf6b0735272..b08fc5390e8a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -6,7 +6,107 @@
#include "i915_selftest.h"
#include "selftest_engine.h"
+#include "selftest_engine_heartbeat.h"
#include "selftests/igt_atomic.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_spinner.h"
+
+static int live_engine_busy_stats(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Check that if an engine supports busy-stats, they tell the truth.
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq;
+ ktime_t de, dt;
+ ktime_t t[2];
+
+ if (!intel_engine_supports_stats(engine))
+ continue;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (intel_gt_pm_wait_for_idle(gt)) {
+ err = -EBUSY;
+ break;
+ }
+
+ st_engine_heartbeat_disable(engine);
+
+ ENGINE_TRACE(engine, "measuring idle time\n");
+ preempt_disable();
+ de = intel_engine_get_busy_time(engine, &t[0]);
+ udelay(100);
+ de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
+ preempt_enable();
+ dt = ktime_sub(t[1], t[0]);
+ if (de < 0 || de > 10) {
+ pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n",
+ engine->name,
+ de, (int)div64_u64(100 * de, dt), dt);
+ GEM_TRACE_DUMP();
+ err = -EINVAL;
+ goto end;
+ }
+
+ /* 100% busy */
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto end;
+ }
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ intel_gt_set_wedged(engine->gt);
+ err = -ETIME;
+ goto end;
+ }
+
+ ENGINE_TRACE(engine, "measuring busy time\n");
+ preempt_disable();
+ de = intel_engine_get_busy_time(engine, &t[0]);
+ udelay(100);
+ de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
+ preempt_enable();
+ dt = ktime_sub(t[1], t[0]);
+ if (100 * de < 95 * dt || 95 * de > 100 * dt) {
+ pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n",
+ engine->name,
+ de, (int)div64_u64(100 * de, dt), dt);
+ GEM_TRACE_DUMP();
+ err = -EINVAL;
+ goto end;
+ }
+
+end:
+ st_engine_heartbeat_enable(engine);
+ igt_spinner_end(&spin);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ return err;
+}
static int live_engine_pm(void *arg)
{
@@ -77,6 +177,7 @@ static int live_engine_pm(void *arg)
int live_engine_pm_selftests(struct intel_gt *gt)
{
static const struct i915_subtest tests[] = {
+ SUBTEST(live_engine_busy_stats),
SUBTEST(live_engine_pm),
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 242181a5214c..6180a47c1b51 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -5,10 +5,141 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/sort.h>
+
+#include "intel_gt_clock_utils.h"
+
#include "selftest_llc.h"
#include "selftest_rc6.h"
#include "selftest_rps.h"
+static int cmp_u64(const void *A, const void *B)
+{
+ const u64 *a = A, *b = B;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ else
+ return 0;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ else
+ return 0;
+}
+
+static void measure_clocks(struct intel_engine_cs *engine,
+ u32 *out_cycles, ktime_t *out_dt)
+{
+ ktime_t dt[5];
+ u32 cycles[5];
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ preempt_disable();
+ cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP);
+ dt[i] = ktime_get();
+
+ udelay(1000);
+
+ dt[i] = ktime_sub(ktime_get(), dt[i]);
+ cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP);
+ preempt_enable();
+ }
+
+ /* Use the median of both cycle/dt; close enough */
+ sort(cycles, 5, sizeof(*cycles), cmp_u32, NULL);
+ *out_cycles = (cycles[1] + 2 * cycles[2] + cycles[3]) / 4;
+
+ sort(dt, 5, sizeof(*dt), cmp_u64, NULL);
+ *out_dt = div_u64(dt[1] + 2 * dt[2] + dt[3], 4);
+}
+
+static int live_gt_clocks(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (!RUNTIME_INFO(gt->i915)->cs_timestamp_frequency_hz) { /* unknown */
+ pr_info("CS_TIMESTAMP frequency unknown\n");
+ return 0;
+ }
+
+ if (INTEL_GEN(gt->i915) < 4) /* Any CS_TIMESTAMP? */
+ return 0;
+
+ if (IS_GEN(gt->i915, 5))
+ /*
+ * XXX CS_TIMESTAMP low dword is dysfunctional?
+ *
+ * Ville's experiments indicate the high dword still works,
+ * but at a correspondingly reduced frequency.
+ */
+ return 0;
+
+ if (IS_GEN(gt->i915, 4))
+ /*
+ * XXX CS_TIMESTAMP appears gibberish
+ *
+ * Ville's experiments indicate that it mostly appears 'stuck'
+ * in that we see the register report the same cycle count
+ * for a couple of reads.
+ */
+ return 0;
+
+ intel_gt_pm_get(gt);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ for_each_engine(engine, gt, id) {
+ u32 cycles;
+ u32 expected;
+ u64 time;
+ u64 dt;
+
+ if (INTEL_GEN(engine->i915) < 7 && engine->id != RCS0)
+ continue;
+
+ measure_clocks(engine, &cycles, &dt);
+
+ time = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles);
+ expected = i915_cs_timestamp_ns_to_ticks(engine->i915, dt);
+
+ pr_info("%s: TIMESTAMP %d cycles [%lldns] in %lldns [%d cycles], using CS clock frequency of %uKHz\n",
+ engine->name, cycles, time, dt, expected,
+ RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz / 1000);
+
+ if (9 * time < 8 * dt || 8 * time > 9 * dt) {
+ pr_err("%s: CS ticks did not match walltime!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (9 * expected < 8 * cycles || 8 * expected > 9 * cycles) {
+ pr_err("%s: walltime did not match CS ticks!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_pm_put(gt);
+
+ return err;
+}
+
static int live_gt_resume(void *arg)
{
struct intel_gt *gt = arg;
@@ -52,6 +183,7 @@ static int live_gt_resume(void *arg)
int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
+ SUBTEST(live_gt_clocks),
SUBTEST(live_rc6_manual),
SUBTEST(live_rps_clock_interval),
SUBTEST(live_rps_control),
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 4aa4cc917d8b..fb5ebf930ab2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -29,6 +29,7 @@
#include "intel_gt.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
+#include "selftest_engine_heartbeat.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
@@ -203,12 +204,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = upper_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
memset(batch, 0, 1024);
batch += 1024 / sizeof(*batch);
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
*batch++ = lower_32_bits(vma->node.start);
*batch++ = upper_32_bits(vma->node.start);
@@ -217,12 +218,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = 0;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
memset(batch, 0, 1024);
batch += 1024 / sizeof(*batch);
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
*batch++ = lower_32_bits(vma->node.start);
} else if (INTEL_GEN(gt->i915) >= 4) {
@@ -230,24 +231,24 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = 0;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
memset(batch, 0, 1024);
batch += 1024 / sizeof(*batch);
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
*batch++ = lower_32_bits(vma->node.start);
} else {
*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
memset(batch, 0, 1024);
batch += 1024 / sizeof(*batch);
- *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_NOOP;
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
*batch++ = lower_32_bits(vma->node.start);
}
@@ -310,22 +311,6 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
1000));
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine)
-{
- engine->props.heartbeat_interval_ms = 0;
-
- intel_engine_pm_get(engine);
- intel_engine_park_heartbeat(engine);
-}
-
-static void engine_heartbeat_enable(struct intel_engine_cs *engine)
-{
- intel_engine_pm_put(engine);
-
- engine->props.heartbeat_interval_ms =
- engine->defaults.heartbeat_interval_ms;
-}
-
static int igt_hang_sanitycheck(void *arg)
{
struct intel_gt *gt = arg;
@@ -482,7 +467,7 @@ static int igt_reset_nop_engine(void *arg)
reset_engine_count = i915_reset_engine_count(global, engine);
count = 0;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
int i;
@@ -499,6 +484,20 @@ static int igt_reset_nop_engine(void *arg)
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+ intel_engine_dump(engine, &p,
+ "%s(%s): failed to submit request\n",
+ __func__,
+ engine->name);
+
+ GEM_TRACE("%s(%s): failed to submit request\n",
+ __func__,
+ engine->name);
+ GEM_TRACE_DUMP();
+
+ intel_gt_set_wedged(gt);
+
err = PTR_ERR(rq);
break;
}
@@ -526,7 +525,7 @@ static int igt_reset_nop_engine(void *arg)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -576,7 +575,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
if (active) {
@@ -628,7 +627,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err)
break;
@@ -805,10 +804,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].resets =
i915_reset_engine_count(global, other);
- if (!(flags & TEST_OTHERS))
+ if (other == engine && !(flags & TEST_SELF))
continue;
- if (other == engine && !(flags & TEST_SELF))
+ if (other != engine && !(flags & TEST_OTHERS))
continue;
threads[tmp].engine = other;
@@ -827,7 +826,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
yield(); /* start all threads before we begin */
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
struct i915_request *rq = NULL;
@@ -866,13 +865,29 @@ static int __igt_reset_engines(struct intel_gt *gt,
count++;
if (rq) {
+ if (rq->fence.error != -EIO) {
+ pr_err("i915_reset_engine(%s:%s):"
+ " failed to reset request %llx:%lld\n",
+ engine->name, test_name,
+ rq->fence.context,
+ rq->fence.seqno);
+ i915_request_put(rq);
+
+ GEM_TRACE_DUMP();
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ break;
+ }
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
pr_err("i915_reset_engine(%s:%s):"
- " failed to complete request after reset\n",
- engine->name, test_name);
+ " failed to complete request %llx:%lld after reset\n",
+ engine->name, test_name,
+ rq->fence.context,
+ rq->fence.seqno);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
i915_request_put(rq);
@@ -901,7 +916,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
engine->name, test_name, count);
@@ -983,7 +998,7 @@ static int igt_reset_engines(void *arg)
},
{
"self-priority",
- TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
+ TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
},
{ }
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 924bc01ef526..3fc5de961280 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -9,6 +9,7 @@
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
+#include "gt/selftest_engine_heartbeat.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
@@ -51,22 +52,6 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
return vma;
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine)
-{
- engine->props.heartbeat_interval_ms = 0;
-
- intel_engine_pm_get(engine);
- intel_engine_park_heartbeat(engine);
-}
-
-static void engine_heartbeat_enable(struct intel_engine_cs *engine)
-{
- intel_engine_pm_put(engine);
-
- engine->props.heartbeat_interval_ms =
- engine->defaults.heartbeat_interval_ms;
-}
-
static bool is_active(struct i915_request *rq)
{
if (i915_request_is_active(rq))
@@ -75,7 +60,7 @@ static bool is_active(struct i915_request *rq)
if (i915_request_on_hold(rq))
return true;
- if (i915_request_started(rq))
+ if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
return true;
return false;
@@ -234,7 +219,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
err = -EIO;
break;
}
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
struct intel_context *tmp;
@@ -332,7 +317,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
i915_request_put(rq[0]);
err_ce:
- tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+ intel_engine_flush_submission(engine);
igt_spinner_end(&spin);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
if (IS_ERR_OR_NULL(ce[n]))
@@ -342,7 +327,7 @@ err_ce:
intel_context_put(ce[n]);
}
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (igt_live_test_end(&t))
err = -EIO;
if (err)
@@ -363,6 +348,156 @@ static int live_unlite_preempt(void *arg)
return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}
+static int live_unlite_ring(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct igt_spinner spin;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Setup a preemption event that will cause almost the entire ring
+ * to be unwound, potentially fooling our intel_ring_direction()
+ * into emitting a forward lite-restore instead of the rollback.
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce[2] = {};
+ struct i915_request *rq;
+ struct igt_live_test t;
+ int n;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+ st_engine_heartbeat_disable(engine);
+
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err_ce;
+ }
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err_ce;
+ }
+
+ memset32(tmp->ring->vaddr,
+ 0xdeadbeef, /* trigger a hang if executed */
+ tmp->ring->vma->size / sizeof(u32));
+
+ ce[n] = tmp;
+ }
+
+ /* Create max prio spinner, followed by N low prio nops */
+ rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ce;
+ }
+
+ i915_request_get(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ intel_gt_set_wedged(gt);
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ce;
+ }
+
+ /* Fill the ring, until we will cause a wrap */
+ n = 0;
+ while (intel_ring_direction(ce[0]->ring,
+ rq->wa_tail,
+ ce[0]->ring->tail) <= 0) {
+ struct i915_request *tmp;
+
+ tmp = intel_context_create_request(ce[0]);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ i915_request_put(rq);
+ goto err_ce;
+ }
+
+ i915_request_add(tmp);
+ intel_engine_flush_submission(engine);
+ n++;
+ }
+ intel_engine_flush_submission(engine);
+ pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
+ engine->name, n,
+ ce[0]->ring->size,
+ ce[0]->ring->tail,
+ ce[0]->ring->emit,
+ rq->tail);
+ GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
+ rq->tail,
+ ce[0]->ring->tail) <= 0);
+ i915_request_put(rq);
+
+ /* Create a second ring to preempt the first ring after rq[0] */
+ rq = intel_context_create_request(ce[1]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ce;
+ }
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ err = wait_for_submit(engine, rq, HZ / 2);
+ i915_request_put(rq);
+ if (err) {
+ pr_err("%s: preemption request was not submitted\n",
+ engine->name);
+ err = -ETIME;
+ }
+
+ pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
+ engine->name,
+ ce[0]->ring->tail, ce[0]->ring->emit,
+ ce[1]->ring->tail, ce[1]->ring->emit);
+
+err_ce:
+ intel_engine_flush_submission(engine);
+ igt_spinner_end(&spin);
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ if (IS_ERR_OR_NULL(ce[n]))
+ break;
+
+ intel_context_unpin(ce[n]);
+ intel_context_put(ce[n]);
+ }
+ st_engine_heartbeat_enable(engine);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+ return err;
+}
+
static int live_pin_rewind(void *arg)
{
struct intel_gt *gt = arg;
@@ -471,7 +606,7 @@ static int live_hold_reset(void *arg)
break;
}
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
if (IS_ERR(rq)) {
@@ -531,7 +666,7 @@ static int live_hold_reset(void *arg)
i915_request_put(rq);
out:
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
intel_context_put(ce);
if (err)
break;
@@ -578,7 +713,7 @@ static int live_error_interrupt(void *arg)
const struct error_phase *p;
int err = 0;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
for (p = phases; p->error[0] != GOOD; p++) {
struct i915_request *client[ARRAY_SIZE(phases->error)];
@@ -677,7 +812,7 @@ out:
}
}
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err) {
intel_gt_set_wedged(gt);
return err;
@@ -828,7 +963,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
goto out;
if (i915_request_wait(head, 0,
- 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
+ 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
count, n);
GEM_TRACE_DUMP();
@@ -845,10 +980,11 @@ static int live_timeslice_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
struct i915_vma *vma;
void *vaddr;
int err = 0;
- int count;
/*
* If a request takes too long, we would like to give other users
@@ -885,26 +1021,21 @@ static int live_timeslice_preempt(void *arg)
if (err)
goto err_pin;
- for_each_prime_number_from(count, 1, 16) {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, gt, id) {
- if (!intel_engine_has_preemption(engine))
- continue;
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_has_preemption(engine))
+ continue;
- memset(vaddr, 0, PAGE_SIZE);
+ memset(vaddr, 0, PAGE_SIZE);
- engine_heartbeat_disable(engine);
- err = slice_semaphore_queue(engine, vma, count);
- engine_heartbeat_enable(engine);
- if (err)
- goto err_pin;
+ st_engine_heartbeat_disable(engine);
+ err = slice_semaphore_queue(engine, vma, 5);
+ st_engine_heartbeat_enable(engine);
+ if (err)
+ goto err_pin;
- if (igt_flush_test(gt->i915)) {
- err = -EIO;
- goto err_pin;
- }
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ goto err_pin;
}
}
@@ -1020,7 +1151,7 @@ static int live_timeslice_rewind(void *arg)
* Expect execution/evaluation order XZY
*/
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
slot = memset32(engine->status_page.addr + 1000, 0, 4);
@@ -1031,18 +1162,18 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[0] = create_rewinder(ce, NULL, slot, X);
- if (IS_ERR(rq[0])) {
+ rq[A1] = create_rewinder(ce, NULL, slot, X);
+ if (IS_ERR(rq[A1])) {
intel_context_put(ce);
goto err;
}
- rq[1] = create_rewinder(ce, NULL, slot, Y);
+ rq[A2] = create_rewinder(ce, NULL, slot, Y);
intel_context_put(ce);
- if (IS_ERR(rq[1]))
+ if (IS_ERR(rq[A2]))
goto err;
- err = wait_for_submit(engine, rq[1], HZ / 2);
+ err = wait_for_submit(engine, rq[A2], HZ / 2);
if (err) {
pr_err("%s: failed to submit first context\n",
engine->name);
@@ -1055,12 +1186,12 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[2] = create_rewinder(ce, rq[0], slot, Z);
+ rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
intel_context_put(ce);
if (IS_ERR(rq[2]))
goto err;
- err = wait_for_submit(engine, rq[2], HZ / 2);
+ err = wait_for_submit(engine, rq[B1], HZ / 2);
if (err) {
pr_err("%s: failed to submit second context\n",
engine->name);
@@ -1068,6 +1199,7 @@ static int live_timeslice_rewind(void *arg)
}
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
+ ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
/* Wait for the timeslice to kick in */
del_timer(&engine->execlists.timer);
@@ -1114,7 +1246,7 @@ err:
wmb();
engine->props.timeslice_duration_ms = timeslice;
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
for (i = 0; i < 3; i++)
i915_request_put(rq[i]);
if (igt_flush_test(gt->i915))
@@ -1140,9 +1272,17 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine)
return rq;
}
-static long timeslice_threshold(const struct intel_engine_cs *engine)
+static long slice_timeout(struct intel_engine_cs *engine)
{
- return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
+ long timeout;
+
+ /* Enough time for a timeslice to kick in, and kick out */
+ timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
+
+ /* Enough time for the nop request to complete */
+ timeout += HZ / 5;
+
+ return timeout + 1;
}
static int live_timeslice_queue(void *arg)
@@ -1198,7 +1338,7 @@ static int live_timeslice_queue(void *arg)
if (!intel_engine_has_preemption(engine))
continue;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
memset(vaddr, 0, PAGE_SIZE);
/* ELSP[0]: semaphore wait */
@@ -1243,24 +1383,8 @@ static int live_timeslice_queue(void *arg)
intel_engine_flush_submission(engine);
} while (READ_ONCE(engine->execlists.pending[0]));
- if (!READ_ONCE(engine->execlists.timer.expires) &&
- execlists_active(&engine->execlists) == rq &&
- !i915_request_completed(rq)) {
- struct drm_printer p =
- drm_info_printer(gt->i915->drm.dev);
-
- GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
- engine->name);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
- GEM_TRACE_DUMP();
-
- memset(vaddr, 0xff, PAGE_SIZE);
- err = -EINVAL;
- }
-
/* Timeslice every jiffy, so within 2 we should signal */
- if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
+ if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -1275,7 +1399,7 @@ static int live_timeslice_queue(void *arg)
err_rq:
i915_request_put(rq);
err_heartbeat:
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err)
break;
}
@@ -1321,7 +1445,7 @@ static int live_timeslice_nopreempt(void *arg)
break;
}
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
/* Create an unpreemptible spinner */
@@ -1349,7 +1473,7 @@ static int live_timeslice_nopreempt(void *arg)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
- err = PTR_ERR(rq);
+ err = PTR_ERR(ce);
goto out_spin;
}
@@ -1379,7 +1503,7 @@ static int live_timeslice_nopreempt(void *arg)
* allow the maximum priority barrier through. Wait long
* enough to see if it is timesliced in by mistake.
*/
- if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
+ if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
engine->name);
err = -EINVAL;
@@ -1390,7 +1514,7 @@ out_spin:
igt_spinner_end(&spin);
out_heartbeat:
xchg(&engine->props.timeslice_duration_ms, timeslice);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err)
break;
@@ -2294,7 +2418,7 @@ static int live_suppress_self_preempt(void *arg)
if (igt_flush_test(gt->i915))
goto err_wedged;
- intel_engine_pm_get(engine);
+ st_engine_heartbeat_disable(engine);
engine->execlists.preempt_hang.count = 0;
rq_a = spinner_create_request(&a.spin,
@@ -2302,14 +2426,14 @@ static int live_suppress_self_preempt(void *arg)
MI_NOOP);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
- intel_engine_pm_put(engine);
+ st_engine_heartbeat_enable(engine);
goto err_client_b;
}
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
- intel_engine_pm_put(engine);
+ st_engine_heartbeat_enable(engine);
goto err_wedged;
}
@@ -2321,7 +2445,7 @@ static int live_suppress_self_preempt(void *arg)
MI_NOOP);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
- intel_engine_pm_put(engine);
+ st_engine_heartbeat_enable(engine);
goto err_client_b;
}
i915_request_add(rq_b);
@@ -2332,7 +2456,7 @@ static int live_suppress_self_preempt(void *arg)
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
- intel_engine_pm_put(engine);
+ st_engine_heartbeat_enable(engine);
goto err_wedged;
}
@@ -2346,12 +2470,12 @@ static int live_suppress_self_preempt(void *arg)
engine->name,
engine->execlists.preempt_hang.count,
depth);
- intel_engine_pm_put(engine);
+ st_engine_heartbeat_enable(engine);
err = -EINVAL;
goto err_client_b;
}
- intel_engine_pm_put(engine);
+ st_engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
goto err_wedged;
}
@@ -2371,183 +2495,6 @@ err_wedged:
goto err_client_b;
}
-static int __i915_sw_fence_call
-dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
- return NOTIFY_DONE;
-}
-
-static struct i915_request *dummy_request(struct intel_engine_cs *engine)
-{
- struct i915_request *rq;
-
- rq = kzalloc(sizeof(*rq), GFP_KERNEL);
- if (!rq)
- return NULL;
-
- rq->engine = engine;
-
- spin_lock_init(&rq->lock);
- INIT_LIST_HEAD(&rq->fence.cb_list);
- rq->fence.lock = &rq->lock;
- rq->fence.ops = &i915_fence_ops;
-
- i915_sched_node_init(&rq->sched);
-
- /* mark this request as permanently incomplete */
- rq->fence.seqno = 1;
- BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
- rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
- GEM_BUG_ON(i915_request_completed(rq));
-
- i915_sw_fence_init(&rq->submit, dummy_notify);
- set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
-
- spin_lock_init(&rq->lock);
- rq->fence.lock = &rq->lock;
- INIT_LIST_HEAD(&rq->fence.cb_list);
-
- return rq;
-}
-
-static void dummy_request_free(struct i915_request *dummy)
-{
- /* We have to fake the CS interrupt to kick the next request */
- i915_sw_fence_commit(&dummy->submit);
-
- i915_request_mark_complete(dummy);
- dma_fence_signal(&dummy->fence);
-
- i915_sched_node_fini(&dummy->sched);
- i915_sw_fence_fini(&dummy->submit);
-
- dma_fence_free(&dummy->fence);
-}
-
-static int live_suppress_wait_preempt(void *arg)
-{
- struct intel_gt *gt = arg;
- struct preempt_client client[4];
- struct i915_request *rq[ARRAY_SIZE(client)] = {};
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = -ENOMEM;
- int i;
-
- /*
- * Waiters are given a little priority nudge, but not enough
- * to actually cause any preemption. Double check that we do
- * not needlessly generate preempt-to-idle cycles.
- */
-
- if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
- return 0;
-
- if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
- return -ENOMEM;
- if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
- goto err_client_0;
- if (preempt_client_init(gt, &client[2])) /* head of queue */
- goto err_client_1;
- if (preempt_client_init(gt, &client[3])) /* bystander */
- goto err_client_2;
-
- for_each_engine(engine, gt, id) {
- int depth;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- if (!engine->emit_init_breadcrumb)
- continue;
-
- for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
- struct i915_request *dummy;
-
- engine->execlists.preempt_hang.count = 0;
-
- dummy = dummy_request(engine);
- if (!dummy)
- goto err_client_3;
-
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- struct i915_request *this;
-
- this = spinner_create_request(&client[i].spin,
- client[i].ctx, engine,
- MI_NOOP);
- if (IS_ERR(this)) {
- err = PTR_ERR(this);
- goto err_wedged;
- }
-
- /* Disable NEWCLIENT promotion */
- __i915_active_fence_set(&i915_request_timeline(this)->last_request,
- &dummy->fence);
-
- rq[i] = i915_request_get(this);
- i915_request_add(this);
- }
-
- dummy_request_free(dummy);
-
- GEM_BUG_ON(i915_request_completed(rq[0]));
- if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
- pr_err("%s: First client failed to start\n",
- engine->name);
- goto err_wedged;
- }
- GEM_BUG_ON(!i915_request_started(rq[0]));
-
- if (i915_request_wait(rq[depth],
- I915_WAIT_PRIORITY,
- 1) != -ETIME) {
- pr_err("%s: Waiter depth:%d completed!\n",
- engine->name, depth);
- goto err_wedged;
- }
-
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- igt_spinner_end(&client[i].spin);
- i915_request_put(rq[i]);
- rq[i] = NULL;
- }
-
- if (igt_flush_test(gt->i915))
- goto err_wedged;
-
- if (engine->execlists.preempt_hang.count) {
- pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
- engine->name,
- engine->execlists.preempt_hang.count,
- depth);
- err = -EINVAL;
- goto err_client_3;
- }
- }
- }
-
- err = 0;
-err_client_3:
- preempt_client_fini(&client[3]);
-err_client_2:
- preempt_client_fini(&client[2]);
-err_client_1:
- preempt_client_fini(&client[1]);
-err_client_0:
- preempt_client_fini(&client[0]);
- return err;
-
-err_wedged:
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- igt_spinner_end(&client[i].spin);
- i915_request_put(rq[i]);
- }
- intel_gt_set_wedged(gt);
- err = -EIO;
- goto err_client_3;
-}
-
static int live_chain_preempt(void *arg)
{
struct intel_gt *gt = arg;
@@ -2796,6 +2743,168 @@ err_ce:
return err;
}
+static int __live_preempt_ring(struct intel_engine_cs *engine,
+ struct igt_spinner *spin,
+ int queue_sz, int ring_sz)
+{
+ struct intel_context *ce[2] = {};
+ struct i915_request *rq;
+ struct igt_live_test t;
+ int err = 0;
+ int n;
+
+ if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
+ return -EIO;
+
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err_ce;
+ }
+
+ tmp->ring = __intel_context_ring_size(ring_sz);
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err_ce;
+ }
+
+ memset32(tmp->ring->vaddr,
+ 0xdeadbeef, /* trigger a hang if executed */
+ tmp->ring->vma->size / sizeof(u32));
+
+ ce[n] = tmp;
+ }
+
+ rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ce;
+ }
+
+ i915_request_get(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(spin, rq)) {
+ intel_gt_set_wedged(engine->gt);
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ce;
+ }
+
+ /* Fill the ring, until we will cause a wrap */
+ n = 0;
+ while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
+ struct i915_request *tmp;
+
+ tmp = intel_context_create_request(ce[0]);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ i915_request_put(rq);
+ goto err_ce;
+ }
+
+ i915_request_add(tmp);
+ intel_engine_flush_submission(engine);
+ n++;
+ }
+ intel_engine_flush_submission(engine);
+ pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
+ engine->name, queue_sz, n,
+ ce[0]->ring->size,
+ ce[0]->ring->tail,
+ ce[0]->ring->emit,
+ rq->tail);
+ i915_request_put(rq);
+
+ /* Create a second request to preempt the first ring */
+ rq = intel_context_create_request(ce[1]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ce;
+ }
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ err = wait_for_submit(engine, rq, HZ / 2);
+ i915_request_put(rq);
+ if (err) {
+ pr_err("%s: preemption request was not submited\n",
+ engine->name);
+ err = -ETIME;
+ }
+
+ pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
+ engine->name,
+ ce[0]->ring->tail, ce[0]->ring->emit,
+ ce[1]->ring->tail, ce[1]->ring->emit);
+
+err_ce:
+ intel_engine_flush_submission(engine);
+ igt_spinner_end(spin);
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ if (IS_ERR_OR_NULL(ce[n]))
+ break;
+
+ intel_context_unpin(ce[n]);
+ intel_context_put(ce[n]);
+ }
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int live_preempt_ring(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct igt_spinner spin;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that we rollback large chunks of a ring in order to do a
+ * preemption event. Similar to live_unlite_ring, but looking at
+ * ring size rather than the impact of intel_ring_direction().
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ int n;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ st_engine_heartbeat_disable(engine);
+
+ for (n = 0; n <= 3; n++) {
+ err = __live_preempt_ring(engine, &spin,
+ n * SZ_4K / 4, SZ_4K);
+ if (err)
+ break;
+ }
+
+ st_engine_heartbeat_enable(engine);
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+ return err;
+}
+
static int live_preempt_gang(void *arg)
{
struct intel_gt *gt = arg;
@@ -2840,16 +2949,8 @@ static int live_preempt_gang(void *arg)
/* Submit each spinner at increasing priority */
engine->schedule(rq, &attr);
-
- if (prio <= I915_PRIORITY_MAX)
- continue;
-
- if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
- break;
-
- if (__igt_timeout(end_time, NULL))
- break;
- } while (1);
+ } while (prio <= I915_PRIORITY_MAX &&
+ !__igt_timeout(end_time, NULL));
pr_debug("%s: Preempt chain of %d requests\n",
engine->name, prio);
@@ -3417,7 +3518,7 @@ static int smoke_crescendo_thread(void *arg)
return err;
count++;
- } while (!__igt_timeout(end_time, NULL));
+ } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
smoke->count = count;
return 0;
@@ -3468,8 +3569,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
}
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
- count, flags,
- RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
+ count, flags, smoke->gt->info.num_engines, smoke->ncontext);
return 0;
}
@@ -3493,11 +3593,10 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
count++;
}
- } while (!__igt_timeout(end_time, NULL));
+ } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
- count, flags,
- RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
+ count, flags, smoke->gt->info.num_engines, smoke->ncontext);
return 0;
}
@@ -3506,7 +3605,7 @@ static int live_preempt_smoke(void *arg)
struct preempt_smoke smoke = {
.gt = arg,
.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
- .ncontext = 1024,
+ .ncontext = 256,
};
const unsigned int phase[] = { 0, BATCH };
struct igt_live_test t;
@@ -3706,13 +3805,43 @@ out:
return err;
}
+static unsigned int
+__select_siblings(struct intel_gt *gt,
+ unsigned int class,
+ struct intel_engine_cs **siblings,
+ bool (*filter)(const struct intel_engine_cs *))
+{
+ unsigned int n = 0;
+ unsigned int inst;
+
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!gt->engine_class[class][inst])
+ continue;
+
+ if (filter && !filter(gt->engine_class[class][inst]))
+ continue;
+
+ siblings[n++] = gt->engine_class[class][inst];
+ }
+
+ return n;
+}
+
+static unsigned int
+select_siblings(struct intel_gt *gt,
+ unsigned int class,
+ struct intel_engine_cs **siblings)
+{
+ return __select_siblings(gt, class, siblings, NULL);
+}
+
static int live_virtual_engine(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_engine_cs *engine;
enum intel_engine_id id;
- unsigned int class, inst;
+ unsigned int class;
int err;
if (intel_uc_uses_guc_submission(&gt->uc))
@@ -3730,13 +3859,7 @@ static int live_virtual_engine(void *arg)
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
int nsibling, n;
- nsibling = 0;
- for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!gt->engine_class[class][inst])
- continue;
-
- siblings[nsibling++] = gt->engine_class[class][inst];
- }
+ nsibling = select_siblings(gt, class, siblings);
if (nsibling < 2)
continue;
@@ -3845,7 +3968,7 @@ static int live_virtual_mask(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- unsigned int class, inst;
+ unsigned int class;
int err;
if (intel_uc_uses_guc_submission(&gt->uc))
@@ -3854,17 +3977,178 @@ static int live_virtual_mask(void *arg)
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
unsigned int nsibling;
- nsibling = 0;
- for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!gt->engine_class[class][inst])
- break;
+ nsibling = select_siblings(gt, class, siblings);
+ if (nsibling < 2)
+ continue;
+
+ err = mask_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int slicein_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ const long timeout = slice_timeout(siblings[0]);
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct igt_spinner spin;
+ unsigned int n;
+ int err = 0;
+
+ /*
+ * Virtual requests must take part in timeslicing on the target engines.
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for (n = 0; n < nsibling; n++) {
+ ce = intel_context_create(siblings[n]);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ i915_request_add(rq);
+ }
+
+ ce = intel_execlists_create_virtual(siblings, nsibling);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, timeout) < 0) {
+ GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
+ __func__, rq->engine->name);
+ GEM_TRACE_DUMP();
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ }
+ i915_request_put(rq);
+
+out:
+ igt_spinner_end(&spin);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ igt_spinner_fini(&spin);
+ return err;
+}
+
+static int sliceout_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ const long timeout = slice_timeout(siblings[0]);
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct igt_spinner spin;
+ unsigned int n;
+ int err = 0;
+
+ /*
+ * Virtual requests must allow others a fair timeslice.
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ /* XXX We do not handle oversubscription and fairness with normal rq */
+ for (n = 0; n < nsibling; n++) {
+ ce = intel_execlists_create_virtual(siblings, nsibling);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ i915_request_add(rq);
+ }
+
+ for (n = 0; !err && n < nsibling; n++) {
+ ce = intel_context_create(siblings[n]);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
- siblings[nsibling++] = gt->engine_class[class][inst];
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, timeout) < 0) {
+ GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
+ __func__, siblings[n]->name);
+ GEM_TRACE_DUMP();
+ intel_gt_set_wedged(gt);
+ err = -EIO;
}
+ i915_request_put(rq);
+ }
+
+out:
+ igt_spinner_end(&spin);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ igt_spinner_fini(&spin);
+ return err;
+}
+
+static int live_virtual_slice(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class;
+ int err;
+
+ if (intel_uc_uses_guc_submission(&gt->uc))
+ return 0;
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ unsigned int nsibling;
+
+ nsibling = __select_siblings(gt, class, siblings,
+ intel_engine_has_timeslices);
if (nsibling < 2)
continue;
- err = mask_virtual_engine(gt, siblings, nsibling);
+ err = slicein_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+
+ err = sliceout_virtual_engine(gt, siblings, nsibling);
if (err)
return err;
}
@@ -3982,7 +4266,7 @@ static int live_virtual_preserved(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- unsigned int class, inst;
+ unsigned int class;
/*
* Check that the context image retains non-privileged (user) registers
@@ -4000,13 +4284,7 @@ static int live_virtual_preserved(void *arg)
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
int nsibling, err;
- nsibling = 0;
- for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!gt->engine_class[class][inst])
- continue;
-
- siblings[nsibling++] = gt->engine_class[class][inst];
- }
+ nsibling = select_siblings(gt, class, siblings);
if (nsibling < 2)
continue;
@@ -4217,7 +4495,7 @@ static int live_virtual_bond(void *arg)
};
struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- unsigned int class, inst;
+ unsigned int class;
int err;
if (intel_uc_uses_guc_submission(&gt->uc))
@@ -4227,14 +4505,7 @@ static int live_virtual_bond(void *arg)
const struct phase *p;
int nsibling;
- nsibling = 0;
- for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!gt->engine_class[class][inst])
- break;
-
- GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
- siblings[nsibling++] = gt->engine_class[class][inst];
- }
+ nsibling = select_siblings(gt, class, siblings);
if (nsibling < 2)
continue;
@@ -4280,7 +4551,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
}
for (n = 0; n < nsibling; n++)
- engine_heartbeat_disable(siblings[n]);
+ st_engine_heartbeat_disable(siblings[n]);
rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
if (IS_ERR(rq)) {
@@ -4351,7 +4622,7 @@ out_rq:
i915_request_put(rq);
out_heartbeat:
for (n = 0; n < nsibling; n++)
- engine_heartbeat_enable(siblings[n]);
+ st_engine_heartbeat_enable(siblings[n]);
intel_context_put(ve);
out_spin:
@@ -4363,7 +4634,7 @@ static int live_virtual_reset(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- unsigned int class, inst;
+ unsigned int class;
/*
* Check that we handle a reset event within a virtual engine.
@@ -4381,13 +4652,7 @@ static int live_virtual_reset(void *arg)
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
int nsibling, err;
- nsibling = 0;
- for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!gt->engine_class[class][inst])
- continue;
-
- siblings[nsibling++] = gt->engine_class[class][inst];
- }
+ nsibling = select_siblings(gt, class, siblings);
if (nsibling < 2)
continue;
@@ -4405,6 +4670,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_sanitycheck),
SUBTEST(live_unlite_switch),
SUBTEST(live_unlite_preempt),
+ SUBTEST(live_unlite_ring),
SUBTEST(live_pin_rewind),
SUBTEST(live_hold_reset),
SUBTEST(live_error_interrupt),
@@ -4418,8 +4684,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_nopreempt),
SUBTEST(live_preempt_cancel),
SUBTEST(live_suppress_self_preempt),
- SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
+ SUBTEST(live_preempt_ring),
SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_user),
@@ -4427,6 +4693,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
SUBTEST(live_virtual_preserved),
+ SUBTEST(live_virtual_slice),
SUBTEST(live_virtual_bond),
SUBTEST(live_virtual_reset),
};
@@ -5030,7 +5297,7 @@ static int live_lrc_gpr(void *arg)
return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
err = __live_lrc_gpr(engine, scratch, false);
if (err)
@@ -5041,7 +5308,7 @@ static int live_lrc_gpr(void *arg)
goto err;
err:
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
@@ -5190,7 +5457,7 @@ static int live_lrc_timestamp(void *arg)
for_each_engine(data.engine, gt, id) {
int i, err = 0;
- engine_heartbeat_disable(data.engine);
+ st_engine_heartbeat_disable(data.engine);
for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
struct intel_context *tmp;
@@ -5223,7 +5490,7 @@ static int live_lrc_timestamp(void *arg)
}
err:
- engine_heartbeat_enable(data.engine);
+ st_engine_heartbeat_enable(data.engine);
for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
if (!data.ce[i])
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index 63f87d8608c3..b25eba50c88e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -157,7 +157,7 @@ static int read_mocs_table(struct i915_request *rq,
{
u32 addr;
- if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
+ if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
addr = global_mocs_offset();
else
addr = mocs_offset(rq->engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 2dc460624bbc..64ef5ee5decf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -132,7 +132,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce)
}
cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
- if (INTEL_GEN(rq->i915) >= 8)
+ if (INTEL_GEN(rq->engine->i915) >= 8)
cmd++;
*cs++ = cmd;
@@ -197,10 +197,10 @@ int live_rc6_ctx_wa(void *arg)
int pass;
for (pass = 0; pass < 2; pass++) {
+ struct i915_gpu_error *error = &gt->i915->gpu_error;
struct intel_context *ce;
unsigned int resets =
- i915_reset_engine_count(&gt->i915->gpu_error,
- engine);
+ i915_reset_engine_count(error, engine);
const u32 *res;
/* Use a sacrifical context */
@@ -230,11 +230,10 @@ int live_rc6_ctx_wa(void *arg)
engine->name, READ_ONCE(*res));
if (resets !=
- i915_reset_engine_count(&gt->i915->gpu_error,
- engine)) {
+ i915_reset_engine_count(error, engine)) {
pr_err("%s: GPU reset required\n",
engine->name);
- add_taint_for_CI(TAINT_WARN);
+ add_taint_for_CI(gt->i915, TAINT_WARN);
err = -EIO;
goto out;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index c91981e75ebf..8624f5d2a1f3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -12,6 +12,7 @@
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
+#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
@@ -20,22 +21,6 @@
/* Try to isolate the impact of cstates from determing frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
-static void engine_heartbeat_disable(struct intel_engine_cs *engine)
-{
- engine->props.heartbeat_interval_ms = 0;
-
- intel_engine_pm_get(engine);
- intel_engine_park_heartbeat(engine);
-}
-
-static void engine_heartbeat_enable(struct intel_engine_cs *engine)
-{
- intel_engine_pm_put(engine);
-
- engine->props.heartbeat_interval_ms =
- engine->defaults.heartbeat_interval_ms;
-}
-
static void dummy_rps_work(struct work_struct *wrk)
{
}
@@ -249,13 +234,13 @@ int live_rps_clock_interval(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
MI_NOOP);
if (IS_ERR(rq)) {
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
err = PTR_ERR(rq);
break;
}
@@ -266,7 +251,7 @@ int live_rps_clock_interval(void *arg)
pr_err("%s: RPS spinner did not start\n",
engine->name);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
intel_gt_set_wedged(engine->gt);
err = -EIO;
break;
@@ -322,7 +307,7 @@ int live_rps_clock_interval(void *arg)
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err == 0) {
u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
@@ -408,7 +393,7 @@ int live_rps_control(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
@@ -424,7 +409,7 @@ int live_rps_control(void *arg)
pr_err("%s: RPS spinner did not start\n",
engine->name);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
intel_gt_set_wedged(engine->gt);
err = -EIO;
break;
@@ -434,7 +419,7 @@ int live_rps_control(void *arg)
pr_err("%s: could not set minimum frequency [%x], only %x!\n",
engine->name, rps->min_freq, read_cagf(rps));
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
show_pstate_limits(rps);
err = -EINVAL;
break;
@@ -451,7 +436,7 @@ int live_rps_control(void *arg)
pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
engine->name, rps->min_freq, read_cagf(rps));
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
show_pstate_limits(rps);
err = -EINVAL;
break;
@@ -466,7 +451,7 @@ int live_rps_control(void *arg)
min_dt = ktime_sub(ktime_get(), min_dt);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
engine->name,
@@ -637,14 +622,14 @@ int live_rps_frequency_cs(void *arg)
int freq;
} min, max;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
vma = create_spin_counter(engine,
engine->kernel_context->vm, false,
&cancel, &cntr);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
break;
}
@@ -725,7 +710,7 @@ err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
@@ -779,14 +764,14 @@ int live_rps_frequency_srm(void *arg)
int freq;
} min, max;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
vma = create_spin_counter(engine,
engine->kernel_context->vm, true,
&cancel, &cntr);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
break;
}
@@ -866,7 +851,7 @@ err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
@@ -1061,11 +1046,11 @@ int live_rps_interrupt(void *arg)
intel_gt_pm_wait_for_idle(engine->gt);
GEM_BUG_ON(intel_rps_is_active(rps));
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
err = __rps_up_interrupt(rps, engine, &spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err)
goto out;
@@ -1074,13 +1059,13 @@ int live_rps_interrupt(void *arg)
/* Keep the engine awake but idle and check for DOWN */
if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
intel_rc6_disable(&gt->rc6);
err = __rps_down_interrupt(rps, engine);
intel_rc6_enable(&gt->rc6);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err)
goto out;
}
@@ -1165,13 +1150,13 @@ int live_rps_power(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
MI_NOOP);
if (IS_ERR(rq)) {
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
err = PTR_ERR(rq);
break;
}
@@ -1182,7 +1167,7 @@ int live_rps_power(void *arg)
pr_err("%s: RPS spinner did not start\n",
engine->name);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
intel_gt_set_wedged(engine->gt);
err = -EIO;
break;
@@ -1195,7 +1180,7 @@ int live_rps_power(void *arg)
min.power = measure_power_at(rps, &min.freq);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
engine->name,
@@ -1252,6 +1237,11 @@ int live_rps_dynamic(void *arg)
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
+ if (intel_rps_has_interrupts(rps))
+ pr_info("RPS has interrupt support\n");
+ if (intel_rps_uses_timer(rps))
+ pr_info("RPS has timer support\n");
+
for_each_engine(engine, gt, id) {
struct i915_request *rq;
struct {
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index ef1c35073dc0..fb5b7d3498a6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -12,6 +12,7 @@
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
+#include "selftest_engine_heartbeat.h"
#include "../selftests/i915_random.h"
#include "../i915_selftest.h"
@@ -426,12 +427,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
if (IS_ERR(cs))
return PTR_ERR(cs);
- if (INTEL_GEN(rq->i915) >= 8) {
+ if (INTEL_GEN(rq->engine->i915) >= 8) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = addr;
*cs++ = 0;
*cs++ = value;
- } else if (INTEL_GEN(rq->i915) >= 4) {
+ } else if (INTEL_GEN(rq->engine->i915) >= 4) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = 0;
*cs++ = addr;
@@ -561,8 +562,9 @@ static int live_hwsp_engine(void *arg)
struct intel_timeline *tl = timelines[n];
if (!err && *tl->hwsp_seqno != n) {
- pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
- n, *tl->hwsp_seqno);
+ pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n",
+ n, tl->hwsp_offset, *tl->hwsp_seqno);
+ GEM_TRACE_DUMP();
err = -EINVAL;
}
intel_timeline_put(tl);
@@ -632,8 +634,9 @@ out:
struct intel_timeline *tl = timelines[n];
if (!err && *tl->hwsp_seqno != n) {
- pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
- n, *tl->hwsp_seqno);
+ pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n",
+ n, tl->hwsp_offset, *tl->hwsp_seqno);
+ GEM_TRACE_DUMP();
err = -EINVAL;
}
intel_timeline_put(tl);
@@ -751,22 +754,6 @@ out_free:
return err;
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine)
-{
- engine->props.heartbeat_interval_ms = 0;
-
- intel_engine_pm_get(engine);
- intel_engine_park_heartbeat(engine);
-}
-
-static void engine_heartbeat_enable(struct intel_engine_cs *engine)
-{
- intel_engine_pm_put(engine);
-
- engine->props.heartbeat_interval_ms =
- engine->defaults.heartbeat_interval_ms;
-}
-
static int live_hwsp_rollover_kernel(void *arg)
{
struct intel_gt *gt = arg;
@@ -785,7 +772,7 @@ static int live_hwsp_rollover_kernel(void *arg)
struct i915_request *rq[3] = {};
int i;
- engine_heartbeat_disable(engine);
+ st_engine_heartbeat_disable(engine);
if (intel_gt_wait_for_idle(gt, HZ / 2)) {
err = -EIO;
goto out;
@@ -836,7 +823,7 @@ static int live_hwsp_rollover_kernel(void *arg)
out:
for (i = 0; i < ARRAY_SIZE(rq); i++)
i915_request_put(rq[i]);
- engine_heartbeat_enable(engine);
+ st_engine_heartbeat_enable(engine);
if (err)
break;
}
@@ -980,8 +967,9 @@ static int live_hwsp_recycle(void *arg)
}
if (*tl->hwsp_seqno != count) {
- pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ pr_err("Invalid seqno stored in timeline %lu @ tl->hwsp_offset, found 0x%x\n",
count, *tl->hwsp_seqno);
+ GEM_TRACE_DUMP();
err = -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 32785463ec9e..febc9e6692ba 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -417,6 +417,20 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg)
return false;
}
+static bool timestamp(const struct intel_engine_cs *engine, u32 reg)
+{
+ reg = (reg - engine->mmio_base) & ~RING_FORCE_TO_NONPRIV_ACCESS_MASK;
+ switch (reg) {
+ case 0x358:
+ case 0x35c:
+ case 0x3a8:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
static bool ro_register(u32 reg)
{
if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
@@ -497,6 +511,9 @@ static int check_dirty_whitelist(struct intel_context *ce)
if (wo_register(engine, reg))
continue;
+ if (timestamp(engine, reg))
+ continue; /* timestamps are expected to autoincrement */
+
ro_reg = ro_register(reg);
/* Clear non priv flags */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 101728006ae9..d44061033f23 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -67,7 +67,7 @@ struct __guc_ads_blob {
static void __guc_ads_init(struct intel_guc *guc)
{
- struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ struct intel_gt *gt = guc_to_gt(guc);
struct __guc_ads_blob *blob = guc->ads_blob;
const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
u32 base;
@@ -99,13 +99,13 @@ static void __guc_ads_init(struct intel_guc *guc)
}
/* System info */
- blob->system_info.slice_enabled = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask);
+ blob->system_info.slice_enabled = hweight8(gt->info.sseu.slice_mask);
blob->system_info.rcs_enabled = 1;
blob->system_info.bcs_enabled = 1;
- blob->system_info.vdbox_enable_mask = VDBOX_MASK(dev_priv);
- blob->system_info.vebox_enable_mask = VEBOX_MASK(dev_priv);
- blob->system_info.vdbox_sfc_support_mask = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
+ blob->system_info.vdbox_enable_mask = VDBOX_MASK(gt);
+ blob->system_info.vebox_enable_mask = VEBOX_MASK(gt);
+ blob->system_info.vdbox_sfc_support_mask = gt->info.vdbox_sfc_access;
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index fb10f3597ea5..9bbe8a795cb8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -424,25 +424,28 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
static u32 __get_default_log_level(struct intel_guc_log *log)
{
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
/* A negative value means "use platform/config default" */
- if (i915_modparams.guc_log_level < 0) {
+ if (i915->params.guc_log_level < 0) {
return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ?
GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_NON_VERBOSE;
}
- if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) {
+ if (i915->params.guc_log_level > GUC_LOG_LEVEL_MAX) {
DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
- "guc_log_level", i915_modparams.guc_log_level,
+ "guc_log_level", i915->params.guc_log_level,
"verbosity too high");
return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ?
GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED;
}
- GEM_BUG_ON(i915_modparams.guc_log_level < GUC_LOG_LEVEL_DISABLED);
- GEM_BUG_ON(i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX);
- return i915_modparams.guc_log_level;
+ GEM_BUG_ON(i915->params.guc_log_level < GUC_LOG_LEVEL_DISABLED);
+ GEM_BUG_ON(i915->params.guc_log_level > GUC_LOG_LEVEL_MAX);
+ return i915->params.guc_log_level;
}
int intel_guc_log_create(struct intel_guc_log *log)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 94eb63f309ce..fdfeb4b9b0f5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -660,10 +660,12 @@ void intel_guc_submission_disable(struct intel_guc *guc)
static bool __guc_submission_selected(struct intel_guc *guc)
{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
if (!intel_guc_submission_is_supported(guc))
return false;
- return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
+ return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
}
void intel_guc_submission_init_early(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index f518fe05c6f9..d6f55f70889d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -47,15 +47,15 @@ static void __confirm_options(struct intel_uc *uc)
drm_dbg(&i915->drm,
"enable_guc=%d (guc:%s submission:%s huc:%s)\n",
- i915_modparams.enable_guc,
+ i915->params.enable_guc,
yesno(intel_uc_wants_guc(uc)),
yesno(intel_uc_wants_guc_submission(uc)),
yesno(intel_uc_wants_huc(uc)));
- if (i915_modparams.enable_guc == -1)
+ if (i915->params.enable_guc == -1)
return;
- if (i915_modparams.enable_guc == 0) {
+ if (i915->params.enable_guc == 0) {
GEM_BUG_ON(intel_uc_wants_guc(uc));
GEM_BUG_ON(intel_uc_wants_guc_submission(uc));
GEM_BUG_ON(intel_uc_wants_huc(uc));
@@ -65,25 +65,25 @@ static void __confirm_options(struct intel_uc *uc)
if (!intel_uc_supports_guc(uc))
drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
- i915_modparams.enable_guc, "GuC is not supported!");
+ i915->params.enable_guc, "GuC is not supported!");
- if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC &&
+ if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
!intel_uc_supports_huc(uc))
drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
- i915_modparams.enable_guc, "HuC is not supported!");
+ i915->params.enable_guc, "HuC is not supported!");
- if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION &&
+ if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
- i915_modparams.enable_guc, "GuC submission is N/A");
+ i915->params.enable_guc, "GuC submission is N/A");
- if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION |
+ if (i915->params.enable_guc & ~(ENABLE_GUC_SUBMISSION |
ENABLE_GUC_LOAD_HUC))
drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
- i915_modparams.enable_guc, "undocumented flag");
+ i915->params.enable_guc, "undocumented flag");
}
void intel_uc_init_early(struct intel_uc *uc)
@@ -267,8 +267,17 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
GEM_BUG_ON(!intel_uc_wants_guc(uc));
err = intel_uc_fw_fetch(&uc->guc.fw);
- if (err)
+ if (err) {
+ /* Make sure we transition out of transient "SELECTED" state */
+ if (intel_uc_wants_huc(uc)) {
+ drm_dbg(&uc_to_gt(uc)->i915->drm,
+ "Failed to fetch GuC: %d disabling HuC\n", err);
+ intel_uc_fw_change_status(&uc->huc.fw,
+ INTEL_UC_FIRMWARE_ERROR);
+ }
+
return;
+ }
if (intel_uc_wants_huc(uc))
intel_uc_fw_fetch(&uc->huc.fw);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
index 9d16b784aa0d..089d98662f46 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -4,14 +4,41 @@
*/
#include <linux/debugfs.h>
+#include <drm/drm_print.h>
+#include "gt/debugfs_gt.h"
#include "intel_guc_debugfs.h"
#include "intel_huc_debugfs.h"
#include "intel_uc.h"
#include "intel_uc_debugfs.h"
+static int uc_usage_show(struct seq_file *m, void *data)
+{
+ struct intel_uc *uc = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ drm_printf(&p, "[guc] supported:%s wanted:%s used:%s\n",
+ yesno(intel_uc_supports_guc(uc)),
+ yesno(intel_uc_wants_guc(uc)),
+ yesno(intel_uc_uses_guc(uc)));
+ drm_printf(&p, "[huc] supported:%s wanted:%s used:%s\n",
+ yesno(intel_uc_supports_huc(uc)),
+ yesno(intel_uc_wants_huc(uc)),
+ yesno(intel_uc_uses_huc(uc)));
+ drm_printf(&p, "[submission] supported:%s wanted:%s used:%s\n",
+ yesno(intel_uc_supports_guc_submission(uc)),
+ yesno(intel_uc_wants_guc_submission(uc)),
+ yesno(intel_uc_uses_guc_submission(uc)));
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(uc_usage);
+
void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
{
+ static const struct debugfs_gt_file files[] = {
+ { "usage", &uc_usage_fops, NULL },
+ };
struct dentry *root;
if (!gt_root)
@@ -25,6 +52,8 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
if (IS_ERR(root))
return;
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc);
+
intel_guc_debugfs_register(&uc->guc, root);
intel_huc_debugfs_register(&uc->huc, root);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index e1caae93996d..59b27aba15c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -47,12 +47,15 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
* between 33.0 and 35.2 are only related to new additions to support new Gen12
* features.
+ *
+ * Note that RKL uses the same firmware as TGL.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \
fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
- fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \
fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
@@ -112,11 +115,13 @@ struct __packed uc_fw_platform_requirement {
},
static void
-__uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev)
+__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
{
static const struct uc_fw_platform_requirement fw_blobs[] = {
INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB)
};
+ enum intel_platform p = INTEL_INFO(i915)->platform;
+ u8 rev = INTEL_REVID(i915);
int i;
for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) {
@@ -151,35 +156,35 @@ __uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev)
}
/* We don't want to enable GuC/HuC on pre-Gen11 by default */
- if (i915_modparams.enable_guc == -1 && p < INTEL_ICELAKE)
+ if (i915->params.enable_guc == -1 && p < INTEL_ICELAKE)
uc_fw->path = NULL;
}
-static const char *__override_guc_firmware_path(void)
+static const char *__override_guc_firmware_path(struct drm_i915_private *i915)
{
- if (i915_modparams.enable_guc & (ENABLE_GUC_SUBMISSION |
- ENABLE_GUC_LOAD_HUC))
- return i915_modparams.guc_firmware_path;
+ if (i915->params.enable_guc & (ENABLE_GUC_SUBMISSION |
+ ENABLE_GUC_LOAD_HUC))
+ return i915->params.guc_firmware_path;
return "";
}
-static const char *__override_huc_firmware_path(void)
+static const char *__override_huc_firmware_path(struct drm_i915_private *i915)
{
- if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC)
- return i915_modparams.huc_firmware_path;
+ if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC)
+ return i915->params.huc_firmware_path;
return "";
}
-static void __uc_fw_user_override(struct intel_uc_fw *uc_fw)
+static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
{
const char *path = NULL;
switch (uc_fw->type) {
case INTEL_UC_FW_TYPE_GUC:
- path = __override_guc_firmware_path();
+ path = __override_guc_firmware_path(i915);
break;
case INTEL_UC_FW_TYPE_HUC:
- path = __override_huc_firmware_path();
+ path = __override_huc_firmware_path(i915);
break;
}
@@ -213,10 +218,8 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
uc_fw->type = type;
if (HAS_GT_UC(i915)) {
- __uc_fw_auto_select(uc_fw,
- INTEL_INFO(i915)->platform,
- INTEL_REVID(i915));
- __uc_fw_user_override(uc_fw);
+ __uc_fw_auto_select(i915, uc_fw);
+ __uc_fw_user_override(i915, uc_fw);
}
intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?