Diffstat (limited to 'drivers/gpu/drm/i915/gem/selftests')
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c           | 187
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c  |  42
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c   |  13
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c     | 274
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c      |   8
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c        |  66
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c  | 141
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c        | 141
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h        |  16
9 files changed, 485 insertions, 403 deletions
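Most of the churn below comes from consolidating the open-coded MI_STORE_DWORD_IMM batch builders (gpu_write_dw() in huge_pages.c and gpu_fill_dw() in i915_gem_context.c) into the shared igt_emit_store_dw()/igt_gpu_fill_dw() helpers in igt_gem_utils.c, which is why those two test files shrink while igt_gem_utils.c/h grow. As a minimal sketch of how a selftest is expected to drive the consolidated helper, modelled on the reworked gpu_write() in huge_pages.c below (fill_object() is an illustrative name, not part of the patch):

	/* Sketch only: mirrors the new gpu_write() in huge_pages.c. */
	static int fill_object(struct i915_vma *vma,
			       struct i915_gem_context *ctx,
			       struct intel_engine_cs *engine,
			       u32 dw, u32 val)
	{
		int err;

		/* Serialise with CPU access before the GPU writes the pages. */
		i915_gem_object_lock(vma->obj);
		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
		i915_gem_object_unlock(vma->obj);
		if (err)
			return err;

		/* One MI_STORE_DWORD_IMM per page, at byte offset dw * 4. */
		return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32),
				       vma->size >> PAGE_SHIFT, val);
	}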
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index b74729b6f353..8de83c6d81f5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -10,6 +10,8 @@ #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + #include "igt_gem_utils.h" #include "mock_context.h"
@@ -877,126 +879,22 @@ out_object_put: return err; } -static struct i915_vma * -gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) -{ - struct drm_i915_private *i915 = vma->vm->i915; - const int gen = INTEL_GEN(i915); - unsigned int count = vma->size >> PAGE_SHIFT; - struct drm_i915_gem_object *obj; - struct i915_vma *batch; - unsigned int size; - u32 *cmd; - int n; - int err; - - size = (1 + 4 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = val; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = val; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = val; - } - - offset += PAGE_SIZE; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (err) - goto err; - - return batch; - -err: - i915_gem_object_put(obj); - - return ERR_PTR(err); -} - static int gpu_write(struct i915_vma *vma, struct i915_gem_context *ctx, struct intel_engine_cs *engine, - u32 dword, - u32 value) + u32 dw, + u32 val) { - struct i915_request *rq; - struct i915_vma *batch; int err; - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - batch = gpu_write_dw(vma, dword * sizeof(u32), value); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); + i915_gem_object_lock(vma->obj); + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (err) - goto err_request; - - i915_vma_lock(vma); - err = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto err_request; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -err_request: - if (err) - i915_request_skip(rq, err); - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + return err; - return err; + return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
@@ -1037,8 +935,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, u64 size, u64 offset, u32 dword, u32 val) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err;
@@ -1421,6 +1318,9 @@ static int igt_ppgtt_pin_update(void *arg) struct drm_i915_gem_object *obj; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int n; int first, last; int err;
@@ -1518,11 +1418,20 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. */ - err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; + n = 0; + for_each_engine(engine, dev_priv, id) { + if (!intel_engine_can_store_dword(engine)) + continue; - err = cpu_check(obj, 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + if (err) + goto out_unpin; + } + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + goto out_unpin; + } out_unpin: i915_vma_unpin(vma);
@@ -1598,8 +1507,11 @@ static int igt_shrink_thp(void *arg) struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct i915_vma *vma; unsigned int flags = PIN_USER; + unsigned int n; int err; /*
@@ -1635,9 +1547,15 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; + n = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + if (err) + goto out_unpin; + } i915_vma_unpin(vma);
@@ -1662,7 +1580,12 @@ if (err) goto out_close; - err = cpu_check(obj, 0, 0xdeadbeaf); + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + goto out_unpin; + } + out_unpin: i915_vma_unpin(vma);
@@ -1726,7 +1649,7 @@ out_unlock: return err; } -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_shrink_thp),
@@ -1741,22 +1664,22 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) intel_wakeref_t wakeref; int err; - if (!HAS_PPGTT(dev_priv)) { + if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - file = mock_file(dev_priv); + file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(dev_priv, file); + ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_unlock; }
@@ -1768,10 +1691,10 @@ err = i915_subtests(tests, ctx); out_unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(dev_priv, file); + mock_file_free(i915, file); return err; }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index f3a5eb807c1c..d8804a847945 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -5,14 +5,17 @@ #include "i915_selftest.h" +#include "gt/intel_gt.h" + #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" static int igt_client_fill(void *arg) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end);
@@ -22,15 +25,19 @@ static int igt_client_fill(void *arg) prandom_seed_state(&prng, i915_selftest.random_seed); do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush;
@@ -52,7 +59,8 @@ * values after we do the set_to_cpu_domain and pick it up as a * test failure. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true;
@@ -63,24 +71,13 @@ if (err) goto err_unpin; - /* - * XXX: For now do the wait without the object resv lock to - * ensure we don't deadlock. - */ - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - i915_gem_object_lock(obj); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val);
@@ -100,11 +97,6 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - if (err == -ENOMEM) err = 0;
@@ -117,11 +109,11 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_client_fill), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 8f22d3f18422..0ff7a89aadca 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -6,6 +6,8 @@ #include <linux/prime_numbers.h> +#include "gt/intel_gt.h" + #include "i915_selftest.h" #include "selftests/i915_random.h"
@@ -226,7 +228,9 @@ static int gpu_set(struct drm_i915_gem_object *obj, intel_ring_advance(rq, cs); i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); i915_vma_unpin(vma);
@@ -242,12 +246,15 @@ static bool always_valid(struct drm_i915_private *i915) static bool needs_fence_registers(struct drm_i915_private *i915) { - return !i915_terminally_wedged(i915); + return !intel_gt_is_wedged(&i915->gt); } static bool needs_mi_store_dword(struct drm_i915_private *i915) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) + return false; + + if (!HAS_ENGINE(i915, RCS0)) return false; return intel_engine_can_store_dword(i915->engine[RCS0]);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index eaa2b16574c7..3e6f4a65d356 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_reset.h" #include "i915_selftest.h"
@@ -31,7 +32,6 @@ static int live_nop_switch(void *arg) struct intel_engine_cs *engine; struct i915_gem_context **ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct igt_live_test t; struct drm_file *file; unsigned long n;
@@ -53,7 +53,6 @@ return PTR_ERR(file); mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) {
@@ -85,7 +84,7 @@ } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto out_unlock; }
@@ -129,7 +128,7 @@ static int live_nop_switch(void *arg) if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); break; }
@@ -152,76 +151,11 @@ static int live_nop_switch(void *arg) } out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static struct i915_vma * -gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) -{ - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); - unsigned long n, size; - u32 *cmd; - int err; - - size = (4 * count + 1) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(vma->vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = value; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = value; - } - offset += PAGE_SIZE; - } - *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - static unsigned long real_page_count(struct drm_i915_gem_object *obj) { return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
@@ -237,12 +171,8 @@ static int gpu_fill(struct drm_i915_gem_object *obj, struct intel_engine_cs *engine, unsigned int dw) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - struct i915_request *rq; + struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_vma *vma; - struct i915_vma *batch; - unsigned int flags; int err; GEM_BUG_ON(obj->base.size > vm->total);
@@ -253,7 +183,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, return PTR_ERR(vma); i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); + err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) return err;
@@ -262,70 +192,23 @@ if (err) return err; - /* Within the GTT the huge objects maps every page onto + /* + * Within the GTT the huge objects maps every page onto * its 1024 real pages (using phys_pfn = dma_pfn % 1024). * We set the nth dword within the page using the nth * mapping via the GTT - this should exercise the GTT mapping * whilst checking that each context provides a unique view * into the object. */ - batch = gpu_fill_dw(vma, - (dw * real_page_count(obj)) << PAGE_SHIFT | - (dw * sizeof(u32)), - real_page_count(obj), - dw); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - if (err) - goto skip_request; - - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto skip_request; - - i915_request_add(rq); - - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + err = igt_gpu_fill_dw(vma, + ctx, + engine, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); i915_vma_unpin(vma); - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -err_vma: - i915_vma_unpin(vma); return err; }
@@ -431,6 +314,9 @@ create_test_object(struct i915_gem_context *ctx, u64 size; int err; + /* Keep in GEM's good graces */ + i915_retire_requests(ctx->i915); + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
@@ -507,7 +393,6 @@ static int igt_ctx_exec(void *arg) dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; ctx = live_context(i915, file); if (IS_ERR(ctx)) {
@@ -523,8 +408,7 @@ } } - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj),
@@ -565,6 +449,8 @@ out_unlock: mock_file_free(i915, file); if (err) return err; + + i915_gem_drain_freed_objects(i915); } return 0;
@@ -623,7 +509,6 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; ctx = kernel_context(i915); if (IS_ERR(ctx)) {
@@ -642,9 +527,7 @@ } } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj),
@@ -678,6 +561,10 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } + + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(i915); + mutex_lock(&i915->drm.struct_mutex); } out_test: if (igt_live_test_end(&t))
@@ -746,7 +633,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, ce->gem_context->vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma);
@@ -779,13 +666,17 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, goto err_request; i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request;
@@ -820,8 +711,7 @@ err_vma: #define TEST_RESET BIT(2) static int -__sseu_prepare(struct drm_i915_private *i915, - const char *name, +__sseu_prepare(const char *name, unsigned int flags, struct intel_context *ce, struct igt_spinner **spin)
@@ -837,14 +727,11 @@ __sseu_prepare(struct drm_i915_private *i915, if (!*spin) return -ENOMEM; - ret = igt_spinner_init(*spin, i915); + ret = igt_spinner_init(*spin, ce->engine->gt); if (ret) goto err_free; - rq = igt_spinner_create_request(*spin, - ce->gem_context, - ce->engine, - MI_NOOP); + rq = igt_spinner_create_request(*spin, ce, MI_NOOP); if (IS_ERR(rq)) { ret = PTR_ERR(rq); goto err_fini;
@@ -870,8 +757,7 @@ err_free: } static int -__read_slice_count(struct drm_i915_private *i915, - struct intel_context *ce, +__read_slice_count(struct intel_context *ce, struct drm_i915_gem_object *obj, struct igt_spinner *spin, u32 *rpcs)
@@ -900,7 +786,7 @@ __read_slice_count(struct drm_i915_private *i915, return ret; } - if (INTEL_GEN(i915) >= 11) { + if (INTEL_GEN(ce->engine->i915) >= 11) { s_mask = GEN11_RPCS_S_CNT_MASK; s_shift = GEN11_RPCS_S_CNT_SHIFT; } else {
@@ -943,8 +829,7 @@ __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, } static int -__sseu_finish(struct drm_i915_private *i915, - const char *name, +__sseu_finish(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj,
@@ -956,19 +841,18 @@ __sseu_finish(struct drm_i915_private *i915, int ret = 0; if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); + ret = intel_engine_reset(ce->engine, "sseu"); if (ret) goto out; } - ret = __read_slice_count(i915, ce, obj, + ret = __read_slice_count(ce, obj, flags & TEST_RESET ? NULL : spin, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); if (ret) goto out; - ret = __read_slice_count(i915, ce->engine->kernel_context, obj, - NULL, &rpcs); + ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); out:
@@ -976,11 +860,12 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT); + ret = i915_gem_wait_for_idle(ce->engine->i915, + 0, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; - ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); + ret = __read_slice_count(ce, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", " after idle!"); }
@@ -989,8 +874,7 @@ out: } static int -__sseu_test(struct drm_i915_private *i915, - const char *name, +__sseu_test(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj,
@@ -999,7 +883,7 @@ __sseu_test(struct drm_i915_private *i915, struct igt_spinner *spin = NULL; int ret; - ret = __sseu_prepare(i915, name, flags, ce, &spin); + ret = __sseu_prepare(name, flags, ce, &spin); if (ret) return ret;
@@ -1007,7 +891,7 @@ __sseu_test(struct drm_i915_private *i915, if (ret) goto out_spin; - ret = __sseu_finish(i915, name, flags, ce, obj, + ret = __sseu_finish(name, flags, ce, obj, hweight32(sseu.slice_mask), spin); out_spin:
@@ -1025,35 +909,33 @@ __igt_ctx_sseu(struct drm_i915_private *i915, unsigned int flags) { struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_context *ce; struct intel_sseu pg_sseu; - intel_wakeref_t wakeref; struct drm_file *file; int ret; - if (INTEL_GEN(i915) < 9) + if (INTEL_GEN(i915) < 9 || !engine) return 0; if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; - if (hweight32(default_sseu.slice_mask) < 2) + if (hweight32(engine->sseu.slice_mask) < 2) return 0; /* * Gen11 VME friendly power-gated configuration with half enabled * sub-slices. */ - pg_sseu = default_sseu; + pg_sseu = engine->sseu; pg_sseu.slice_mask = 1; pg_sseu.subslice_mask = - ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(default_sseu.slice_mask), + name, flags, hweight32(engine->sseu.slice_mask), hweight32(pg_sseu.slice_mask)); file = mock_file(i915);
@@ -1061,7 +943,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, return PTR_ERR(file); if (flags & TEST_RESET) - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); mutex_lock(&i915->drm.struct_mutex);
@@ -1078,12 +960,10 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_unlock; } - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ce = i915_gem_context_get_engine(ctx, RCS0); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - goto out_rpm; + goto out_put; } ret = intel_context_pin(ce);
@@ -1091,22 +971,22 @@ goto out_context; /* First set the default mask. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + ret = __sseu_test(name, flags, ce, obj, engine->sseu); if (ret) goto out_fail; /* Then set a power-gated configuration. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + ret = __sseu_test(name, flags, ce, obj, pg_sseu); if (ret) goto out_fail; /* Back to defaults. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + ret = __sseu_test(name, flags, ce, obj, engine->sseu); if (ret) goto out_fail; /* One last power-gated configuration for the road. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + ret = __sseu_test(name, flags, ce, obj, pg_sseu); if (ret) goto out_fail;
@@ -1117,15 +997,14 @@ out_fail: intel_context_unpin(ce); out_context: intel_context_put(ce); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); +out_put: i915_gem_object_put(obj); out_unlock: mutex_unlock(&i915->drm.struct_mutex); if (flags & TEST_RESET) - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); mock_file_free(i915, file);
@@ -1194,7 +1073,7 @@ static int igt_ctx_readonly(void *arg) goto out_unlock; } - vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm; + vm = ctx->vm ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_unlock; }
@@ -1207,8 +1086,6 @@ unsigned int id; for_each_engine(engine, i915, id) { - intel_wakeref_t wakeref; - if (!intel_engine_can_store_dword(engine)) continue;
@@ -1223,9 +1100,7 @@ i915_gem_object_set_readonly(obj); } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj),
@@ -1347,7 +1222,9 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); i915_vma_unlock(vma); if (err) goto skip_request;
@@ -1444,7 +1321,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request;
@@ -1488,7 +1367,6 @@ static int igt_vm_isolation(void *arg) struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; struct drm_file *file; I915_RND_STATE(prng);
@@ -1535,8 +1413,6 @@ GEM_BUG_ON(ctx_b->vm->total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time);
@@ -1551,7 +1427,7 @@ div64_u64_rem(i915_prandom_u64_state(&prng), vm_total, &offset); - offset &= -sizeof(u32); + offset = round_down(offset, alignof_dword); offset += I915_GTT_PAGE_SIZE; err = write_to_scratch(ctx_a, engine,
@@ -1560,7 +1436,7 @@ err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_rpm; + goto out_unlock; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1569,7 +1445,7 @@ lower_32_bits(offset), this); err = -EINVAL; - goto out_rpm; + goto out_unlock; } this++;
@@ -1579,8 +1455,6 @@ pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); out_unlock: if (igt_live_test_end(&t)) err = -EIO;
@@ -1736,7 +1610,7 @@ int i915_gem_context_mock_selftests(void) return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch),
@@ -1747,8 +1621,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_vm_isolation), }; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, dev_priv); + return i915_live_subtests(tests, i915); }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index e3a64edef918..d85d1ce273ca 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -20,7 +20,7 @@ static int igt_dmabuf_export(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); i915_gem_object_put(obj); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n",
@@ -44,7 +44,7 @@ static int igt_dmabuf_import_self(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf));
@@ -219,7 +219,7 @@ static int igt_dmabuf_export_vmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf));
@@ -266,7 +266,7 @@ static int igt_dmabuf_export_kmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); i915_gem_object_put(obj); if (IS_ERR(dmabuf)) { err = PTR_ERR(dmabuf);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5c81f4b4813a..1d27babff0ce 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -6,6 +6,7 @@ #include <linux/prime_numbers.h> +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h"
@@ -143,7 +144,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_flush_ggtt_writes(to_i915(obj->base.dev)); + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset);
@@ -327,7 +328,8 @@ out: static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct i915_vma *vma; int err;
@@ -339,17 +341,24 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); - } + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, + EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); - i915_request_add(rq); + i915_request_add(rq); + } i915_vma_unpin(vma); i915_gem_object_put(obj); /* leave it only alive via its active ref */
@@ -376,9 +385,9 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { - i915_gem_shrinker_unregister(i915); + i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(i915); + intel_gt_pm_get(&i915->gt); cancel_delayed_work_sync(&i915->gem.retire_work); flush_work(&i915->gem.idle_work);
@@ -386,13 +395,25 @@ static void disable_retire_worker(struct drm_i915_private *i915) static void restore_retire_worker(struct drm_i915_private *i915) { - intel_gt_pm_put(i915); + intel_gt_pm_put(&i915->gt); mutex_lock(&i915->drm.struct_mutex); igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); - i915_gem_shrinker_register(i915); + i915_gem_driver_register__shrinker(i915); +} + +static void mmap_offset_lock(struct drm_i915_private *i915) + __acquires(&i915->drm.vma_offset_manager->vm_lock) +{ + write_lock(&i915->drm.vma_offset_manager->vm_lock); +} + +static void mmap_offset_unlock(struct drm_i915_private *i915) + __releases(&i915->drm.vma_offset_manager->vm_lock) +{ + write_unlock(&i915->drm.vma_offset_manager->vm_lock); }
@@ -413,7 +434,9 @@ static int igt_mmap_offset_exhaustion(void *arg) drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { resv.start = hole_start; resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + mmap_offset_lock(i915); err = drm_mm_reserve_node(mm, &resv); + mmap_offset_unlock(i915); if (err) { pr_err("Failed to trim VMA manager, err=%d\n", err); goto out_park; }
@@ -458,7 +481,7 @@ /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
@@ -474,19 +497,12 @@ pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; } - - /* NB we rely on the _active_ reference to access obj now */ - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - err = create_mmap_offset(obj); - if (err) { - pr_err("[loop %d] create_mmap_offset failed with err=%d\n", - loop, err); - goto out; - } } out: + mmap_offset_lock(i915); drm_mm_remove_node(&resv); + mmap_offset_unlock(i915); out_park: restore_retire_worker(i915); return err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index e23d8c9e9298..c21d747e7d05 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -3,16 +3,19 @@ * Copyright © 2019 Intel Corporation */ +#include "gt/intel_gt.h" + #include "i915_selftest.h" #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" static int igt_fill_blt(void *arg) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end);
@@ -21,16 +24,26 @@ static int igt_fill_blt(void *arg) prandom_seed_state(&prng, i915_selftest.random_seed); + /* + * XXX: needs some threads to scale all these tests, also maybe throw + * in submission from higher priority context to see if we are + * preempted for very large objects... + */ + do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush;
@@ -46,7 +59,8 @@ * Make sure the potentially async clflush does its job, if * required. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true;
@@ -63,7 +77,7 @@ if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val);
@@ -83,11 +97,111 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); + if (err == -ENOMEM) + err = 0; + + return err; +} + +static int igt_copy_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; + struct drm_i915_gem_object *src, *dst; + struct rnd_state prng; + IGT_TIMEOUT(end); + u32 *vaddr; + int err = 0; + + prandom_seed_state(&prng, i915_selftest.random_seed); + + do { + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); + u32 val = prandom_u32_state(&prng); + u32 i; + + sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); + + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); + + src = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(src)) { + err = PTR_ERR(src); + goto err_flush; + } + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_src; + } + + memset32(vaddr, val, + huge_gem_object_phys_size(src) / sizeof(u32)); + + i915_gem_object_unpin_map(src); + + if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + src->cache_dirty = true; + dst = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_put_src; + } + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_dst; + } + + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(dst) / sizeof(u32)); + + if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + dst->cache_dirty = true; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_copy_blt(src, dst, ce); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + goto err_unpin; + + i915_gem_object_lock(dst); + err = i915_gem_object_set_to_cpu_domain(dst, false); + i915_gem_object_unlock(dst); + if (err) + goto err_unpin; + + for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) { + if (vaddr[i] != val) { + pr_err("vaddr[%u]=%x, expected=%x\n", i, + vaddr[i], val); + err = -EINVAL; + goto err_unpin; + } + } + + i915_gem_object_unpin_map(dst); + + i915_gem_object_put(src); + i915_gem_object_put(dst); + } while (!time_after(jiffies, end)); + + goto err_flush; + +err_unpin: + i915_gem_object_unpin_map(dst); +err_put_dst: + i915_gem_object_put(dst); +err_put_src: + i915_gem_object_put(src); +err_flush: if (err == -ENOMEM) err = 0;
@@ -98,13 +212,14 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_fill_blt), + SUBTEST(igt_copy_blt), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); }
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index b232e6d2cd92..57ece53c1075 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -9,6 +9,8 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "i915_vma.h" +#include "i915_drv.h" #include "i915_request.h"
@@ -23,7 +25,7 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ce = i915_gem_context_get_engine(ctx, engine->id); + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); if (IS_ERR(ce)) return ERR_CAST(ce);
@@ -32,3 +34,140 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) return rq; } + +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = val; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +int igt_gpu_fill_dw(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, + unsigned long count, + u32 val) +{ + struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; + struct i915_request *rq; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(vma->size > vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + batch = igt_emit_store_dw(vma, offset, count, val); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto skip_request; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) + goto skip_request; + + i915_request_add(rq); + + i915_vma_unpin(batch); + i915_vma_close(batch); + i915_vma_put(batch); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); + return err; +}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
index 0f17251cf75d..361a7ef866b0 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
@@ -7,11 +7,27 @@ #ifndef __IGT_GEM_UTILS_H__ #define __IGT_GEM_UTILS_H__ +#include <linux/types.h> + struct i915_request; struct i915_gem_context; struct intel_engine_cs; +struct i915_vma; struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val); + +int igt_gpu_fill_dw(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, + unsigned long count, + u32 val); + #endif /* __IGT_GEM_UTILS_H__ */
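For orientation, the batch constructed by igt_emit_store_dw() above needs at most four dwords per store (the gen8+ form is opcode, low address, high address, value; older generations use shorter encodings) plus a terminating MI_BATCH_BUFFER_END, which is where the (4 * count + 1) sizing in the helper comes from. A sketch restating that rule (store_dw_batch_size() is an illustrative name, not part of the patch):

	static unsigned long store_dw_batch_size(unsigned long count)
	{
		/* Four dwords per MI_STORE_DWORD_IMM plus MI_BATCH_BUFFER_END. */
		unsigned long bytes = (4 * count + 1) * sizeof(u32);

		/* The backing internal object is allocated in whole pages. */
		return round_up(bytes, PAGE_SIZE);
	}

For example, one store per page of a 2M object (count = 512) needs 2049 dwords, i.e. 8196 bytes, which rounds up to a 12288-byte (three-page) batch object.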