1 files changed, 65 insertions, 91 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index e037e94792f3..3084f52e3372 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -210,6 +210,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
 	intel_wakeref_t wakeref;
+	struct i915_vma *old;
 	int ret;
 
 	if (vma) {
@@ -229,49 +230,55 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 			return ret;
 	}
 
-	if (fence->vma) {
-		struct i915_vma *old = fence->vma;
-
+	old = xchg(&fence->vma, NULL);
+	if (old) {
 		ret = i915_active_request_retire(&old->last_fence,
 					     &old->obj->base.dev->struct_mutex);
-		if (ret)
+		if (ret) {
+			fence->vma = old;
 			return ret;
+		}
 
 		i915_vma_flush_writes(old);
-	}
 
-	if (fence->vma && fence->vma != vma) {
-		/* Ensure that all userspace CPU access is completed before
+		/*
+		 * Ensure that all userspace CPU access is completed before
 		 * stealing the fence.
 		 */
-		GEM_BUG_ON(fence->vma->fence != fence);
-		i915_vma_revoke_mmap(fence->vma);
-
-		fence->vma->fence = NULL;
-		fence->vma = NULL;
+		if (old != vma) {
+			GEM_BUG_ON(old->fence != fence);
+			i915_vma_revoke_mmap(old);
+			old->fence = NULL;
+		}
 
 		list_move(&fence->link, &fence->i915->mm.fence_list);
 	}
 
-	/* We only need to update the register itself if the device is awake.
+	/*
+	 * We only need to update the register itself if the device is awake.
 	 * If the device is currently powered down, we will defer the write
 	 * to the runtime resume, see i915_gem_restore_fences().
+	 *
+	 * This only works for removing the fence register, on acquisition
+	 * the caller must hold the rpm wakeref. The fence register must
+	 * be cleared before we can use any other fences to ensure that
+	 * the new fences do not overlap the elided clears, confusing HW.
 	 */
 	wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
-	if (wakeref) {
-		fence_write(fence, vma);
-		intel_runtime_pm_put(fence->i915, wakeref);
+	if (!wakeref) {
+		GEM_BUG_ON(vma);
+		return 0;
 	}
 
-	if (vma) {
-		if (fence->vma != vma) {
-			vma->fence = fence;
-			fence->vma = vma;
-		}
+	WRITE_ONCE(fence->vma, vma);
+	fence_write(fence, vma);
 
+	if (vma) {
+		vma->fence = fence;
 		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
 	}
 
+	intel_runtime_pm_put(fence->i915, wakeref);
 	return 0;
 }
 
@@ -436,32 +443,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 }
 
 /**
- * i915_gem_revoke_fences - revoke fence state
- * @dev_priv: i915 device private
- *
- * Removes all GTT mmappings via the fence registers. This forces any user
- * of the fence to reacquire that fence before continuing with their access.
- * One use is during GPU reset where the fence register is lost and we need to
- * revoke concurrent userspace access via GTT mmaps until the hardware has been
- * reset and the fence registers have been restored.
- */
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
-{
-	int i;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
-		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
-
-		if (fence->vma)
-			i915_vma_revoke_mmap(fence->vma);
-	}
-}
-
-/**
  * i915_gem_restore_fences - restore fence state
  * @dev_priv: i915 device private
  *
@@ -473,9 +454,10 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 {
 	int i;
 
+	rcu_read_lock(); /* keep obj alive as we dereference */
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-		struct i915_vma *vma = reg->vma;
+		struct i915_vma *vma = READ_ONCE(reg->vma);
 
 		GEM_BUG_ON(vma && vma->fence != reg);
 
@@ -483,18 +465,12 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 		 * Commit delayed tiling changes if we have an object still
 		 * attached to the fence, otherwise just clear the fence.
 		 */
-		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
-			GEM_BUG_ON(!reg->dirty);
-			GEM_BUG_ON(i915_vma_has_userfault(vma));
-
-			list_move(&reg->link, &dev_priv->mm.fence_list);
-			vma->fence = NULL;
+		if (vma && !i915_gem_object_is_tiled(vma->obj))
 			vma = NULL;
-		}
 
 		fence_write(reg, vma);
-		reg->vma = vma;
 	}
+	rcu_read_unlock();
 }
 
 /**
@@ -609,8 +585,38 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (IS_MOBILE(dev_priv) ||
-		   IS_I915G(dev_priv) || IS_I945G(dev_priv)) {
+	} else if (IS_G45(dev_priv) || IS_I965G(dev_priv) || IS_G33(dev_priv)) {
+		/* The 965, G33, and newer, have a very flexible memory
+		 * configuration.  It will enable dual-channel mode
+		 * (interleaving) on as much memory as it can, and the GPU
+		 * will additionally sometimes enable different bit 6
+		 * swizzling for tiled objects from the CPU.
+		 *
+		 * Here's what I found on the G965:
+		 *    slot fill         memory size  swizzling
+		 * 0A   0B   1A   1B    1-ch   2-ch
+		 * 512  0    0    0     512    0     O
+		 * 512  0    512  0     16     1008  X
+		 * 512  0    0    512   16     1008  X
+		 * 0    512  0    512   16     1008  X
+		 * 1024 1024 1024 0     2048   1024  O
+		 *
+		 * We could probably detect this based on either the DRB
+		 * matching, which was the case for the swizzling required in
+		 * the table above, or from the 1-ch value being less than
+		 * the minimum size of a rank.
+		 *
+		 * Reports indicate that the swizzling actually
+		 * varies depending upon page placement inside the
+		 * channels, i.e. we see swizzled pages where the
+		 * banks of memory are paired and unswizzled on the
+		 * uneven portion, so leave that as unknown.
+		 */
+		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
+			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+			swizzle_y = I915_BIT_6_SWIZZLE_9;
+		}
+	} else {
 		u32 dcc;
 
 		/* On 9xx chipsets, channel interleave by the CPU is
@@ -660,37 +666,6 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 		}
-	} else {
-		/* The 965, G33, and newer, have a very flexible memory
-		 * configuration.  It will enable dual-channel mode
-		 * (interleaving) on as much memory as it can, and the GPU
-		 * will additionally sometimes enable different bit 6
-		 * swizzling for tiled objects from the CPU.
-		 *
-		 * Here's what I found on the G965:
-		 *    slot fill         memory size  swizzling
-		 * 0A   0B   1A   1B    1-ch   2-ch
-		 * 512  0    0    0     512    0     O
-		 * 512  0    512  0     16     1008  X
-		 * 512  0    0    512   16     1008  X
-		 * 0    512  0    512   16     1008  X
-		 * 1024 1024 1024 0     2048   1024  O
-		 *
-		 * We could probably detect this based on either the DRB
-		 * matching, which was the case for the swizzling required in
-		 * the table above, or from the 1-ch value being less than
-		 * the minimum size of a rank.
-		 *
-		 * Reports indicate that the swizzling actually
-		 * varies depending upon page placement inside the
-		 * channels, i.e. we see swizzled pages where the
-		 * banks of memory are paired and unswizzled on the
-		 * uneven portion, so leave that as unknown.
-		 */
-		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
-			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-			swizzle_y = I915_BIT_6_SWIZZLE_9;
-		}
 	}
 
 	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
@@ -790,8 +765,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
 	int i;
 
 	if (obj->bit_17 == NULL) {
-		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
-				      sizeof(long), GFP_KERNEL);
+		obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
 		if (obj->bit_17 == NULL) {
 			DRM_ERROR("Failed to allocate memory for bit 17 "
 				  "record\n");