Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c            |   6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_create.c             |  40
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c             |  61
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c         |  18
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c               | 148
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.h               |   2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c             |  66
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h             |  10
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h       |  68
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c              |   5
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.h             |   4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c              |  66
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c           |   2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c             |  12
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.h                |   3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c           |  13
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c        | 108
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c  |  32
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c  |   2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c     |  10
20 files changed, 514 insertions(+), 162 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5402a7bbcb1d..9a9ff84c90d7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce));
- ce->ring_size = SZ_16K;
+
+ if (ce->engine->class == COMPUTE_CLASS)
+ ce->ring_size = SZ_512K;
+ else
+ ce->ring_size = SZ_16K;
i915_vm_put(ce->vm);
ce->vm = i915_gem_context_get_eb_vm(ctx);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..d24c0ce8805c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+ unsigned int pat_index;
};
static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,43 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
return 0;
}
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+ struct create_ext *ext_data = data;
+ struct drm_i915_private *i915 = ext_data->i915;
+ struct drm_i915_gem_create_ext_set_pat ext;
+ unsigned int max_pat_index;
+
+ BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+ offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+ /* Limiting the extension only to Meteor Lake */
+ if (!IS_METEORLAKE(i915))
+ return -ENODEV;
+
+ if (copy_from_user(&ext, base, sizeof(ext)))
+ return -EFAULT;
+
+ max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+ if (ext.pat_index > max_pat_index) {
+ drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+ ext.pat_index);
+ return -EINVAL;
+ }
+
+ ext_data->pat_index = ext.pat_index;
+
+ return 0;
+}
+
static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+ [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
};
+#define PAT_INDEX_NOT_SET 0xffff
/**
* i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
@@ -418,6 +451,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
+ ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
create_extensions,
ARRAY_SIZE(create_extensions),
@@ -454,5 +488,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(obj))
return PTR_ERR(obj);
+ if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+ i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+ /* Mark that pat_index was set by UMD */
+ obj->pat_set_by_user = true;
+ }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
}
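
For reference, a minimal userspace sketch of the SET_PAT path added above. It assumes the matching uAPI additions in include/uapi/drm/i915_drm.h from this series (struct drm_i915_gem_create_ext_set_pat and I915_GEM_CREATE_EXT_SET_PAT); the file descriptor handling and the chosen index are illustrative only:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int create_bo_with_pat(int fd, uint64_t size, uint32_t pat_index,
			      uint32_t *handle)
{
	struct drm_i915_gem_create_ext_set_pat set_pat = {
		.base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
		/* must not exceed INTEL_INFO()->max_pat_index for the platform */
		.pat_index = pat_index,
	};
	struct drm_i915_gem_create_ext create = {
		.size = size,
		.extensions = (uintptr_t)&set_pat,
	};

	/* -ENODEV on non-MTL platforms, -EINVAL for an out-of-range index */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
		return -errno;

	*handle = create.handle;
	return 0;
}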
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d2d5a24301b2..dfaaa8b66ac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -27,8 +27,15 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (IS_DGFX(i915))
return false;
- return !(obj->cache_level == I915_CACHE_NONE ||
- obj->cache_level == I915_CACHE_WT);
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by the set_pat extension, i915_gem_object_has_cache_level() always
+ * returns true, because the coherency of such objects is managed by
+ * userspace. Otherwise the call here falls back to checking whether the
+ * object is un-cached or write-through.
+ */
+ return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
+ i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -267,7 +274,13 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
{
int ret;
- if (obj->cache_level == cache_level)
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by the set_pat extension, simply return 0 here without touching
+ * the cache setting, because such objects have an immutable cache
+ * setting by design and are always managed by userspace.
+ */
+ if (i915_gem_object_has_cache_level(obj, cache_level))
return 0;
ret = i915_gem_object_wait(obj,
@@ -278,10 +291,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
return ret;
/* Always invalidate stale cachelines */
- if (obj->cache_level != cache_level) {
- i915_gem_object_set_cache_coherency(obj, cache_level);
- obj->cache_dirty = true;
- }
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+ obj->cache_dirty = true;
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
@@ -306,20 +317,22 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
goto out;
}
- switch (obj->cache_level) {
- case I915_CACHE_LLC:
- case I915_CACHE_L3_LLC:
- args->caching = I915_CACHING_CACHED;
- break;
+ /*
+ * This ioctl should be disabled for the objects with pat_index
+ * set by user space.
+ */
+ if (obj->pat_set_by_user) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
- case I915_CACHE_WT:
+ if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
+ i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
+ args->caching = I915_CACHING_CACHED;
+ else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
args->caching = I915_CACHING_DISPLAY;
- break;
-
- default:
+ else
args->caching = I915_CACHING_NONE;
- break;
- }
out:
rcu_read_unlock();
return err;
@@ -337,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
if (IS_DGFX(i915))
return -ENODEV;
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
@@ -365,6 +381,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
/*
+ * This ioctl should be disabled for the objects with pat_index
+ * set by user space.
+ */
+ if (obj->pat_set_by_user) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /*
* The caching mode of proxy object is handled by its generator, and
* not allowed to be changed by userspace.
*/
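
The net userspace-visible effect of the two new -EOPNOTSUPP checks can be sketched as below (builds alongside the previous sketch with the same headers plus <stdbool.h>; fd and handle are placeholders):

static bool caching_is_immutable(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = I915_CACHING_CACHED,
	};

	/*
	 * On Xe_LPG and later (GRAPHICS_VER_FULL >= 12.70), or for any BO
	 * created with the set_pat extension, the legacy caching ioctls now
	 * reject the request instead of silently changing state.
	 */
	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) &&
	       errno == EOPNOTSUPP;
}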
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3aeede6aee4d..cfd7929587d8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -640,9 +640,15 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
return false;
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by the set_pat extension, i915_gem_object_has_cache_level() always
+ * returns true; otherwise the call falls back to checking whether the
+ * object is un-cached.
+ */
return (cache->has_llc ||
obj->cache_dirty ||
- obj->cache_level != I915_CACHE_NONE);
+ !i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
}
static int eb_reserve_vma(struct i915_execbuffer *eb,
@@ -730,7 +736,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
struct eb_vma *ev;
unsigned int pass;
int err = 0;
- bool unpinned;
/*
* We have one or more buffers that we couldn't bind, which could be due to
@@ -770,7 +775,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
pin_flags |= PIN_NONBLOCK;
if (pass >= 1)
- unpinned = eb_unbind(eb, pass >= 2);
+ eb_unbind(eb, pass >= 2);
if (pass == 2) {
err = mutex_lock_interruptible(&eb->context->vm->mutex);
@@ -1324,7 +1329,10 @@ static void *reloc_iomap(struct i915_vma *batch,
if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj, page),
- offset, I915_CACHE_NONE, 0);
+ offset,
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ 0);
} else {
offset += page << PAGE_SHIFT;
}
@@ -1464,7 +1472,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
reloc_cache_unmap(&eb->reloc_cache);
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
- target->vma->obj->cache_level,
+ target->vma->obj->pat_index,
PIN_GLOBAL, NULL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
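
The reloc_iomap() and eb_relocate_entry() hunks above follow the conversion pattern used throughout this series: binding and PTE-insertion paths now take a raw PAT index instead of an enum i915_cache_level, so legacy levels are translated at the call site. A condensed restatement (ggtt, dma_addr and offset are placeholders):

	/* before: vm callbacks took an enum i915_cache_level */
	ggtt->vm.insert_page(&ggtt->vm, dma_addr, offset, I915_CACHE_NONE, 0);

	/* after: translate the legacy level into the platform's PAT index */
	ggtt->vm.insert_page(&ggtt->vm, dma_addr, offset,
			     i915_gem_get_pat_index(ggtt->vm.i915,
						    I915_CACHE_NONE),
			     0);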
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index d3c1dee16af2..aa4d842d4c5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -383,7 +383,16 @@ retry:
}
/* Access to snoopable pages through the GTT is incoherent. */
- if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by the set_pat extension, coherency is managed by userspace; make
+ * sure we don't fail the vm fault handling by calling
+ * i915_gem_object_has_cache_level(), which always returns true for such
+ * objects. Otherwise this check falls back to testing whether the
+ * object is un-cached.
+ */
+ if (!(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
+ HAS_LLC(i915))) {
ret = -EFAULT;
goto err_unpin;
}
@@ -927,53 +936,15 @@ static struct file *mmap_singleton(struct drm_i915_private *i915)
return file;
}
-/*
- * This overcomes the limitation in drm_gem_mmap's assignment of a
- * drm_gem_object as the vma->vm_private_data. Since we need to
- * be able to resolve multiple mmap offsets which could be tied
- * to a single gem object.
- */
-int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+static int
+i915_gem_object_mmap(struct drm_i915_gem_object *obj,
+ struct i915_mmap_offset *mmo,
+ struct vm_area_struct *vma)
{
- struct drm_vma_offset_node *node;
- struct drm_file *priv = filp->private_data;
- struct drm_device *dev = priv->minor->dev;
- struct drm_i915_gem_object *obj = NULL;
- struct i915_mmap_offset *mmo = NULL;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct drm_device *dev = &i915->drm;
struct file *anon;
- if (drm_dev_is_unplugged(dev))
- return -ENODEV;
-
- rcu_read_lock();
- drm_vma_offset_lock_lookup(dev->vma_offset_manager);
- node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
- vma->vm_pgoff,
- vma_pages(vma));
- if (node && drm_vma_node_is_allowed(node, priv)) {
- /*
- * Skip 0-refcnted objects as it is in the process of being
- * destroyed and will be invalid when the vma manager lock
- * is released.
- */
- if (!node->driver_private) {
- mmo = container_of(node, struct i915_mmap_offset, vma_node);
- obj = i915_gem_object_get_rcu(mmo->obj);
-
- GEM_BUG_ON(obj && obj->ops->mmap_ops);
- } else {
- obj = i915_gem_object_get_rcu
- (container_of(node, struct drm_i915_gem_object,
- base.vma_node));
-
- GEM_BUG_ON(obj && !obj->ops->mmap_ops);
- }
- }
- drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
- rcu_read_unlock();
- if (!obj)
- return node ? -EACCES : -EINVAL;
-
if (i915_gem_object_is_readonly(obj)) {
if (vma->vm_flags & VM_WRITE) {
i915_gem_object_put(obj);
@@ -1005,7 +976,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
if (obj->ops->mmap_ops) {
vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
vma->vm_ops = obj->ops->mmap_ops;
- vma->vm_private_data = node->driver_private;
+ vma->vm_private_data = obj->base.vma_node.driver_private;
return 0;
}
@@ -1043,6 +1014,91 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
+/*
+ * This overcomes the limitation in drm_gem_mmap's assignment of a
+ * drm_gem_object as the vma->vm_private_data. Since we need to
+ * be able to resolve multiple mmap offsets which could be tied
+ * to a single gem object.
+ */
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_vma_offset_node *node;
+ struct drm_file *priv = filp->private_data;
+ struct drm_device *dev = priv->minor->dev;
+ struct drm_i915_gem_object *obj = NULL;
+ struct i915_mmap_offset *mmo = NULL;
+
+ if (drm_dev_is_unplugged(dev))
+ return -ENODEV;
+
+ rcu_read_lock();
+ drm_vma_offset_lock_lookup(dev->vma_offset_manager);
+ node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
+ vma->vm_pgoff,
+ vma_pages(vma));
+ if (node && drm_vma_node_is_allowed(node, priv)) {
+ /*
+ * Skip 0-refcnted objects as it is in the process of being
+ * destroyed and will be invalid when the vma manager lock
+ * is released.
+ */
+ if (!node->driver_private) {
+ mmo = container_of(node, struct i915_mmap_offset, vma_node);
+ obj = i915_gem_object_get_rcu(mmo->obj);
+
+ GEM_BUG_ON(obj && obj->ops->mmap_ops);
+ } else {
+ obj = i915_gem_object_get_rcu
+ (container_of(node, struct drm_i915_gem_object,
+ base.vma_node));
+
+ GEM_BUG_ON(obj && !obj->ops->mmap_ops);
+ }
+ }
+ drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+ rcu_read_unlock();
+ if (!obj)
+ return node ? -EACCES : -EINVAL;
+
+ return i915_gem_object_mmap(obj, mmo, vma);
+}
+
+int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct drm_device *dev = &i915->drm;
+ struct i915_mmap_offset *mmo = NULL;
+ enum i915_mmap_type mmap_type;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+ if (drm_dev_is_unplugged(dev))
+ return -ENODEV;
+
+ /* handle ttm object */
+ if (obj->ops->mmap_ops) {
+ /*
+ * The ttm fault handler, ttm_bo_vm_fault_reserved(), uses the fake
+ * offset to calculate the page offset, so set that up.
+ */
+ vma->vm_pgoff += drm_vma_node_start(&obj->base.vma_node);
+ } else {
+ /* handle stolen and smem objects */
+ mmap_type = i915_ggtt_has_aperture(ggtt) ? I915_MMAP_TYPE_GTT : I915_MMAP_TYPE_WC;
+ mmo = mmap_offset_attach(obj, mmap_type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+ }
+
+ /*
+ * When we install vm_ops for mmap we are too late for
+ * vm_ops->open(), which would have increased the ref_count of
+ * this obj, while the ref_count is still decreased by vm_ops->close().
+ * Take an extra reference here to balance that.
+ */
+ obj = i915_gem_object_get(obj);
+ return i915_gem_object_mmap(obj, mmo, vma);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_mman.c"
#endif
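
i915_gem_fb_mmap() is intended to be called from display/fbdev code outside this directory; a hedged sketch of the expected caller shape (the wrapper name is hypothetical, not part of this diff):

/*
 * Hypothetical caller: hand a framebuffer BO's mapping to the new helper,
 * which attaches a GTT or WC mmap offset as appropriate and takes the
 * extra object reference that vm_ops->close() later drops.
 */
static int example_fb_mmap(struct drm_i915_gem_object *obj,
			   struct vm_area_struct *vma)
{
	return i915_gem_fb_mmap(obj, vma);
}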
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
index 1fa91b3033b3..196417fd0f5c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
@@ -29,5 +29,5 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj);
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
-
+int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma);
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,33 @@ static struct kmem_cache *slab_objects;
static const struct drm_gem_object_funcs i915_gem_object_funcs;
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+ enum i915_cache_level level)
+{
+ if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+ return 0;
+
+ return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
+bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
+ enum i915_cache_level lvl)
+{
+ /*
+ * In case the pat_index is set by user space, this kernel mode
+ * driver should leave the coherency to be managed by user space,
+ * simply return true here.
+ */
+ if (obj->pat_set_by_user)
+ return true;
+
+ /*
+ * Otherwise the pat_index should have been converted from cache_level
+ * so that the following comparison is valid.
+ */
+ return obj->pat_index == i915_gem_get_pat_index(obj_to_i915(obj), lvl);
+}
+
struct drm_i915_gem_object *i915_gem_object_alloc(void)
{
struct drm_i915_gem_object *obj;
@@ -124,7 +151,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- obj->cache_level = cache_level;
+ obj->pat_index = i915_gem_get_pat_index(i915, cache_level);
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
@@ -139,6 +166,37 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
!IS_DGFX(i915);
}
+/**
+ * i915_gem_object_set_pat_index - set PAT index to be used in PTE encode
+ * @obj: #drm_i915_gem_object
+ * @pat_index: PAT index
+ *
+ * This is a clone of i915_gem_object_set_cache_coherency taking pat index
+ * instead of cache_level as its second argument.
+ */
+void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
+ unsigned int pat_index)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (obj->pat_index == pat_index)
+ return;
+
+ obj->pat_index = pat_index;
+
+ if (pat_index != i915_gem_get_pat_index(i915, I915_CACHE_NONE))
+ obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+ I915_BO_CACHE_COHERENT_FOR_WRITE);
+ else if (HAS_LLC(i915))
+ obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+ else
+ obj->cache_coherent = 0;
+
+ obj->cache_dirty =
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+ !IS_DGFX(i915);
+}
+
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
@@ -151,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
return false;
/*
+ * Always flush cache for UMD objects at creation time.
+ */
+ if (obj->pat_set_by_user)
+ return true;
+
+ /*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
* kernel, as per the given object cache_level. This is troublesome
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..884a17275b3a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
enum intel_region_id;
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
static inline bool i915_gem_object_size_2big(u64 size)
{
struct drm_i915_gem_object *obj;
@@ -30,6 +32,10 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
}
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+ enum i915_cache_level level);
+bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
+ enum i915_cache_level lvl);
void i915_gem_init__objects(struct drm_i915_private *i915);
void i915_objects_module_exit(void);
@@ -80,7 +86,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
- * @filp: DRM file private date
+ * @file: DRM file private data
* @handle: userspace handle
*
* Returns:
@@ -760,6 +766,8 @@ bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj);
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
+void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
+ unsigned int pat_index);
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5dcbbef31d44..e72c57716bee 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,13 @@ enum i915_cache_level {
* engine.
*/
I915_CACHE_WT,
+ /**
+ * @I915_MAX_CACHE_LEVEL:
+ *
+ * Marks the last entry in the enum. Used to size the cachelevel_to_pat
+ * array that translates a cache_level into a PAT index.
+ */
+ I915_MAX_CACHE_LEVEL,
};
enum i915_map_type {
@@ -328,6 +335,12 @@ struct drm_i915_gem_object {
*/
#define I915_BO_ALLOC_GPU_ONLY BIT(6)
#define I915_BO_ALLOC_CCS_AUX BIT(7)
+/*
+ * Object is allowed to retain its initial data and will not be cleared on first
+ * access if used along with I915_BO_ALLOC_USER. This is mainly to keep
+ * preallocated framebuffer data intact while transitioning it to i915drmfb.
+ */
+#define I915_BO_PREALLOC BIT(8)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
I915_BO_ALLOC_VOLATILE | \
I915_BO_ALLOC_CPU_CLEAR | \
@@ -335,10 +348,11 @@ struct drm_i915_gem_object {
I915_BO_ALLOC_PM_VOLATILE | \
I915_BO_ALLOC_PM_EARLY | \
I915_BO_ALLOC_GPU_ONLY | \
- I915_BO_ALLOC_CCS_AUX)
-#define I915_BO_READONLY BIT(8)
-#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(10)
+ I915_BO_ALLOC_CCS_AUX | \
+ I915_BO_PREALLOC)
+#define I915_BO_READONLY BIT(9)
+#define I915_TILING_QUIRK_BIT 10 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(11)
/**
* @mem_flags - Mutable placement-related flags
*
@@ -350,15 +364,43 @@ struct drm_i915_gem_object {
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */
/**
- * @cache_level: The desired GTT caching level.
+ * @pat_index: The desired PAT index.
+ *
+ * See hardware specification for valid PAT indices for each platform.
+ * This field replaces @cache_level, which held a value of enum
+ * i915_cache_level, since PAT indices are used by both userspace
+ * and the kernel mode driver for caching policy control after GEN12.
+ * Platform-specific tables translate an i915_cache_level into a PAT
+ * index; for details see the macros defined in i915/i915_pci.c,
+ * e.g. PVC_CACHELEVEL.
+ * For backward compatibility, this field contains values that exactly
+ * match the entries of enum i915_cache_level for pre-GEN12 platforms
+ * (see LEGACY_CACHELEVEL), so the PTE encode functions for these
+ * legacy platforms can stay the same.
+ */
+ unsigned int pat_index:6;
+ /**
+ * @pat_set_by_user: Indicate whether pat_index is set by user space
*
- * See enum i915_cache_level for possible values, along with what
- * each does.
+ * This field defaults to false and is only set to true when the
+ * pat_index is supplied by user space. By design, user space is then
+ * responsible for managing caching behavior via the pat_index, and the
+ * kernel mode driver must never touch it.
*/
- unsigned int cache_level:3;
+ unsigned int pat_set_by_user:1;
/**
* @cache_coherent:
*
+ * Note: with the change above, which replaced @cache_level with pat_index,
+ * the use of @cache_coherent is limited to objects created by the kernel
+ * or by userspace without a PAT index specified.
+ * Check @pat_set_by_user to find out whether an object has its PAT index
+ * set by userspace. The ioctls that change cache settings have also been
+ * disabled for objects with a userspace-set PAT index, so don't assume
+ * @cache_coherent has the flags set as described here. The helper
+ * i915_gem_object_has_cache_level() provides one way to bypass the use
+ * of this field.
+ *
* Track whether the pages are coherent with the GPU if reading or
* writing through the CPU caches. The largely depends on the
* @cache_level setting.
@@ -432,6 +474,16 @@ struct drm_i915_gem_object {
/**
* @cache_dirty:
*
+ * Note: with the change above, which replaced cache_level with pat_index,
+ * the use of @cache_dirty is limited to objects created by the kernel
+ * or by userspace without a PAT index specified.
+ * Check @pat_set_by_user to find out whether an object has its PAT index
+ * set by userspace. The ioctls that change cache settings have also been
+ * disabled for objects with a userspace-set PAT index, so don't assume
+ * @cache_dirty is set as described here. Also see the helper
+ * i915_gem_object_has_cache_level() for possible ways to bypass the use
+ * of this field.
+ *
* Track if we are dirty with writes through the CPU cache for this
* object. As a result reading directly from main memory might yield
* stale data.
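
The @pat_index documentation above points at per-platform cachelevel_to_pat tables (LEGACY_CACHELEVEL, PVC_CACHELEVEL) defined in i915/i915_pci.c, outside this directory. Going by the backward-compatibility note, the legacy table is expected to look roughly like this sketch, mapping each enum value to itself:

#define LEGACY_CACHELEVEL \
	.cachelevel_to_pat = { \
		[I915_CACHE_NONE]   = 0, \
		[I915_CACHE_LLC]    = 1, \
		[I915_CACHE_L3_LLC] = 2, \
		[I915_CACHE_WT]     = 3, \
	}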
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index ecd86130b74f..89fc8ea6bcfc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
bool always_coherent)
{
- if (i915_gem_object_is_lmem(obj))
+ /*
+ * Wa_22016122933: always return I915_MAP_WC for MTL
+ */
+ if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915))
return I915_MAP_WC;
if (HAS_LLC(i915) || always_coherent)
return I915_MAP_WB;
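
With the Wa_22016122933 check added above, the effective mapping choice made by i915_coherent_map_type() can be restated as the following table (illustrative comment only):

	/*
	 *   lmem-backed object          -> I915_MAP_WC
	 *   any object on MTL           -> I915_MAP_WC  (Wa_22016122933)
	 *   HAS_LLC or always_coherent  -> I915_MAP_WB
	 *   otherwise                   -> I915_MAP_WC
	 */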
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
index 2dfcc41c0170..8a7650b27cc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -22,9 +22,7 @@ struct i915_gem_apply_to_region;
*/
struct i915_gem_apply_to_region_ops {
/**
- * process_obj - Process the current object
- * @apply: Embed this for private data.
- * @obj: The current object.
+ * @process_obj: Process the current object
*
* Note that if this function is part of a ww transaction, and
* if returns -EDEADLK for one of the objects, it may be
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..8f1633c3fb93 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -19,13 +19,13 @@
#include "i915_trace.h"
/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
+ * Move folios to appropriate lru and release the batch, decrementing the
+ * ref count of those folios.
*/
-static void check_release_pagevec(struct pagevec *pvec)
+static void check_release_folio_batch(struct folio_batch *fbatch)
{
- check_move_unevictable_pages(pvec);
- __pagevec_release(pvec);
+ check_move_unevictable_folios(fbatch);
+ __folio_batch_release(fbatch);
cond_resched();
}
@@ -33,24 +33,29 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
bool dirty, bool backup)
{
struct sgt_iter sgt_iter;
- struct pagevec pvec;
+ struct folio_batch fbatch;
+ struct folio *last = NULL;
struct page *page;
mapping_clear_unevictable(mapping);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
for_each_sgt_page(page, sgt_iter, st) {
- if (dirty)
- set_page_dirty(page);
+ struct folio *folio = page_folio(page);
+ if (folio == last)
+ continue;
+ last = folio;
+ if (dirty)
+ folio_mark_dirty(folio);
if (backup)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
- if (!pagevec_add(&pvec, page))
- check_release_pagevec(&pvec);
+ if (!folio_batch_add(&fbatch, folio))
+ check_release_folio_batch(&fbatch);
}
- if (pagevec_count(&pvec))
- check_release_pagevec(&pvec);
+ if (fbatch.nr)
+ check_release_folio_batch(&fbatch);
sg_free_table(st);
}
@@ -63,8 +68,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
unsigned int page_count; /* restricted by sg_alloc_table */
unsigned long i;
struct scatterlist *sg;
- struct page *page;
- unsigned long last_pfn = 0; /* suppress gcc warning */
+ unsigned long next_pfn = 0; /* suppress gcc warning */
gfp_t noreclaim;
int ret;
@@ -95,6 +99,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
sg = st->sgl;
st->nents = 0;
for (i = 0; i < page_count; i++) {
+ struct folio *folio;
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
0,
@@ -103,12 +108,12 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
do {
cond_resched();
- page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (!IS_ERR(page))
+ folio = shmem_read_folio_gfp(mapping, i, gfp);
+ if (!IS_ERR(folio))
break;
if (!*s) {
- ret = PTR_ERR(page);
+ ret = PTR_ERR(folio);
goto err_sg;
}
@@ -147,19 +152,21 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
if (!i ||
sg->length >= max_segment ||
- page_to_pfn(page) != last_pfn + 1) {
+ folio_pfn(folio) != next_pfn) {
if (i)
sg = sg_next(sg);
st->nents++;
- sg_set_page(sg, page, PAGE_SIZE, 0);
+ sg_set_folio(sg, folio, folio_size(folio), 0);
} else {
- sg->length += PAGE_SIZE;
+ /* XXX: could overflow? */
+ sg->length += folio_size(folio);
}
- last_pfn = page_to_pfn(page);
+ next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
+ i += folio_nr_pages(folio) - 1;
/* Check that the i965g/gm workaround works. */
- GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
+ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
}
if (sg) /* loop terminated early; short sg table */
sg_mark_end(sg);
@@ -455,7 +462,7 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
struct page *page;
void *data, *vaddr;
int err;
- char c;
+ char __maybe_unused c;
len = PAGE_SIZE - pg;
if (len > remain)
@@ -601,7 +608,14 @@ static int shmem_object_init(struct intel_memory_region *mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
- if (HAS_LLC(i915))
+ /*
+ * MTL doesn't snoop the CPU cache for GPU access by default (i.e. no
+ * 1-way coherency). However, some UMDs currently depend on it, so make
+ * 1-way coherency the default setting for MTL. A follow-up patch will
+ * extend the GEM_CREATE uAPI to allow UMDs to specify the caching mode
+ * at BO creation time.
+ */
+ if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
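
A worked example of the folio loop in shmem_sg_alloc_table() above, for a hypothetical 64KiB folio returned by shmem_read_folio_gfp() (sizes assume 4KiB pages):

	/*
	 * Folio of 64KiB at page index i, first pfn P:
	 *
	 *   sg_set_folio(sg, folio, 64K, 0)  - one sg entry covers the folio
	 *   next_pfn = P + 16                - folio_nr_pages() == 16
	 *   i += 15                          - skip the folio's tail pages
	 *
	 * The next iteration therefore asks shmem for index i + 16, the page
	 * after this folio, and a pfn-contiguous neighbour still coalesces
	 * into the same sg entry while sg->length stays under max_segment.
	 */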
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
fs_reclaim_release(GFP_KERNEL);
}
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
/**
* i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
* default all object types that support shrinking(see IS_SHRINKABLE), will also
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 8ac376c24aa2..3b094d36a0b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -535,6 +535,14 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
/* Basic memrange allocator for stolen space. */
drm_mm_init(&i915->mm.stolen, 0, i915->dsm.usable_size);
+ /*
+ * Access to stolen lmem beyond a certain size on the MTL A0 stepping
+ * would crash the machine. Disable stolen lmem for userspace access
+ * by setting usable_size to zero.
+ */
+ if (IS_METEORLAKE(i915) && INTEL_REVID(i915) == 0x0)
+ i915->dsm.usable_size = 0;
+
return 0;
}
@@ -557,7 +565,9 @@ static void dbg_poison(struct i915_ggtt *ggtt,
ggtt->vm.insert_page(&ggtt->vm, addr,
ggtt->error_capture.start,
- I915_CACHE_NONE, 0);
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ 0);
mb();
s = io_mapping_map_wc(&ggtt->iomap,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index f8f6bed1b297..67347e62e29b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -42,8 +42,9 @@ static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
/**
* i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
* struct drm_i915_gem_object.
+ * @bo: Pointer to the ttm buffer object
*
- * Return: Pointer to the embedding struct ttm_buffer_object.
+ * Return: Pointer to the embedding struct drm_i915_gem_object.
*/
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index dd188dfcc423..7078af2f8f79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -214,7 +214,8 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
- dst_st->sgl, dst_level,
+ dst_st->sgl,
+ i915_gem_get_pat_index(i915, dst_level),
i915_ttm_gtt_binds_lmem(dst_mem),
0, &rq);
} else {
@@ -228,9 +229,10 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
deps, src_rsgt->table.sgl,
- src_level,
+ i915_gem_get_pat_index(i915, src_level),
i915_ttm_gtt_binds_lmem(bo->resource),
- dst_st->sgl, dst_level,
+ dst_st->sgl,
+ i915_gem_get_pat_index(i915, dst_level),
i915_ttm_gtt_binds_lmem(dst_mem),
&rq);
@@ -576,7 +578,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
struct dma_fence *migration_fence = NULL;
struct ttm_tt *ttm = bo->ttm;
struct i915_refct_sgt *dst_rsgt;
- bool clear;
+ bool clear, prealloc_bo;
int ret;
if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
@@ -632,7 +634,8 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
- if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+ prealloc_bo = obj->flags & I915_BO_PREALLOC;
+ if (!(clear && ttm && !((ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) && !prealloc_bo))) {
struct i915_deps deps;
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 99f39a5feca1..df6c9a84252c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -354,7 +354,7 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_NONE;
+ obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);
return obj;
}
@@ -695,8 +695,7 @@ out_put:
return err;
}
-static void close_object_list(struct list_head *objects,
- struct i915_ppgtt *ppgtt)
+static void close_object_list(struct list_head *objects)
{
struct drm_i915_gem_object *obj, *on;
@@ -710,17 +709,36 @@ static void close_object_list(struct list_head *objects,
}
}
-static int igt_mock_ppgtt_huge_fill(void *arg)
+static int igt_ppgtt_huge_fill(void *arg)
{
- struct i915_ppgtt *ppgtt = arg;
- struct drm_i915_private *i915 = ppgtt->vm.i915;
- unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
+ struct drm_i915_private *i915 = arg;
+ unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
+ bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50);
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ unsigned long max_pages;
unsigned long page_num;
+ struct file *file;
bool single = false;
LIST_HEAD(objects);
IGT_TIMEOUT(end_time);
int err = -ENODEV;
+ if (supported == I915_GTT_PAGE_SIZE_4K)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+ max_pages = vm->total >> PAGE_SHIFT;
+
for_each_prime_number_from(page_num, 1, max_pages) {
struct drm_i915_gem_object *obj;
u64 size = page_num << PAGE_SHIFT;
@@ -750,13 +768,14 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
list_add(&obj->st_link, &objects);
- vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
break;
}
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ /* vma start must be aligned to BIT(21) to allow 2M PTEs */
+ err = i915_vma_pin(vma, 0, BIT(21), PIN_USER);
if (err)
break;
@@ -784,12 +803,13 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
GEM_BUG_ON(!expected_gtt);
GEM_BUG_ON(size);
- if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
+ if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M ||
+ expected_gtt & I915_GTT_PAGE_SIZE_2M))
expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;
i915_vma_unpin(vma);
- if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
if (!IS_ALIGNED(vma->node.start,
I915_GTT_PAGE_SIZE_2M)) {
pr_err("node.start(%llx) not aligned to 2M\n",
@@ -808,7 +828,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
}
if (vma->resource->page_sizes_gtt != expected_gtt) {
- pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
+ pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n",
vma->resource->page_sizes_gtt, expected_gtt,
obj->base.size, str_yes_no(!!single));
err = -EINVAL;
@@ -823,19 +843,25 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
single = !single;
}
- close_object_list(&objects, ppgtt);
+ close_object_list(&objects);
if (err == -ENOMEM || err == -ENOSPC)
err = 0;
+ i915_vm_put(vm);
+out:
+ fput(file);
return err;
}
-static int igt_mock_ppgtt_64K(void *arg)
+static int igt_ppgtt_64K(void *arg)
{
- struct i915_ppgtt *ppgtt = arg;
- struct drm_i915_private *i915 = ppgtt->vm.i915;
+ struct drm_i915_private *i915 = arg;
+ bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50);
struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ struct file *file;
const struct object_info {
unsigned int size;
unsigned int gtt;
@@ -907,16 +933,41 @@ static int igt_mock_ppgtt_64K(void *arg)
if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
return 0;
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
for (i = 0; i < ARRAY_SIZE(objects); ++i) {
unsigned int size = objects[i].size;
unsigned int expected_gtt = objects[i].gtt;
unsigned int offset = objects[i].offset;
unsigned int flags = PIN_USER;
+ /*
+ * For modern GTT models, the requirements for marking a page-table
+ * as 64K have been relaxed. Account for this.
+ */
+ if (has_pte64) {
+ expected_gtt = 0;
+ if (size >= SZ_64K)
+ expected_gtt |= I915_GTT_PAGE_SIZE_64K;
+ if (size & (SZ_64K - 1))
+ expected_gtt |= I915_GTT_PAGE_SIZE_4K;
+ }
+
for (single = 0; single <= 1; single++) {
obj = fake_huge_pages_object(i915, size, !!single);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
@@ -928,7 +979,7 @@ static int igt_mock_ppgtt_64K(void *arg)
*/
obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
- vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto out_object_unpin;
@@ -945,7 +996,8 @@ static int igt_mock_ppgtt_64K(void *arg)
if (err)
goto out_vma_unpin;
- if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ if (!has_pte64 && !offset &&
+ vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
if (!IS_ALIGNED(vma->node.start,
I915_GTT_PAGE_SIZE_2M)) {
pr_err("node.start(%llx) not aligned to 2M\n",
@@ -964,9 +1016,10 @@ static int igt_mock_ppgtt_64K(void *arg)
}
if (vma->resource->page_sizes_gtt != expected_gtt) {
- pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
+ pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n",
vma->resource->page_sizes_gtt,
- expected_gtt, i, str_yes_no(!!single));
+ expected_gtt, i, str_yes_no(!!single),
+ offset, size);
err = -EINVAL;
goto out_vma_unpin;
}
@@ -982,7 +1035,7 @@ static int igt_mock_ppgtt_64K(void *arg)
}
}
- return 0;
+ goto out_vm;
out_vma_unpin:
i915_vma_unpin(vma);
@@ -992,7 +1045,10 @@ out_object_unpin:
i915_gem_object_unlock(obj);
out_object_put:
i915_gem_object_put(obj);
-
+out_vm:
+ i915_vm_put(vm);
+out:
+ fput(file);
return err;
}
@@ -1910,8 +1966,6 @@ int i915_gem_huge_page_mock_selftests(void)
SUBTEST(igt_mock_exhaust_device_supported_pages),
SUBTEST(igt_mock_memory_region_huge_pages),
SUBTEST(igt_mock_ppgtt_misaligned_dma),
- SUBTEST(igt_mock_ppgtt_huge_fill),
- SUBTEST(igt_mock_ppgtt_64K),
};
struct drm_i915_private *dev_priv;
struct i915_ppgtt *ppgtt;
@@ -1962,6 +2016,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_sanity_check),
SUBTEST(igt_ppgtt_compact),
SUBTEST(igt_ppgtt_mixed),
+ SUBTEST(igt_ppgtt_huge_fill),
+ SUBTEST(igt_ppgtt_64K),
};
if (!HAS_PPGTT(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index a81fa6a20f5a..7021b6e9b219 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -66,7 +66,7 @@ static int live_nop_switch(void *arg)
ctx[n] = live_context(i915, file);
if (IS_ERR(ctx[n])) {
err = PTR_ERR(ctx[n]);
- goto out_file;
+ goto out_ctx;
}
}
@@ -82,7 +82,7 @@ static int live_nop_switch(void *arg)
this = igt_request_alloc(ctx[n], engine);
if (IS_ERR(this)) {
err = PTR_ERR(this);
- goto out_file;
+ goto out_ctx;
}
if (rq) {
i915_request_await_dma_fence(this, &rq->fence);
@@ -93,10 +93,10 @@ static int live_nop_switch(void *arg)
}
if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
pr_err("Failed to populated %d contexts\n", nctx);
- intel_gt_set_wedged(to_gt(i915));
+ intel_gt_set_wedged(engine->gt);
i915_request_put(rq);
err = -EIO;
- goto out_file;
+ goto out_ctx;
}
i915_request_put(rq);
@@ -107,7 +107,7 @@ static int live_nop_switch(void *arg)
err = igt_live_test_begin(&t, i915, __func__, engine->name);
if (err)
- goto out_file;
+ goto out_ctx;
end_time = jiffies + i915_selftest.timeout_jiffies;
for_each_prime_number_from(prime, 2, 8192) {
@@ -120,7 +120,7 @@ static int live_nop_switch(void *arg)
this = igt_request_alloc(ctx[n % nctx], engine);
if (IS_ERR(this)) {
err = PTR_ERR(this);
- goto out_file;
+ goto out_ctx;
}
if (rq) { /* Force submission order */
@@ -149,7 +149,7 @@ static int live_nop_switch(void *arg)
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Switching between %ld contexts timed out\n",
prime);
- intel_gt_set_wedged(to_gt(i915));
+ intel_gt_set_wedged(engine->gt);
i915_request_put(rq);
break;
}
@@ -165,7 +165,7 @@ static int live_nop_switch(void *arg)
err = igt_live_test_end(&t);
if (err)
- goto out_file;
+ goto out_ctx;
pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
engine->name,
@@ -173,6 +173,8 @@ static int live_nop_switch(void *arg)
prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
}
+out_ctx:
+ kfree(ctx);
out_file:
fput(file);
return err;
@@ -346,8 +348,10 @@ static int live_parallel_switch(void *arg)
continue;
ce = intel_context_create(data[m].ce[0]->engine);
- if (IS_ERR(ce))
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
goto out;
+ }
err = intel_context_pin(ce);
if (err) {
@@ -367,8 +371,10 @@ static int live_parallel_switch(void *arg)
worker = kthread_create_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name);
- if (IS_ERR(worker))
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
goto out;
+ }
data[n].worker = worker;
}
@@ -397,8 +403,10 @@ static int live_parallel_switch(void *arg)
}
}
- if (igt_live_test_end(&t))
- err = -EIO;
+ if (igt_live_test_end(&t)) {
+ err = err ?: -EIO;
+ break;
+ }
}
out:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index fe6c37fd7859..a93a90b15907 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -219,7 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt,
continue;
err = intel_migrate_clear(&gt->migrate, &ww, deps,
- obj->mm.pages->sgl, obj->cache_level,
+ obj->mm.pages->sgl, obj->pat_index,
i915_gem_object_is_lmem(obj),
0xdeadbeaf, &rq);
if (rq) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 56279908ed30..72957a36a36b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -1222,7 +1222,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
}
err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
- obj->mm.pages->sgl, obj->cache_level,
+ obj->mm.pages->sgl, obj->pat_index,
i915_gem_object_is_lmem(obj),
expand32(POISON_INUSE), &rq);
i915_gem_object_unpin_pages(obj);
@@ -1681,7 +1681,9 @@ static int igt_mmap_gpu(void *arg)
static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
{
- if (!pte_present(*pte) || pte_none(*pte)) {
+ pte_t ptent = ptep_get(pte);
+
+ if (!pte_present(ptent) || pte_none(ptent)) {
pr_err("missing PTE:%lx\n",
(addr - (unsigned long)data) >> PAGE_SHIFT);
return -EINVAL;
@@ -1692,7 +1694,9 @@ static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
{
- if (pte_present(*pte) && !pte_none(*pte)) {
+ pte_t ptent = ptep_get(pte);
+
+ if (pte_present(ptent) && !pte_none(ptent)) {
pr_err("present PTE:%lx; expected to be revoked\n",
(addr - (unsigned long)data) >> PAGE_SHIFT);
return -EINVAL;