author     Matthew Auld <matthew.auld@intel.com>    2023-09-25 14:42:18 +0300
committer  Rodrigo Vivi <rodrigo.vivi@intel.com>    2023-12-21 19:45:07 +0300
commit     e1fbc4f18d5b4405271e964670b9b054c4397127 (patch)
tree       30998784f152528761b830cd4b22c7523a0c870a /drivers/gpu/drm/xe/xe_vm.c
parent     f6a22e6862737e31d2c0693d2a4f986e71d32da6 (diff)
drm/xe/uapi: support pat_index selection with vm_bind
Allow userspace to directly control the pat_index for a given vm
binding. This should allow directly controlling the coherency, caching
behaviour, compression and potentially other stuff in the future for
the ppGTT binding.

The exact meaning behind the pat_index is very platform specific (see
BSpec or PRMs) but effectively maps to some predefined memory
attributes. From the KMD pov we only care about the coherency that is
provided by the pat_index, which falls into either NONE, 1WAY or 2WAY.
The vm_bind coherency mode for the given pat_index needs to be at least
1way coherent when using cpu_caching with DRM_XE_GEM_CPU_CACHING_WB.
For platforms that lack the explicit coherency mode attribute, we treat
UC/WT/WC as NONE and WB as AT_LEAST_1WAY.

For userptr mappings we lack a corresponding gem object, so the
expected coherency mode is instead implicit and must fall into either
1WAY or 2WAY. Trying to use NONE will be rejected by the kernel.

For imported dma-buf (from a different device) the coherency mode is
also implicit and must also be either 1WAY or 2WAY.

v2:
  - Undefined coh_mode(pat_index) can now be treated as programmer
    error. (Matt Roper)
  - We now allow gem_create.coh_mode <= coh_mode(pat_index), rather
    than having to match exactly. This ensures imported dma-buf can
    always just use 1way (or even 2way), now that we also bundle
    1way/2way into at_least_1way. We still require 1way/2way for
    external dma-buf, but the policy can now be the same for
    self-import, if desired.
  - Use u16 for pat_index in uapi. u32 is massive overkill. (José)
  - Move as much of the pat_index validation as we can into
    vm_bind_ioctl_check_args. (José)
v3 (Matt Roper):
  - Split the pte_encode() refactoring into separate patch.
v4:
  - Rebase
v5:
  - Check for and reject !coh_mode which would indicate hw reserved
    pat_index on xe2.
v6:
  - Rebase on removal of coh_mode from uapi. We just need to reject
    cpu_caching=wb + pat_index with coh_none.

Testcase: igt@xe_pat
Bspec: 45101, 44235 #xe
Bspec: 70552, 71582, 59400 #xe2
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Cc: Zhengguo Xu <zhengguo.xu@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Zhengguo Xu <zhengguo.xu@intel.com>
Acked-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
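For illustration, a minimal userspace sketch of selecting a pat_index at
bind time might look like the following. This assumes the xe uapi roughly
as it exists in xe_drm.h (struct drm_xe_vm_bind, struct drm_xe_vm_bind_op,
DRM_IOCTL_XE_VM_BIND); field and flag names may differ slightly at this
exact point in the series. The bind_with_pat() helper is hypothetical, the
hard-coded error handling is omitted, and a real caller would take the
pat_index from the platform's PAT table (and handle syncs/async flags)
rather than passing an arbitrary value.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "xe_drm.h"	/* include path depends on how libdrm headers are installed */

/*
 * Hypothetical helper (illustration only): map a BO into a VM with an
 * explicit pat_index. If the chosen index has coherency mode NONE and
 * the BO was created with DRM_XE_GEM_CPU_CACHING_WB, the kernel will
 * now reject the bind with -EINVAL, per the checks added by this patch.
 */
static int bind_with_pat(int fd, uint32_t vm_id, uint32_t bo_handle,
			 uint64_t addr, uint64_t size, uint16_t pat_index)
{
	struct drm_xe_vm_bind bind;

	memset(&bind, 0, sizeof(bind));
	bind.vm_id = vm_id;
	bind.num_binds = 1;
	bind.bind.obj = bo_handle;
	bind.bind.obj_offset = 0;
	bind.bind.range = size;
	bind.bind.addr = addr;
	bind.bind.op = DRM_XE_VM_BIND_OP_MAP;
	bind.bind.pat_index = pat_index;	/* platform-specific PAT entry */

	return drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
}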
Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 67
1 file changed, 59 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c33ae4db4e02..a97a310123fc 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -6,6 +6,7 @@
#include "xe_vm.h"
#include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
@@ -26,6 +27,7 @@
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
+#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
@@ -868,7 +870,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
u64 start, u64 end,
bool read_only,
bool is_null,
- u8 tile_mask)
+ u8 tile_mask,
+ u16 pat_index)
{
struct xe_vma *vma;
struct xe_tile *tile;
@@ -910,6 +913,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
+ vma->pat_index = pat_index;
+
if (bo) {
struct drm_gpuvm_bo *vm_bo;
@@ -2162,7 +2167,7 @@ static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
u64 bo_offset_or_userptr, u64 addr, u64 range,
u32 operation, u32 flags, u8 tile_mask,
- u32 prefetch_region)
+ u32 prefetch_region, u16 pat_index)
{
struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
struct drm_gpuva_ops *ops;
@@ -2231,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
op->map.read_only =
flags & DRM_XE_VM_BIND_FLAG_READONLY;
op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+ op->map.pat_index = pat_index;
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
op->prefetch.region = prefetch_region;
}
@@ -2242,7 +2248,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
}
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
- u8 tile_mask, bool read_only, bool is_null)
+ u8 tile_mask, bool read_only, bool is_null,
+ u16 pat_index)
{
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
struct xe_vma *vma;
@@ -2258,7 +2265,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
vma = xe_vma_create(vm, bo, op->gem.offset,
op->va.addr, op->va.addr +
op->va.range - 1, read_only, is_null,
- tile_mask);
+ tile_mask, pat_index);
if (bo)
xe_bo_unlock(bo);
@@ -2404,7 +2411,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
vma = new_vma(vm, &op->base.map,
op->tile_mask, op->map.read_only,
- op->map.is_null);
+ op->map.is_null, op->map.pat_index);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2430,7 +2437,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
vma = new_vma(vm, op->base.remap.prev,
op->tile_mask, read_only,
- is_null);
+ is_null, old->pat_index);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2464,7 +2471,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
vma = new_vma(vm, op->base.remap.next,
op->tile_mask, read_only,
- is_null);
+ is_null, old->pat_index);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2862,6 +2869,26 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
u64 obj_offset = (*bind_ops)[i].obj_offset;
u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+ u16 pat_index = (*bind_ops)[i].pat_index;
+ u16 coh_mode;
+
+ if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+ (*bind_ops)[i].pat_index = pat_index;
+ coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
if (i == 0) {
*async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
@@ -2892,6 +2919,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+ op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
@@ -3025,6 +3054,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u64 addr = bind_ops[i].addr;
u32 obj = bind_ops[i].obj;
u64 obj_offset = bind_ops[i].obj_offset;
+ u16 pat_index = bind_ops[i].pat_index;
+ u16 coh_mode;
if (!obj)
continue;
@@ -3052,6 +3083,24 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto put_obj;
}
}
+
+ coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ if (bos[i]->cpu_caching) {
+ if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+ bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+ err = -EINVAL;
+ goto put_obj;
+ }
+ } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+ /*
+ * Imported dma-buf from a different device should
+ * require 1way or 2way coherency since we don't know
+ * how it was mapped on the CPU. Just assume it is
+ * potentially cached on CPU side.
+ */
+ err = -EINVAL;
+ goto put_obj;
+ }
}
if (args->num_syncs) {
@@ -3079,10 +3128,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u64 obj_offset = bind_ops[i].obj_offset;
u8 tile_mask = bind_ops[i].tile_mask;
u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+ u16 pat_index = bind_ops[i].pat_index;
ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
addr, range, op, flags,
- tile_mask, prefetch_region);
+ tile_mask, prefetch_region,
+ pat_index);
if (IS_ERR(ops[i])) {
err = PTR_ERR(ops[i]);
ops[i] = NULL;
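
Taken together, the checks added above amount to the following policy. This
is a simplified editorial restatement, not the actual kernel code:
XE_COH_NONE, DRM_XE_GEM_CPU_CACHING_WB and xe_pat_index_get_coh_mode() are
the driver's own names as used in the diff, while the helper itself and its
parameters are hypothetical.

/*
 * Simplified restatement (hypothetical helper) of the pat_index policy
 * enforced above. coh_mode is what xe_pat_index_get_coh_mode() returns
 * for the requested pat_index; cpu_caching is the BO's creation-time
 * caching mode, or 0 for objects that don't carry one (e.g. imported
 * dma-buf).
 */
static bool pat_index_compatible(u16 coh_mode, bool is_userptr,
				 u16 cpu_caching)
{
	/* hw reserved PAT entries report no coherency mode at all */
	if (!coh_mode)
		return false;

	/* userptr has no gem object: coherency must be at least 1-way */
	if (is_userptr && coh_mode == XE_COH_NONE)
		return false;

	/* WB CPU caching requires at least 1-way coherency */
	if (cpu_caching == DRM_XE_GEM_CPU_CACHING_WB &&
	    coh_mode == XE_COH_NONE)
		return false;

	/* no cpu_caching tracked (e.g. imported dma-buf): assume it is
	 * potentially CPU cached, so NONE is rejected as well
	 */
	if (!is_userptr && !cpu_caching && coh_mode == XE_COH_NONE)
		return false;

	return true;
}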