summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Auld <matthew.auld@intel.com>2023-03-31 11:46:27 +0300
committerRodrigo Vivi <rodrigo.vivi@intel.com>2023-12-21 19:37:54 +0300
commitcd928fced9968558f1c7d724c23b1f8868c39774 (patch)
tree6d42b436a6b1c9de1dca4c5ad7deae70e4132c00
parent6a024f1bfdfe3b535786780f67c38429df17e857 (diff)
downloadlinux-cd928fced9968558f1c7d724c23b1f8868c39774.tar.xz
drm/xe/uapi: add the userspace bits for small-bar
Mostly the same as i915. We add a new hint for userspace to force an object into the mappable part of vram. We also need to tell userspace how large the mappable part is. In Vulkan for example, there will be two vram heaps for small-bar systems. And here the size of each heap needs to be known. Likewise the used/avail tracking needs to account for the mappable part. We also limit the available tracking going forward, such that we limit to privileged users only, since these values are system wide and are technically considered an info leak. v2 (Maarten): - s/NEEDS_CPU_ACCESS/NEEDS_VISIBLE_VRAM/ in the uapi. We also no longer require smem as an extra placement. This is more flexible, and lets us use this for clear-color surfaces, since we need CPU access there but we don't want to attach smem, since that effectively disables CCS from kernel pov. - Reject clear-color CCS buffers where NEEDS_VISIBLE_VRAM is not set, instead of migrating it behind the scenes. v3 (José): - Split the changes that limit the accounting for perfmon_capable() into a separate patch. - Use XE_BO_CREATE_VRAM_MASK. v4 (Gwan-gyeong Mun): - Add some kernel-doc for the query bits. v5: - One small kernel-doc correction. The cpu_visible_size and corresponding used tracking are always zero for non XE_MEM_REGION_CLASS_VRAM. v6: - Without perfmon_capable() it likely makes more sense to report as zero, instead of reporting as used == total size. This should give similar behaviour as i915 which rather tracks free instead of used. - Only enforce NEEDS_VISIBLE_VRAM on rc_ccs_cc_plane surfaces when the device is actually small-bar. Testcase: igt/tests/xe_query Testcase: igt/tests/xe_mmap@small-bar Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> Cc: Lucas De Marchi <lucas.demarchi@intel.com> Cc: José Roberto de Souza <jose.souza@intel.com> Cc: Filip Hazubski <filip.hazubski@intel.com> Cc: Carl Zhang <carl.zhang@intel.com> Cc: Effie Yu <effie.yu@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c13
-rw-r--r--drivers/gpu/drm/xe/xe_query.c8
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c18
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.h4
-rw-r--r--include/uapi/drm/xe_drm.h47
5 files changed, 85 insertions, 5 deletions
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index fa3fc825b730..d89cf93acb61 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1109,7 +1109,6 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
ret = ttm_bo_vm_fault_reserved(vmf,
vmf->vma->vm_page_prot,
TTM_BO_VM_NUM_PREFAULT);
-
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
@@ -1760,6 +1759,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, args->flags &
~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
XE_GEM_CREATE_FLAG_SCANOUT |
+ XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
xe->info.mem_region_mask)))
return -EINVAL;
@@ -1797,6 +1797,14 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
bo_flags |= XE_BO_SCANOUT_BIT;
bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+
+ if (args->flags & XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
+ if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+ return -EINVAL;
+
+ bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+ }
+
bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
bo_flags);
if (IS_ERR(bo)) {
@@ -2081,7 +2089,8 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT);
+ XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+ XE_BO_NEEDS_CPU_ACCESS);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index f880c9af1651..3997c644f8fc 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -17,6 +17,7 @@
#include "xe_gt.h"
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
static const enum xe_engine_class xe_to_user_engine_class[] = {
[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
@@ -148,10 +149,13 @@ static int query_memory_usage(struct xe_device *xe,
man->size;
if (perfmon_capable()) {
- usage->regions[usage->num_regions].used =
- ttm_resource_manager_usage(man);
+ xe_ttm_vram_get_used(man,
+ &usage->regions[usage->num_regions].used,
+ &usage->regions[usage->num_regions].cpu_visible_used);
}
+ usage->regions[usage->num_regions].cpu_visible_size =
+ xe_ttm_vram_get_cpu_visible_size(man);
usage->num_regions++;
}
}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 27e0d40daca8..06a54c8bd46f 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -457,3 +457,21 @@ void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
sg_free_table(sgt);
kfree(sgt);
}
+
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man)
+{
+ struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+
+ return mgr->visible_size;
+}
+
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+ u64 *used, u64 *used_visible)
+{
+ struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+
+ mutex_lock(&mgr->lock);
+ *used = mgr->mm.size - mgr->mm.avail;
+ *used_visible = mgr->visible_size - mgr->visible_avail;
+ mutex_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index 6e1d6033d739..d184e19a9230 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -25,6 +25,10 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
struct sg_table *sgt);
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+ u64 *used, u64 *used_visible);
+
static inline struct xe_ttm_vram_mgr_resource *
to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
{
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 347351a8f618..7f29c58f87a3 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -180,8 +180,37 @@ struct drm_xe_query_mem_region {
* zero.
*/
__u64 used;
+ /**
+ * @cpu_visible_size: How much of this region can be CPU
+ * accessed, in bytes.
+ *
+ * This will always be <= @total_size, and the remainder (if
+ * any) will not be CPU accessible. If the CPU accessible part
+ * is smaller than @total_size then this is referred to as a
+ * small BAR system.
+ *
+ * On systems without small BAR (full BAR), the probed_size will
+ * always equal the @total_size, since all of it will be CPU
+ * accessible.
+ *
+ * Note this is only tracked for XE_MEM_REGION_CLASS_VRAM
+ * regions (for other types the value here will always equal
+ * zero).
+ */
+ __u64 cpu_visible_size;
+ /**
+ * @cpu_visible_used: Estimate of CPU visible memory used, in
+ * bytes.
+ *
+ * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+ * accounting. Without this the value here will always equal
+ * zero. Note this is only currently tracked for
+ * XE_MEM_REGION_CLASS_VRAM regions (for other types the value
+ * here will always be zero).
+ */
+ __u64 cpu_visible_used;
/** @reserved: MBZ */
- __u64 reserved[8];
+ __u64 reserved[6];
};
/**
@@ -383,6 +412,22 @@ struct drm_xe_gem_create {
#define XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24)
#define XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25)
+/*
+ * When using VRAM as a possible placement, ensure that the corresponding VRAM
+ * allocation will always use the CPU accessible part of VRAM. This is important
+ * for small-bar systems (on full-bar systems this gets turned into a noop).
+ *
+ * Note: System memory can be used as an extra placement if the kernel should
+ * spill the allocation to system memory, if space can't be made available in
+ * the CPU accessible part of VRAM (giving the same behaviour as the i915
+ * interface, see I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
+ *
+ * Note: For clear-color CCS surfaces the kernel needs to read the clear-color
+ * value stored in the buffer, and on discrete platforms we need to use VRAM for
+ * display surfaces, therefore the kernel requires setting this flag for such
+ * objects, otherwise an error is thrown on small-bar systems.
+ */
+#define XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (0x1 << 26)
/**
* @flags: Flags, currently a mask of memory instances of where BO can
* be placed