From 84a1ed5e67565b09b8fd22a26754d2897de55ce0 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 22 Feb 2024 18:23:56 -0500 Subject: drm/xe/uapi: Remove unused flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those cases missed in previous uAPI cleanups were mostly accidentally brought in from i915 or created to exercise the possibilities of gpuvm but they are not used by userspace yet, so let's remove them. They can still be brought back later if needed. v2: - Fix XE_VM_FLAG_FAULT_MODE support in xe_lrc.c (Brian Welty) - Leave DRM_XE_VM_BIND_OP_UNMAP_ALL (José Roberto de Souza) - Ensure invalid flag values are rejected (Rodrigo Vivi) v3: Rebase after removal of persistent exec_queues (Francois Dugast) v4: Rodrigo: Rebase after the new dumpable flag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232356.175431-1-rodrigo.vivi@intel.com --- include/uapi/drm/xe_drm.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2fefec9c0e94..538a3ac95c54 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -862,10 +862,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. 
In the future, the NULL flags @@ -958,8 +954,6 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ @@ -1076,19 +1070,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From b2121f2bd2232cd0556b2182078d159d81497885 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 8 Feb 2024 10:35:39 -0800 Subject: drm/xe: Extend uAPI to query HuC micro-controller firmware version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The infrastructure to query GuC firmware version is already in place. It is extended with a new micro-controller type to query the HuC firmware version. It can be used from user space to know if HuC is running.
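As a rough userspace sketch (not part of this patch), a running HuC could be detected along these lines, assuming the existing DRM_IOCTL_XE_DEVICE_QUERY plumbing with the DRM_XE_DEVICE_QUERY_UC_FW_VERSION query and an open xe device node fd:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static void print_huc_version(int fd)
{
	struct drm_xe_query_uc_fw_version resp = {
		.uc_type = XE_QUERY_UC_TYPE_HUC,
	};
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION,
		.size = sizeof(resp),
		.data = (__u64)(uintptr_t)&resp,
	};

	/* Per the hunk below, the kernel returns -ENODEV when HuC is not running */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0)
		printf("HuC %u.%u.%u running\n",
		       resp.major_ver, resp.minor_ver, resp.patch_ver);
	else if (errno == ENODEV)
		printf("HuC not running\n");
}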
Cc: John Harrison Cc: Francois Dugast Cc: Lucas De Marchi Signed-off-by: Francois Dugast Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240208183539.185095-2-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_query.c | 38 ++++++++++++++++++++++++++++++++++---- include/uapi/drm/xe_drm.h | 1 + 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index f1876b556ab4..a6a20a6dd360 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -540,14 +540,44 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; break; } + case XE_QUERY_UC_TYPE_HUC: { + struct xe_gt *media_gt = NULL; + struct xe_huc *huc; + + if (MEDIA_VER(xe) >= 13) { + struct xe_tile *tile; + u8 gt_id; + + for_each_tile(tile, xe, gt_id) { + if (tile->media_gt) { + media_gt = tile->media_gt; + break; + } + } + } else { + media_gt = xe->tiles[0].primary_gt; + } + + if (!media_gt) + break; + + huc = &media_gt->uc.huc; + if (huc->fw.status == XE_UC_FIRMWARE_RUNNING) + version = &huc->fw.versions.found[XE_UC_FW_VER_RELEASE]; + break; + } default: return -EINVAL; } - resp.branch_ver = 0; - resp.major_ver = version->major; - resp.minor_ver = version->minor; - resp.patch_ver = version->patch; + if (version) { + resp.branch_ver = 0; + resp.major_ver = version->major; + resp.minor_ver = version->minor; + resp.patch_ver = version->patch; + } else { + return -ENODEV; + } if (copy_to_user(query_ptr, &resp, size)) return -EFAULT; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 538a3ac95c54..2fc19177d2b0 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -583,6 +583,7 @@ struct drm_xe_query_engine_cycles { struct drm_xe_query_uc_fw_version { /** @uc_type: The micro-controller type to query firmware version */ #define XE_QUERY_UC_TYPE_GUC_SUBMISSION 0 +#define XE_QUERY_UC_TYPE_HUC 1 __u16 uc_type; /** @pad: MBZ */ -- cgit v1.2.3 From 0f25e493a2462dbdd2e34f4e100405380cc0201a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 29 Feb 2024 17:22:15 +0100 Subject: drm/panthor: Add uAPI Panthor follows the lead of other recently submitted drivers with ioctls allowing us to support modern Vulkan features, like sparse memory binding: - Pretty standard GEM management ioctls (BO_CREATE and BO_MMAP_OFFSET), with the 'exclusive-VM' bit to speed up BO reservation on job submission - VM management ioctls (VM_CREATE, VM_DESTROY and VM_BIND). The VM_BIND ioctl is loosely based on the Xe model, and can handle both asynchronous and synchronous requests - GPU execution context creation/destruction, tiler heap context creation and job submission. Those ioctls reflect how the hardware/scheduler works and are thus driver specific. We also have a way to expose IO regions, such that the usermode driver can directly access specific/well-isolated registers, like the LATEST_FLUSH register used to implement cache-flush reduction. This uAPI intentionally keeps usermode queues out of scope, which explains why doorbell registers and command stream ring-buffers are not directly exposed to userspace.
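To make the VM_BIND flow above concrete, here is a hedged sketch of a synchronous map built only on the structures declared in the header this patch adds; fd, bo, va and size are placeholder inputs and error handling is elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/panthor_drm.h>

static int create_vm_and_map(int fd, __u32 bo, __u64 va, __u64 size)
{
	struct drm_panthor_vm_create vm_args = {
		.user_va_range = 0,	/* 0: let the kernel pick the user/kernel VA split */
	};
	struct drm_panthor_vm_bind_op op = {
		.flags = DRM_PANTHOR_VM_BIND_OP_TYPE_MAP,
		.bo_handle = bo,
		.bo_offset = 0,
		.va = va,
		.size = size,
		/* .syncs stays empty: only allowed with DRM_PANTHOR_VM_BIND_ASYNC */
	};
	struct drm_panthor_vm_bind bind = {
		.flags = 0,		/* no DRM_PANTHOR_VM_BIND_ASYNC: executed synchronously */
		.ops = DRM_PANTHOR_OBJ_ARRAY(1, &op),
	};

	if (ioctl(fd, DRM_IOCTL_PANTHOR_VM_CREATE, &vm_args))
		return -1;
	bind.vm_id = vm_args.id;
	return ioctl(fd, DRM_IOCTL_PANTHOR_VM_BIND, &bind);
}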
v6: - Add Maxime's and Heiko's acks v5: - Fix typo - Add Liviu's R-b v4: - Add a VM_GET_STATE ioctl - Fix doc - Expose the CORE_FEATURES register so we can deal with variants in the UMD - Add Steve's R-b v3: - Add the concept of sync-only VM operation - Fix support for 32-bit userspace - Rework drm_panthor_vm_create to pass the user VA size instead of the kernel VA size (suggested by Robin Murphy) - Typo fixes - Explicitly cast enums with top bit set to avoid compiler warnings in -pedantic mode. - Drop property core_group_count as it can be easily calculated by the number of bits set in l2_present. Co-developed-by: Steven Price Signed-off-by: Steven Price Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Acked-by: Maxime Ripard Acked-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-2-boris.brezillon@collabora.com --- Documentation/gpu/driver-uapi.rst | 5 + include/uapi/drm/panthor_drm.h | 945 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 950 insertions(+) create mode 100644 include/uapi/drm/panthor_drm.h (limited to 'include/uapi') diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index e5070a0e95ab..971cdb4816fc 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -18,6 +18,11 @@ VM_BIND / EXEC uAPI .. kernel-doc:: include/uapi/drm/nouveau_drm.h +drm/panthor uAPI +================ + +.. kernel-doc:: include/uapi/drm/panthor_drm.h + drm/xe uAPI =========== diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h new file mode 100644 index 000000000000..373df80f41ed --- /dev/null +++ b/include/uapi/drm/panthor_drm.h @@ -0,0 +1,945 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright (C) 2023 Collabora ltd. */ +#ifndef _PANTHOR_DRM_H_ +#define _PANTHOR_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: Introduction + * + * This documentation describes the Panthor IOCTLs. + * + * Just a few generic rules about the data passed to the Panthor IOCTLs: + * + * - Structures must be aligned on 64-bit/8-byte. If the object is not + * naturally aligned, a padding field must be added. + * - Fields must be explicitly aligned to their natural type alignment with + * pad[0..N] fields. + * - All padding fields will be checked by the driver to make sure they are + * zeroed. + * - Flags can be added, but not removed/replaced. + * - New fields can be added to the main structures (the structures + * directly passed to the ioctl). Those fields can be added at the end of + * the structure, or replace existing padding fields. Any new field being + * added must preserve the behavior that existed before those fields were + * added when a value of zero is passed. + * - New fields can be added to indirect objects (objects pointed by the + * main structure), iff those objects are passed a size to reflect the + * size known by the userspace driver (see drm_panthor_obj_array::stride + * or drm_panthor_dev_query::size). + * - If the kernel driver is too old to know some fields, those will be + * ignored if zero, and otherwise rejected (and so will be zero on output). + * - If userspace is too old to know some fields, those will be zeroed + * (input) before the structure is parsed by the kernel driver. + * - Each new flag/field addition must come with a driver version update so + * the userspace driver doesn't have to trial and error to know which + * flags are supported. 
+ * - Structures should not contain unions, as this would defeat the + * extensibility of such structures. + * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed + * at the end of the drm_panthor_ioctl_id enum. + */ + +/** + * DOC: MMIO regions exposed to userspace. + * + * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET + * + * File offset for all MMIO regions being exposed to userspace. Don't use + * this value directly, use DRM_PANTHOR_USER__OFFSET values instead. + * pgoffset passed to mmap2() is an unsigned long, which forces us to use a + * different offset on 32-bit and 64-bit systems. + * + * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET + * + * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls + * GPU cache flushing through CS instructions, but the flush reduction + * mechanism requires a flush_id. This flush_id could be queried with an + * ioctl, but Arm provides a well-isolated register page containing only this + * read-only register, so let's expose this page through a static mmap offset + * and allow direct mapping of this MMIO region so we can avoid the + * user <-> kernel round-trip. + */ +#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) +#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) +#define DRM_PANTHOR_USER_MMIO_OFFSET (sizeof(unsigned long) < 8 ? \ + DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ + DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) +#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) + +/** + * DOC: IOCTL IDs + * + * enum drm_panthor_ioctl_id - IOCTL IDs + * + * Place new ioctls at the end, don't re-order, don't replace or remove entries. + * + * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx + * definitions instead. + */ +enum drm_panthor_ioctl_id { + /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ + DRM_PANTHOR_DEV_QUERY = 0, + + /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ + DRM_PANTHOR_VM_CREATE, + + /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ + DRM_PANTHOR_VM_DESTROY, + + /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ + DRM_PANTHOR_VM_BIND, + + /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ + DRM_PANTHOR_VM_GET_STATE, + + /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ + DRM_PANTHOR_BO_CREATE, + + /** + * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to + * mmap to map a GEM object. + */ + DRM_PANTHOR_BO_MMAP_OFFSET, + + /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ + DRM_PANTHOR_GROUP_CREATE, + + /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ + DRM_PANTHOR_GROUP_DESTROY, + + /** + * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging + * to a specific scheduling group. + */ + DRM_PANTHOR_GROUP_SUBMIT, + + /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */ + DRM_PANTHOR_GROUP_GET_STATE, + + /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ + DRM_PANTHOR_TILER_HEAP_CREATE, + + /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ + DRM_PANTHOR_TILER_HEAP_DESTROY, +}; + +/** + * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number + * @__access: Access type. Must be R, W or RW. + * @__id: One of the DRM_PANTHOR_xxx id. + * @__type: Suffix of the type being passed to the IOCTL. + * + * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx + * values instead. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. 
+ */ +#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ + DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ + struct drm_panthor_ ## __type) + +#define DRM_IOCTL_PANTHOR_DEV_QUERY \ + DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query) +#define DRM_IOCTL_PANTHOR_VM_CREATE \ + DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create) +#define DRM_IOCTL_PANTHOR_VM_DESTROY \ + DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy) +#define DRM_IOCTL_PANTHOR_VM_BIND \ + DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind) +#define DRM_IOCTL_PANTHOR_VM_GET_STATE \ + DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state) +#define DRM_IOCTL_PANTHOR_BO_CREATE \ + DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create) +#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \ + DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset) +#define DRM_IOCTL_PANTHOR_GROUP_CREATE \ + DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create) +#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \ + DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy) +#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \ + DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit) +#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \ + DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state) +#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \ + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create) +#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \ + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy) + +/** + * DOC: IOCTL arguments + */ + +/** + * struct drm_panthor_obj_array - Object array. + * + * This object is used to pass an array of objects whose size is subject to changes in + * future versions of the driver. In order to support this mutability, we pass a stride + * describing the size of the object as known by userspace. + * + * You shouldn't fill drm_panthor_obj_array fields directly. You should instead use + * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to + * the object size. + */ +struct drm_panthor_obj_array { + /** @stride: Stride of object struct. Used for versioning. */ + __u32 stride; + + /** @count: Number of objects in the array. */ + __u32 count; + + /** @array: User pointer to an array of objects. */ + __u64 array; +}; + +/** + * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. + * @cnt: Number of elements in the array. + * @ptr: Pointer to the array to pass to the kernel. + * + * Macro initializing a drm_panthor_obj_array based on the object size as known + * by userspace. + */ +#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ + { .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) } + +/** + * enum drm_panthor_sync_op_flags - Synchronization operation flags. + */ +enum drm_panthor_sync_op_flags { + /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, + + /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, + + /** + * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization + * object type. + */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, + + /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ + DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, + + /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */ + DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), +}; + +/** + * struct drm_panthor_sync_op - Synchronization operation. + */ +struct drm_panthor_sync_op { + /** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. 
*/ + __u32 flags; + + /** @handle: Sync handle. */ + __u32 handle; + + /** + * @timeline_value: MBZ if + * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != + * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. + */ + __u64 timeline_value; +}; + +/** + * enum drm_panthor_dev_query_type - Query type + * + * Place new types at the end, don't re-order, don't remove or replace. + */ +enum drm_panthor_dev_query_type { + /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ + DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, + + /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ + DRM_PANTHOR_DEV_QUERY_CSIF_INFO, +}; + +/** + * struct drm_panthor_gpu_info - GPU information + * + * Structure grouping all queryable information relating to the GPU. + */ +struct drm_panthor_gpu_info { + /** @gpu_id : GPU ID. */ + __u32 gpu_id; +#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) +#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) +#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) +#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) +#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) +#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) +#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) + + /** @gpu_rev: GPU revision. */ + __u32 gpu_rev; + + /** @csf_id: Command stream frontend ID. */ + __u32 csf_id; +#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) +#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) +#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) +#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) +#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) +#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) + + /** @l2_features: L2-cache features. */ + __u32 l2_features; + + /** @tiler_features: Tiler features. */ + __u32 tiler_features; + + /** @mem_features: Memory features. */ + __u32 mem_features; + + /** @mmu_features: MMU features. */ + __u32 mmu_features; +#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) + + /** @thread_features: Thread features. */ + __u32 thread_features; + + /** @max_threads: Maximum number of threads. */ + __u32 max_threads; + + /** @thread_max_workgroup_size: Maximum workgroup size. */ + __u32 thread_max_workgroup_size; + + /** + * @thread_max_barrier_size: Maximum number of threads that can wait + * simultaneously on a barrier. + */ + __u32 thread_max_barrier_size; + + /** @coherency_features: Coherency features. */ + __u32 coherency_features; + + /** @texture_features: Texture features. */ + __u32 texture_features[4]; + + /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ + __u32 as_present; + + /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ + __u64 shader_present; + + /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ + __u64 l2_present; + + /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ + __u64 tiler_present; + + /* @core_features: Used to discriminate core variants when they exist. */ + __u32 core_features; + + /* @pad: MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_csif_info - Command stream interface information + * + * Structure grouping all queryable information relating to the command stream interface. + */ +struct drm_panthor_csif_info { + /** @csg_slot_count: Number of command stream group slots exposed by the firmware. */ + __u32 csg_slot_count; + + /** @cs_slot_count: Number of command stream slots per group. */ + __u32 cs_slot_count; + + /** @cs_reg_count: Number of command stream registers. 
*/ + __u32 cs_reg_count; + + /** @scoreboard_slot_count: Number of scoreboard slots. */ + __u32 scoreboard_slot_count; + + /** + * @unpreserved_cs_reg_count: Number of command stream registers reserved by + * the kernel driver to call a userspace command stream. + * + * All registers can be used by a userspace command stream, but the + * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are + * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. + */ + __u32 unpreserved_cs_reg_count; + + /** + * @pad: Padding field, set to zero. + */ + __u32 pad; +}; + +/** + * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY + */ +struct drm_panthor_dev_query { + /** @type: the query type (see drm_panthor_dev_query_type). */ + __u32 type; + + /** + * @size: size of the type being queried. + * + * If pointer is NULL, size is updated by the driver to provide the + * output structure size. If pointer is not NULL, the driver will + * only copy min(size, actual_structure_size) bytes to the pointer, + * and update the size accordingly. This allows us to extend query + * types without breaking userspace. + */ + __u32 size; + + /** + * @pointer: user pointer to a query type struct. + * + * Pointer can be NULL, in which case, nothing is copied, but the + * actual structure size is returned. If not NULL, it must point to + * a location that's large enough to hold size bytes. + */ + __u64 pointer; +}; + +/** + * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE + */ +struct drm_panthor_vm_create { + /** @flags: VM flags, MBZ. */ + __u32 flags; + + /** @id: Returned VM ID. */ + __u32 id; + + /** + * @user_va_range: Size of the VA space reserved for user objects. + * + * The kernel will pick the remaining space to map kernel-only objects to the + * VM (heap chunks, heap context, ring buffers, kernel synchronization objects, + * ...). If the space left for kernel objects is too small, kernel object + * allocation will fail further down the road. One can use + * drm_panthor_gpu_info::mmu_features to extract the total virtual address + * range, and chose a user_va_range that leaves some space to the kernel. + * + * If user_va_range is zero, the kernel will pick a sensible value based on + * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user + * split should leave enough VA space for userspace processes to support SVM, + * while still allowing the kernel to map some amount of kernel objects in + * the kernel VA range). The value chosen by the driver will be returned in + * @user_va_range. + * + * User VA space always starts at 0x0, kernel VA space is always placed after + * the user VA range. + */ + __u64 user_va_range; +}; + +/** + * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY + */ +struct drm_panthor_vm_destroy { + /** @id: ID of the VM to destroy. */ + __u32 id; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * enum drm_panthor_vm_bind_op_flags - VM bind operation flags + */ +enum drm_panthor_vm_bind_op_flags { + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, + + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, + + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. 
+ * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, + + /** + * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation. + */ + DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), + + /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */ + DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, + + /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ + DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, + + /** + * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. + * + * Just serves as a synchronization point on a VM queue. + * + * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags, + * and drm_panthor_vm_bind_op::syncs contains at least one element. + */ + DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, +}; + +/** + * struct drm_panthor_vm_bind_op - VM bind operation + */ +struct drm_panthor_vm_bind_op { + /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ + __u32 flags; + + /** + * @bo_handle: Handle of the buffer object to map. + * MBZ for unmap or sync-only operations. + */ + __u32 bo_handle; + + /** + * @bo_offset: Buffer object offset. + * MBZ for unmap or sync-only operations. + */ + __u64 bo_offset; + + /** + * @va: Virtual address to map/unmap. + * MBZ for sync-only operations. + */ + __u64 va; + + /** + * @size: Size to map/unmap. + * MBZ for sync-only operations. + */ + __u64 size; + + /** + * @syncs: Array of struct drm_panthor_sync_op synchronization + * operations. + * + * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on + * the drm_panthor_vm_bind object containing this VM bind operation. + * + * This array shall not be empty for sync-only operations. + */ + struct drm_panthor_obj_array syncs; + +}; + +/** + * enum drm_panthor_vm_bind_flags - VM bind flags + */ +enum drm_panthor_vm_bind_flags { + /** + * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM + * queue instead of being executed synchronously. + */ + DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, +}; + +/** + * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND + */ +struct drm_panthor_vm_bind { + /** @vm_id: VM targeted by the bind request. */ + __u32 vm_id; + + /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ + __u32 flags; + + /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ + struct drm_panthor_obj_array ops; +}; + +/** + * enum drm_panthor_vm_state - VM states. + */ +enum drm_panthor_vm_state { + /** + * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. + * + * New VM operations will be accepted on this VM. + */ + DRM_PANTHOR_VM_STATE_USABLE, + + /** + * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable. + * + * Something put the VM in an unusable state (like an asynchronous + * VM_BIND request failing for any reason). + * + * Once the VM is in this state, all new MAP operations will be + * rejected, and any GPU job targeting this VM will fail. + * UNMAP operations are still accepted. + * + * The only way to recover from an unusable VM is to create a new + * VM, and destroy the old one. + */ + DRM_PANTHOR_VM_STATE_UNUSABLE, +}; + +/** + * struct drm_panthor_vm_get_state - Get VM state. + */ +struct drm_panthor_vm_get_state { + /** @vm_id: VM targeted by the get_state request. */ + __u32 vm_id; + + /** + * @state: state returned by the driver. + * + * Must be one of the enum drm_panthor_vm_state values. + */ + __u32 state; +}; + +/** + * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. 
+ */ +enum drm_panthor_bo_flags { + /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ + DRM_PANTHOR_BO_NO_MMAP = (1 << 0), +}; + +/** + * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE. + */ +struct drm_panthor_bo_create { + /** + * @size: Requested size for the object + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + + /** + * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. + */ + __u32 flags; + + /** + * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. + * + * If not zero, the field must refer to a valid VM ID, and implies that: + * - the buffer object will only ever be bound to that VM + * - cannot be exported as a PRIME fd + */ + __u32 exclusive_vm_id; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. + */ +struct drm_panthor_bo_mmap_offset { + /** @handle: Handle of the object we want an mmap offset for. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** @offset: The fake offset to use for subsequent mmap calls. */ + __u64 offset; +}; + +/** + * struct drm_panthor_queue_create - Queue creation arguments. + */ +struct drm_panthor_queue_create { + /** + * @priority: Defines the priority of queues inside a group. Goes from 0 to 15, + * 15 being the highest priority. + */ + __u8 priority; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; + + /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ + __u32 ringbuf_size; +}; + +/** + * enum drm_panthor_group_priority - Scheduling group priority + */ +enum drm_panthor_group_priority { + /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ + PANTHOR_GROUP_PRIORITY_LOW = 0, + + /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ + PANTHOR_GROUP_PRIORITY_MEDIUM, + + /** @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. */ + PANTHOR_GROUP_PRIORITY_HIGH, +}; + +/** + * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE + */ +struct drm_panthor_group_create { + /** @queues: Array of drm_panthor_queue_create elements. */ + struct drm_panthor_obj_array queues; + + /** + * @max_compute_cores: Maximum number of cores that can be used by compute + * jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @compute_core_mask. + */ + __u8 max_compute_cores; + + /** + * @max_fragment_cores: Maximum number of cores that can be used by fragment + * jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @fragment_core_mask. + */ + __u8 max_fragment_cores; + + /** + * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs + * across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @tiler_core_mask. + */ + __u8 max_tiler_cores; + + /** @priority: Group priority (see enum drm_panthor_group_priority). */ + __u8 priority; + + /** @pad: Padding field, MBZ. */ + __u32 pad; + + /** + * @compute_core_mask: Mask encoding cores that can be used for compute jobs. + * + * This field must have at least @max_compute_cores bits set. + * + * The bits set here should also be set in drm_panthor_gpu_info::shader_present. 
+ */ + __u64 compute_core_mask; + + /** + * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs. + * + * This field must have at least @max_fragment_cores bits set. + * + * The bits set here should also be set in drm_panthor_gpu_info::shader_present. + */ + __u64 fragment_core_mask; + + /** + * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs. + * + * This field must have at least @max_tiler_cores bits set. + * + * The bits set here should also be set in drm_panthor_gpu_info::tiler_present. + */ + __u64 tiler_core_mask; + + /** + * @vm_id: VM ID to bind this group to. + * + * All submission to queues bound to this group will use this VM. + */ + __u32 vm_id; + + /** + * @group_handle: Returned group handle. Passed back when submitting jobs or + * destroying a group. + */ + __u32 group_handle; +}; + +/** + * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY + */ +struct drm_panthor_group_destroy { + /** @group_handle: Group to destroy */ + __u32 group_handle; + + /** @pad: Padding field, MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_queue_submit - Job submission arguments. + * + * This is describing the userspace command stream to call from the kernel + * command stream ring-buffer. Queue submission is always part of a group + * submission, taking one or more jobs to submit to the underlying queues. + */ +struct drm_panthor_queue_submit { + /** @queue_index: Index of the queue inside a group. */ + __u32 queue_index; + + /** + * @stream_size: Size of the command stream to execute. + * + * Must be 64-bit/8-byte aligned (the size of a CS instruction) + * + * Can be zero if stream_addr is zero too. + */ + __u32 stream_size; + + /** + * @stream_addr: GPU address of the command stream to execute. + * + * Must be aligned on 64-byte. + * + * Can be zero is stream_size is zero too. + */ + __u64 stream_addr; + + /** + * @latest_flush: FLUSH_ID read at the time the stream was built. + * + * This allows cache flush elimination for the automatic + * flush+invalidate(all) done at submission time, which is needed to + * ensure the GPU doesn't get garbage when reading the indirect command + * stream buffers. If you want the cache flush to happen + * unconditionally, pass a zero here. + */ + __u32 latest_flush; + + /** @pad: MBZ. */ + __u32 pad; + + /** @syncs: Array of struct drm_panthor_sync_op sync operations. */ + struct drm_panthor_obj_array syncs; +}; + +/** + * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT + */ +struct drm_panthor_group_submit { + /** @group_handle: Handle of the group to queue jobs to. */ + __u32 group_handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** @queue_submits: Array of drm_panthor_queue_submit objects. */ + struct drm_panthor_obj_array queue_submits; +}; + +/** + * enum drm_panthor_group_state_flags - Group state flags + */ +enum drm_panthor_group_state_flags { + /** + * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs. + * + * When a group ends up with this flag set, no jobs can be submitted to its queues. + */ + DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0, + + /** + * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults. + * + * When a group ends up with this flag set, no jobs can be submitted to its queues. 
+ */ + DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, +}; + +/** + * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE + * + * Used to query the state of a group and decide whether a new group should be created to + * replace it. + */ +struct drm_panthor_group_get_state { + /** @group_handle: Handle of the group to query state on */ + __u32 group_handle; + + /** + * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the + * group state. + */ + __u32 state; + + /** @fatal_queues: Bitmask of queues that faced fatal faults. */ + __u32 fatal_queues; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE + */ +struct drm_panthor_tiler_heap_create { + /** @vm_id: VM ID the tiler heap should be mapped to */ + __u32 vm_id; + + /** @initial_chunk_count: Initial number of chunks to allocate. */ + __u32 initial_chunk_count; + + /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ + __u32 chunk_size; + + /** @max_chunks: Maximum number of chunks that can be allocated. */ + __u32 max_chunks; + + /** + * @target_in_flight: Maximum number of in-flight render passes. + * + * If the heap has more than tiler jobs in-flight, the FW will wait for render + * passes to finish before queuing new tiler jobs. + */ + __u32 target_in_flight; + + /** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */ + __u32 handle; + + /** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned */ + __u64 tiler_heap_ctx_gpu_va; + + /** + * @first_heap_chunk_gpu_va: First heap chunk. + * + * The tiler heap is formed of heap chunks forming a single-link list. This + * is the first element in the list. + */ + __u64 first_heap_chunk_gpu_va; +}; + +/** + * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY + */ +struct drm_panthor_tiler_heap_destroy { + /** @handle: Handle of the tiler heap to destroy */ + __u32 handle; + + /** @pad: Padding field, MBZ. */ + __u32 pad; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _PANTHOR_DRM_H_ */ -- cgit v1.2.3 From d10612f8303fab350d82f8b7d5793683af50ee3c Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 29 Feb 2024 14:29:18 +0100 Subject: drm/i915: Add missing doc for drm_i915_reset_stats Add missing doc for struct drm_i915_reset_stats. Cc: Andi Shyti Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240229132918.10205-1-nirmoy.das@intel.com --- include/uapi/drm/i915_drm.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index bd87386a8243..fb195d3b3e9f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2623,19 +2623,29 @@ struct drm_i915_reg_read { * */ +/* + * struct drm_i915_reset_stats - Return global reset and other context stats + * + * Driver keeps few stats for each contexts and also global reset count. + * This struct can be used to query those stats. 
+ */ struct drm_i915_reset_stats { + /** @ctx_id: ID of the requested context */ __u32 ctx_id; + + /** @flags: MBZ */ __u32 flags; - /* All resets since boot/module reload, for all contexts */ + /** @reset_count: All resets since boot/module reload, for all contexts */ __u32 reset_count; - /* Number of batches lost when active in GPU, for this context */ + /** @batch_active: Number of batches lost when active in GPU, for this context */ __u32 batch_active; - /* Number of batches lost pending for execution, for this context */ + /** @batch_pending: Number of batches lost pending for execution, for this context */ __u32 batch_pending; + /** @pad: MBZ */ __u32 pad; }; -- cgit v1.2.3 From 460be1d527a8e296d85301e8b14923299508d4fc Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 5 Mar 2024 14:38:53 +0100 Subject: drm/nouveau: move more missing UAPI bits Those are already de-facto UAPI, so let's just move it into the uapi header. Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Reviewed-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240305133853.2214268-2-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 20 +++++++++++++++----- drivers/gpu/drm/nouveau/nouveau_abi16.h | 12 ------------ include/uapi/drm/nouveau_drm.h | 22 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index cd14f993bdd1..92f9127b284a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -312,11 +312,21 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { if (init->fb_ctxdma_handle == ~0) { switch (init->tt_ctxdma_handle) { - case 0x01: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR ; break; - case 0x02: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPDEC; break; - case 0x04: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPPP ; break; - case 0x08: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSVLD ; break; - case 0x30: engine = NV_DEVICE_HOST_RUNLIST_ENGINES_CE ; break; + case NOUVEAU_FIFO_ENGINE_GR: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR; + break; + case NOUVEAU_FIFO_ENGINE_VP: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPDEC; + break; + case NOUVEAU_FIFO_ENGINE_PPP: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSPPP; + break; + case NOUVEAU_FIFO_ENGINE_BSP: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_MSVLD; + break; + case NOUVEAU_FIFO_ENGINE_CE: + engine = NV_DEVICE_HOST_RUNLIST_ENGINES_CE; + break; default: return nouveau_abi16_put(abi16, -ENOSYS); } diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 11c8c4a80079..661b901d8ecc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -50,18 +50,6 @@ struct drm_nouveau_grobj_alloc { int class; }; -struct drm_nouveau_notifierobj_alloc { - uint32_t channel; - uint32_t handle; - uint32_t size; - uint32_t offset; -}; - -struct drm_nouveau_gpuobj_free { - int channel; - uint32_t handle; -}; - struct drm_nouveau_setparam { uint64_t param; uint64_t value; diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index cd84227f1b42..8ad8d1cd1566 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -73,6 +73,16 @@ struct drm_nouveau_getparam { __u64 value; }; +/* + * Those are used to support selecting the main engine used on Kepler. 
+ * This goes into drm_nouveau_channel_alloc::tt_ctxdma_handle + */ +#define NOUVEAU_FIFO_ENGINE_GR 0x01 +#define NOUVEAU_FIFO_ENGINE_VP 0x02 +#define NOUVEAU_FIFO_ENGINE_PPP 0x04 +#define NOUVEAU_FIFO_ENGINE_BSP 0x08 +#define NOUVEAU_FIFO_ENGINE_CE 0x30 + struct drm_nouveau_channel_alloc { __u32 fb_ctxdma_handle; __u32 tt_ctxdma_handle; @@ -95,6 +105,18 @@ struct drm_nouveau_channel_free { __s32 channel; }; +struct drm_nouveau_notifierobj_alloc { + __u32 channel; + __u32 handle; + __u32 size; + __u32 offset; +}; + +struct drm_nouveau_gpuobj_free { + __s32 channel; + __u32 handle; +}; + #define NOUVEAU_GEM_DOMAIN_CPU (1 << 0) #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) -- cgit v1.2.3 From cec82816d0d018f178b9b7f88fe4bf80d66954e9 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 5 Mar 2024 17:27:59 -0800 Subject: drm/i915/guc: Use context hints for GT frequency Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). 
v3: Minor review comments (Tvrtko) v4: Update comment (Sushma) Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Sushma Venkatesh Reddy Reviewed-by: Rodrigo Vivi Acked-by: Ivan Briano Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240306012759.204938-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 ++++++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 4 ++++ .../gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++++++ drivers/gpu/drm/i915/i915_getparam.c | 6 ++++++ include/uapi/drm/i915_drm.h | 15 +++++++++++++++ 10 files changed, 86 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..81f65cab1330 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_LOW_LATENCY: + if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); + else + ret = -EINVAL; + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) ret = intel_context_reconfigure_sseu(ce, sseu); + if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags)) + __set_bit(CONTEXT_LOW_LATENCY, &ce->flags); + return ret; } @@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915, if (vm) ctx->vm = vm; + /* Assign early so intel_context_set_gem can access these flags */ + ctx->user_flags = pc->user_flags; + mutex_init(&ctx->engines_mutex); if (pc->num_user_engines >= 0) { i915_gem_context_set_user_engines(ctx); @@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915, * is no remap info, it will be a NOP. 
*/ ctx->remap_slice = ALL_L3_SLICES(i915); - ctx->user_flags = pc->user_flags; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..b6d97da63d1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_LOW_LATENCY 5 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7eccbd70d89f..ed95a7b57cbb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -130,6 +130,7 @@ struct intel_context { #define CONTEXT_PERMA_PIN 11 #define CONTEXT_IS_PARKING 12 #define CONTEXT_EXITING 13 +#define CONTEXT_LOW_LATENCY 14 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 9c6812257ac2..a929aa6e3c85 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1013,6 +1013,10 @@ void intel_rps_boost(struct i915_request *rq) if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; + /* Waitboost is not needed for contexts marked with a Freq hint */ + if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags)) + return; + /* Serializes with i915_request_retire() */ if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index 811add10c30d..c34674e797c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -207,6 +207,27 @@ struct slpc_shared_data { u8 reserved_mode_definition[4096]; } __packed; +struct slpc_context_frequency_request { + u32 frequency_request:16; + u32 reserved:12; + u32 is_compute:1; + u32 ignore_busyness:1; + u32 is_minimum:1; + u32 is_predefined:1; +} __packed; + +#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28) + +struct slpc_optimized_strategies { + u32 compute:1; + u32 async_flip:1; + u32 media:1; + u32 vsync_flip:1; + u32 reserved:28; +} __packed; + +#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0) + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 3e681ab6fbf9..706fffca698b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -537,6 +537,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + ret = slpc_set_param(slpc, + SLPC_PARAM_STRATEGIES, + val); + + return ret; +} + int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -711,6 +725,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ 
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); + /* Enable SLPC Optimized Strategy for compute */ + intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 6ac6503c39d4..1cb5fd44f05c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -45,5 +45,6 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); +int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cc076e9302ad..e5c645137cfe 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2645,6 +2645,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) +MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY) #undef MAKE_CONTEXT_POLICY_ADD @@ -2664,6 +2665,7 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) struct context_policy policy; u32 execution_quantum; u32 preemption_timeout; + u32 slpc_ctx_freq_req = 0; unsigned long flags; int ret; @@ -2675,11 +2677,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) execution_quantum = engine->props.timeslice_duration_ms * 1000; preemption_timeout = engine->props.preempt_timeout_ms * 1000; + if (ce && (ce->flags & BIT(CONTEXT_LOW_LATENCY))) + slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; + __guc_context_policy_start_klv(&policy, ce->guc_id.id); __guc_context_policy_add_priority(&policy, ce->guc_state.prio); __guc_context_policy_add_execution_quantum(&policy, execution_quantum); __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req); if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) __guc_context_policy_add_preempt_to_idle(&policy, 1); diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 5c3fec63cb4c..95c58805b2a4 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -155,6 +155,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_HAS_CONTEXT_FREQ_HINT: + if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) + value = 1; + else + value = -EINVAL; + break; case I915_PARAM_HAS_CONTEXT_ISOLATION: value = intel_engines_has_context_isolation(i915); break; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2ee338860b7e..558d95baf851 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PXP_STATUS 58 +/* + * Query if kernel allows marking a context to send a Freq hint to SLPC. This + * will enable use of the strategies allowed by the SLPC algorithm. 
+ */ +#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59 + /* Must be kept compact -- no holes and well documented */ /** @@ -2148,6 +2154,15 @@ struct drm_i915_gem_context_param { * -EIO: The firmware did not succeed in creating the protected context. */ #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd + +/* + * I915_CONTEXT_PARAM_LOW_LATENCY: + * + * Mark this context as a low latency workload which requires aggressive GT + * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel + * supports this per context flag. + */ +#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ -- cgit v1.2.3 From 8e61e3192a77e1006c1c54c022173026f7afae2c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 12 Mar 2024 14:12:25 -0700 Subject: drm/xe/uapi: Add IP version and stepping to GT list query For modern platforms (MTL and later), both kernel and userspace drivers are expected to apply GT programming and workarounds based on the IP version and stepping self-reported by the GT hardware via the GMD_ID registers. Since userspace drivers can't access these registers directly, pass along the version and stepping information via the GT list query. Note that the new query fields will remain 0's when running on pre-GMD_ID platforms. Userspace is expected to continue using PCI devid / revid on those older platforms. Although the hardware also has a GMD_ID register for display version/stepping, that value is intentionally *not* included anywhere in the Xe uapi. Display userspace should be using platform-agnostic APIs and auto-detecting platform capabilities rather than matching specific IP versions. v2: - s/revid/rev/ (Lucas) - Fix kerneldoc copy/paste mistakes Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240312211229.2871288-4-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_query.c | 8 ++++++++ include/uapi/drm/xe_drm.h | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index a6a20a6dd360..e80321b34918 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -12,6 +12,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -401,6 +402,13 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query BIT(gt_to_tile(gt)->id) << 1; gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ gt_list->gt_list[id].near_mem_regions; + + gt_list->gt_list[id].ip_ver_major = + REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); + gt_list->gt_list[id].ip_ver_minor = + REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); + gt_list->gt_list[id].ip_ver_rev = + REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); } if (copy_to_user(query_ptr, gt_list, size)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2fc19177d2b0..808ad1c308ec 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -459,8 +459,16 @@ struct drm_xe_gt { * by struct drm_xe_query_mem_regions' mem_class. 
*/ __u64 far_mem_regions; + /** @ip_ver_major: Graphics/media IP major version on GMD_ID platforms */ + __u16 ip_ver_major; + /** @ip_ver_minor: Graphics/media IP minor version on GMD_ID platforms */ + __u16 ip_ver_minor; + /** @ip_ver_rev: Graphics/media IP revision version on GMD_ID platforms */ + __u16 ip_ver_rev; + /** @pad2: MBZ */ + __u16 pad2; /** @reserved: Reserved */ - __u64 reserved[8]; + __u64 reserved[7]; }; /** -- cgit v1.2.3 From 462a7c0f8e3f833e5ca3dd3f427882b60b3f38e9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 26 Mar 2024 10:30:55 +0100 Subject: drm/panthor: Fix wrong kernel-doc format in the uAPI header The kernel doc prefix is /** not /*. Reported-by: Stephen Rothwell Signed-off-by: Boris Brezillon Acked-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240326093055.411932-1-boris.brezillon@collabora.com --- include/uapi/drm/panthor_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 373df80f41ed..dadb05ab1235 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -336,10 +336,10 @@ struct drm_panthor_gpu_info { /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ __u64 tiler_present; - /* @core_features: Used to discriminate core variants when they exist. */ + /** @core_features: Used to discriminate core variants when they exist. */ __u32 core_features; - /* @pad: MBZ. */ + /** @pad: MBZ. */ __u32 pad; }; -- cgit v1.2.3 From ca83f9d20171ce1ba5f82fb28b77eddabd636443 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 28 Mar 2024 14:02:43 +0000 Subject: drm/xe/uapi: Define topology types as indexes rather than masks The topology type is an index (not a mask) so define the values like other indexes instead of using powers of 2. This is also to make clear that the next type can use value 3. This commit does not change the existing values so it does not break compatibility. Cc: Lucas De Marchi Suggested-by: Matt Roper Signed-off-by: Francois Dugast Link: https://lore.kernel.org/intel-xe/20240327232317.GI718896@mdroper-desk1.amr.corp.intel.com/ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240328140243.7-1-francois.dugast@intel.com Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 808ad1c308ec..95a8ecca21f4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -518,9 +518,9 @@ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ __u16 gt_id; -#define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0) -#define DRM_XE_TOPO_DSS_COMPUTE (1 << 1) -#define DRM_XE_TOPO_EU_PER_DSS (1 << 2) +#define DRM_XE_TOPO_DSS_GEOMETRY 1 +#define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_EU_PER_DSS 4 /** @type: type of mask */ __u16 type; -- cgit v1.2.3 From 31ced035ecde7b24aef57c3c4b85bbc3283c81f2 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 29 Mar 2024 12:44:03 +0000 Subject: drm/xe/uapi: Restore flags VM_BIND_FLAG_READONLY and VM_BIND_FLAG_IMMEDIATE The commit 84a1ed5e6756 ("drm/xe/uapi: Remove unused flags") is partially reverted. At the time, flags not used by user space were removed during cleanup. 
Some flags now needed by the compute runtime are brought back in this commit: - DRM_XE_VM_BIND_FLAG_READONLY is used to write-protect kernel ISA, thus preventing accidental overwrites. - DRM_XE_VM_BIND_FLAG_IMMEDIATE is used to trigger mapping at the time of binding in order to prevent faulting at execution time. The changes in the compute runtime are ready and approved, see link below. v2: Include a link to the PR in the commit message (Matthew Brost) v3: Update kernel doc and improve commit message (Lucas De Marchi) Cc: Mateusz Jablonski Cc: Michal Mrozek Cc: Matthew Brost Cc: Lucas De Marchi Link: https://github.com/intel/compute-runtime/pull/717 Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240329124403.7-1-francois.dugast@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 13 +++++++++++-- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++++ include/uapi/drm/xe_drm.h | 8 ++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index fff10e1717ff..66b70fd3d105 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2213,6 +2213,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2407,6 +2411,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ?
@@ -2551,7 +2557,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - !xe_vm_in_fault_mode(vm), + op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2826,7 +2832,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS \ + (DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ + DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index badf3945083d..0447c79c40a2 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -276,6 +276,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + /** @immediate: Immediate bind */ bool immediate; + /** @read_only: Read only */ + bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 95a8ecca21f4..1446c3bae515 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -871,6 +871,12 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: + * - %DRM_XE_VM_BIND_FLAG_READONLY - Set up the page tables as read-only + * to ensure write protection + * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - On a faulting VM, do the + * MAP operation immediately rather than deferring the MAP to the page + * fault handler. This is implied on a non-faulting VM as there is no + * fault handler to defer to. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. In the future, the NULL flags @@ -963,6 +969,8 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ -- cgit v1.2.3 From 9677547d836299497ba2da1ad59851e200109ca1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 18 Mar 2024 22:44:07 +0200 Subject: drm: Introduce plane SIZE_HINTS property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new immutable plane property by which a plane can advertise a handful of recommended plane sizes. This would be mostly exposed by cursor planes as a slightly more capable replacement for the DRM_CAP_CURSOR_WIDTH/HEIGHT caps, which can only declare a one-size-fits-all limit for the whole device. Currently eg. amdgpu/i915/nouveau just advertise the max cursor size via the cursor size caps. But always using the max-sized cursor can waste a surprising amount of power, so a better strategy is desirable. Most other drivers don't specify any cursor size at all, in which case the ioctl code just claims that 64x64 is a great choice. Whether that is actually true is debatable. A poll of various compositor developers informs us that blindly probing with setcursor/atomic ioctl to determine suitable cursor sizes is not acceptable, thus the introduction of the new property to supplant the cursor size caps.
The compositor will now be free to select a more optimal cursor size from the short list of options. Note that the reported sizes (either via the property or the caps) make no claims about things such as plane scaling. So these things should only really be consulted for simple "cursor like" use cases. A userspace consumer in the form of mutter seems ready: https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/3165 v2: Try to add some docs v3: Specify that value 0 is reserved for future use (basic idea from Jonas) Drop the note about typical hardware (Pekka) v4: Update the docs to indicate the list is "in order of preference" Add a link to the mutter MR v5: Limit to cursors only for now (Simon) Cc: Jonas Ådahl Cc: Sameer Lattannavar Reviewed-by: Sebastian Wick Reviewed-by: Simon Ser Acked-by: Daniel Stone Acked-by: Harry Wentland Acked-by: Pekka Paalanen Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240318204408.9687-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_mode_config.c | 7 +++++ drivers/gpu/drm/drm_plane.c | 56 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_mode_config.h | 5 ++++ include/drm/drm_plane.h | 4 +++ include/uapi/drm/drm_mode.h | 11 ++++++++ 5 files changed, 83 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 48fd2d67f352..568972258222 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -372,6 +372,13 @@ static int drm_mode_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.modifiers_property = prop; + prop = drm_property_create(dev, + DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_BLOB, + "SIZE_HINTS", 0); + if (!prop) + return -ENOMEM; + dev->mode_config.size_hints_property = prop; + return 0; } diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 672c655c7a8e..eecc24c54efd 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -140,6 +140,25 @@ * DRM_FORMAT_MOD_LINEAR. Before linux kernel release v5.1 there have been * various bugs in this area with inconsistencies between the capability * flag and per-plane properties. + * + * SIZE_HINTS: + * Blob property which contains the set of recommended plane sizes + * which can be used for simple "cursor like" use cases (eg. no scaling). + * Using these hints frees userspace from extensive probing of + * supported plane sizes through atomic/setcursor ioctls. + * + * The blob contains an array of struct drm_plane_size_hint, in + * order of preference. For optimal usage userspace should pick + * the first size that satisfies its own requirements. + * + * Drivers should only attach this property to planes that + * support a very limited set of sizes. + * + * Note that property value 0 (ie. no blob) is reserved for potential + * future use. Current userspace is expected to ignore the property + * if the value is 0, and fall back to some other means (eg. + * &DRM_CAP_CURSOR_WIDTH and &DRM_CAP_CURSOR_HEIGHT) to determine + * the appropriate plane size to use. */ static unsigned int drm_num_planes(struct drm_device *dev) @@ -1729,3 +1748,40 @@ int drm_plane_create_scaling_filter_property(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_scaling_filter_property); + +/** + * drm_plane_add_size_hints_property - create a size hints property + * + * @plane: drm plane + * @hints: size hints + * @num_hints: number of size hints + * + * Create a size hints property for the plane.
+ * + * RETURNS: + * Zero for success or -errno + */ +int drm_plane_add_size_hints_property(struct drm_plane *plane, + const struct drm_plane_size_hint *hints, + int num_hints) +{ + struct drm_device *dev = plane->dev; + struct drm_mode_config *config = &dev->mode_config; + struct drm_property_blob *blob; + + /* extending to other plane types needs actual thought */ + if (drm_WARN_ON(dev, plane->type != DRM_PLANE_TYPE_CURSOR)) + return -EINVAL; + + blob = drm_property_create_blob(dev, + array_size(sizeof(hints[0]), num_hints), + hints); + if (IS_ERR(blob)) + return PTR_ERR(blob); + + drm_object_attach_property(&plane->base, config->size_hints_property, + blob->base.id); + + return 0; +} +EXPORT_SYMBOL(drm_plane_add_size_hints_property); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 973119a9176b..9d8acf7a10eb 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -942,6 +942,11 @@ struct drm_mode_config { */ struct drm_property *modifiers_property; + /** + * @size_hints_property: Plane SIZE_HINTS property. + */ + struct drm_property *size_hints_property; + /* cursor size */ uint32_t cursor_width, cursor_height; diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 641fe298052d..ec1112208b73 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -32,6 +32,7 @@ #include struct drm_crtc; +struct drm_plane_size_hint; struct drm_printer; struct drm_modeset_acquire_ctx; @@ -976,5 +977,8 @@ drm_plane_get_damage_clips(const struct drm_plane_state *state); int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); +int drm_plane_add_size_hints_property(struct drm_plane *plane, + const struct drm_plane_size_hint *hints, + int num_hints); #endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 7040e7ea80c7..1ca5c7e418fd 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -865,6 +865,17 @@ struct drm_color_lut { __u16 reserved; }; +/** + * struct drm_plane_size_hint - Plane size hints + * + * The plane SIZE_HINTS property blob contains an + * array of struct drm_plane_size_hint. + */ +struct drm_plane_size_hint { + __u16 width; + __u16 height; +}; + /** * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data. * -- cgit v1.2.3
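For readers who want to see how a compositor might consume the new blob, below is a minimal userspace sketch using libdrm's property API (drmModeObjectGetProperties(), drmModeGetProperty(), drmModeGetPropertyBlob()). It assumes a hypothetical already-open DRM device file descriptor fd and a cursor plane object ID plane_id supplied by the caller, and it declares the two-field hint layout locally in case the installed drm_mode.h predates the SIZE_HINTS patch; treat it as an illustration of the lookup flow, not a reference implementation.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Local copy of the array element layout this patch adds to drm_mode.h;
 * only needed when building against headers that predate it. */
struct size_hint {
	uint16_t width;
	uint16_t height;
};

/* Print the recommended sizes for one cursor plane. 'fd' is an open DRM
 * device and 'plane_id' a plane object ID, both provided by the caller. */
static void print_size_hints(int fd, uint32_t plane_id)
{
	drmModeObjectProperties *props;

	props = drmModeObjectGetProperties(fd, plane_id, DRM_MODE_OBJECT_PLANE);
	if (!props)
		return;

	for (uint32_t i = 0; i < props->count_props; i++) {
		drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

		if (!prop)
			continue;

		/* Value 0 (no blob) is reserved, so it is skipped here and
		 * the caller would fall back to the cursor size caps. */
		if (!strcmp(prop->name, "SIZE_HINTS") && props->prop_values[i]) {
			drmModePropertyBlobRes *blob =
				drmModeGetPropertyBlob(fd, (uint32_t)props->prop_values[i]);

			if (blob) {
				const struct size_hint *hint = blob->data;
				uint32_t n = blob->length / sizeof(*hint);

				/* Hints are in order of preference: pick the
				 * first one that satisfies your requirements. */
				for (uint32_t j = 0; j < n; j++)
					printf("hint %u: %ux%u\n", j,
					       hint[j].width, hint[j].height);
				drmModeFreePropertyBlob(blob);
			}
		}
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);
}

Per the SIZE_HINTS kernel doc above, a property value of 0 (no blob) must be ignored, in which case userspace falls back to DRM_CAP_CURSOR_WIDTH/DRM_CAP_CURSOR_HEIGHT; the sketch handles this by skipping the zero value.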