summaryrefslogtreecommitdiff
path: root/include/uapi
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 21:00:17 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 21:00:17 +0300
commit1b722407a13b7f8658d2e26917791f32805980a2 (patch)
tree30aab582725a46e42843d75e2eb9ce4151f0f3ed /include/uapi
parentf8824e151fbfa0ac0a258015d606ea6f4a10251b (diff)
parent5ff2977b19769fd24b0cfbe7cbe4d5114b6106af (diff)
downloadlinux-1b722407a13b7f8658d2e26917791f32805980a2.tar.xz
Merge tag 'drm-next-2023-06-29' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "There is one set of patches to misc for a i915 gsc/mei proxy driver. Otherwise it's mostly amdgpu/i915/msm, lots of hw enablement and lots of refactoring. core: - replace strlcpy with strscpy - EDID changes to support further conversion to struct drm_edid - Move i915 DSC parameter code to common DRM helpers - Add Colorspace functionality aperture: - ignore framebuffers with non-primary devices fbdev: - use fbdev i/o helpers - add Kconfig options for fb_ops helpers - use new fb io helpers directly in drivers sysfs: - export DRM connector ID scheduler: - Avoid an infinite loop ttm: - store function table in .rodata - Add query for TTM mem limit - Add NUMA awareness to pools - Export ttm_pool_fini() bridge: - fsl-ldb: support i.MX6SX - lt9211, lt9611: remove blanking packets - tc358768: implement input bus formats, devm cleanups - ti-snd65dsi86: implement wait_hpd_asserted - analogix: fix endless probe loop - samsung-dsim: support swapped clock, fix enabling, support var clock - display-connector: Add support for external power supply - imx: Fix module linking - tc358762: Support reset GPIO panel: - nt36523: Support Lenovo J606F - st7703: Support Anbernic RG353V-V2 - InnoLux G070ACE-L01 support - boe-tv101wum-nl6: Improve initialization - sharp-ls043t1le001: Mode fixes - simple: BOE EV121WXM-N10-1850, S6D7AA0 - Ampire AM-800480L1TMQW-T00H - Rocktech RK043FN48H - Starry himax83102-j02 - Starry ili9882t amdgpu: - add new ctx query flag to handle reset better - add new query/set shadow buffer for rdna3 - DCN 3.2/3.1.x/3.0.x updates - Enable DC_FP on loongarch - PCIe fix for RDNA2 - improve DC FAMS/SubVP support for better power management - partition support for lots of engines - Take NUMA into account when allocating memory - Add new DRM_AMDGPU_WERROR config parameter to help with CI - Initial SMU13 overdrive support - Add support for new colorspace KMS API - W=1 fixes amdkfd: - Query TTM mem limit rather than hardcoding it - GC 9.4.3 partition support - Handle NUMA for partitions - Add debugger interface for enabling gdb - Add KFD event age tracking radeon: - Fix possible UAF i915: - new getparam for PXP support - GSC/MEI proxy driver - Meteorlake display enablement - avoid clearing preallocated framebuffers with TTM - implement framebuffer mmap support - Disable sampler indirect state in bindless heap - Enable fdinfo for GuC backends - GuC loading and firmware table handling fixes - Various refactors for multi-tile enablement - Define MOCS and PAT tables for MTL - GSC/MEI support for Meteorlake - PMU multi-tile support - Large driver kernel doc cleanup - Allow VRR toggling and arbitrary refresh rates - Support async flips on linear buffers on display ver 12+ - Expose CRTC CTM property on ILK/SNB/VLV - New debugfs for display clock frequencies - Hotplug refactoring - Display refactoring - I915_GEM_CREATE_EXT_SET_PAT for Mesa on Meteorlake - Use large rings for compute contexts - HuC loading for MTL - Allow user to set cache at BO creation - MTL powermanagement enhancements - Switch to dedicated workqueues to stop using flush_scheduled_work() - Move display runtime init under display/ - Remove 10bit gamma on desktop gen3 parts, they don't support it habanalabs: - uapi: return 0 for user queries if there was a h/w or f/w error - Add pci health check when we lose connection with the firmware. This can be used to distinguish between pci link down and firmware getting stuck. - Add more info to the error print when TPC interrupt occur. - Firmware fixes msm: - Adreno A660 bindings - SM8350 MDSS bindings fix - Added support for DPU on sm6350 and sm6375 platforms - Implemented tearcheck support to support vsync on SM150 and newer platforms - Enabled missing features (DSPP, DSC, split display) on sc8180x, sc8280xp, sm8450 - Added support for DSI and 28nm DSI PHY on MSM8226 platform - Added support for DSI on sm6350 and sm6375 platforms - Added support for display controller on MSM8226 platform - A690 GPU support - Move cmdstream dumping out of fence signaling path - a610 support - Support for a6xx devices without GMU nouveau: - NULL ptr before deref fixes armada: - implement fbdev emulation as client sun4i: - fix mipi-dsi dotclock - release clocks vc4: - rgb range toggle property - BT601 / BT2020 HDMI support vkms: - convert to drmm helpers - add reflection and rotation support - fix rgb565 conversion gma500: - fix iomem access shmobile: - support renesas soc platform - enable fbdev mxsfb: - Add support for i.MX93 LCDIF stm: - dsi: Use devm_ helper - ltdc: Fix potential invalid pointer deref renesas: - Group drivers in renesas subdirectory to prepare for new platform - Drop deprecated R-Car H3 ES1.x support meson: - Add support for MIPI DSI displays virtio: - add sync object support mediatek: - Add display binding document for MT6795" * tag 'drm-next-2023-06-29' of git://anongit.freedesktop.org/drm/drm: (1791 commits) drm/i915: Fix a NULL vs IS_ERR() bug drm/i915: make i915_drm_client_fdinfo() reference conditional again drm/i915/huc: Fix missing error code in intel_huc_init() drm/i915/gsc: take a wakeref for the proxy-init-completion check drm/msm/a6xx: Add A610 speedbin support drm/msm/a6xx: Add A619_holi speedbin support drm/msm/a6xx: Use adreno_is_aXYZ macros in speedbin matching drm/msm/a6xx: Use "else if" in GPU speedbin rev matching drm/msm/a6xx: Fix some A619 tunables drm/msm/a6xx: Add A610 support drm/msm/a6xx: Add support for A619_holi drm/msm/adreno: Disable has_cached_coherent in GMU wrapper configurations drm/msm/a6xx: Introduce GMU wrapper support drm/msm/a6xx: Move CX GMU power counter enablement to hw_init drm/msm/a6xx: Extend and explain UBWC config drm/msm/a6xx: Remove both GBIF and RBBM GBIF halt on hw init drm/msm/a6xx: Add a helper for software-resetting the GPU drm/msm/a6xx: Improve a6xx_bus_clear_pending_transactions() drm/msm/a6xx: Move a6xx_bus_clear_pending_transactions to a6xx_gpu drm/msm/a6xx: Move force keepalive vote removal to a6xx_gmu_force_off() ...
Diffstat (limited to 'include/uapi')
-rw-r--r--include/uapi/drm/amdgpu_drm.h22
-rw-r--r--include/uapi/drm/drm_fourcc.h43
-rw-r--r--include/uapi/drm/drm_mode.h5
-rw-r--r--include/uapi/drm/habanalabs_accel.h10
-rw-r--r--include/uapi/drm/i915_drm.h95
-rw-r--r--include/uapi/linux/kfd_ioctl.h682
-rw-r--r--include/uapi/linux/kfd_sysfs.h15
7 files changed, 867 insertions, 5 deletions
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index b6eb90df5d05..79b14828d542 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -245,6 +245,8 @@ union drm_amdgpu_bo_list {
/* indicate some errors are detected by RAS */
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)
+/* indicate that the reset hasn't completed yet */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5)
/* Context priority level */
#define AMDGPU_CTX_PRIORITY_UNSET -2048
@@ -592,6 +594,7 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08
#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09
+#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a
struct drm_amdgpu_cs_chunk {
__u32 chunk_id;
@@ -708,6 +711,15 @@ struct drm_amdgpu_cs_chunk_data {
};
};
+#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1
+
+struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
+ __u64 shadow_va;
+ __u64 csa_va;
+ __u64 gds_va;
+ __u64 flags;
+};
+
/*
* Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU
*
@@ -876,6 +888,8 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_VIDEO_CAPS_DECODE 0
/* Subquery id: Encode */
#define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1
+/* Query the max number of IBs per gang per submission */
+#define AMDGPU_INFO_MAX_IBS 0x22
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
@@ -1126,6 +1140,14 @@ struct drm_amdgpu_info_device {
__u64 mall_size; /* AKA infinity cache */
/* high 32 bits of the rb pipes mask */
__u32 enabled_rb_pipes_mask_hi;
+ /* shadow area size for gfx11 */
+ __u32 shadow_size;
+ /* shadow area base virtual alignment for gfx11 */
+ __u32 shadow_alignment;
+ /* context save area size for gfx11 */
+ __u32 csa_size;
+ /* context save area base virtual alignment for gfx11 */
+ __u32 csa_alignment;
};
struct drm_amdgpu_info_hw_ip {
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index de703c6be969..8db7fd3f743e 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -658,6 +658,49 @@ extern "C" {
#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12)
/*
+ * Intel Color Control Surfaces (CCS) for display ver. 14 render compression.
+ *
+ * The main surface is tile4 and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * tile4 widths.
+ */
+#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13)
+
+/*
+ * Intel Color Control Surfaces (CCS) for display ver. 14 media compression
+ *
+ * The main surface is tile4 and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * tile4 widths. For semi-planar formats like NV12, CCS planes follow the
+ * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces,
+ * planes 2 and 3 for the respective CCS.
+ */
+#define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14)
+
+/*
+ * Intel Color Control Surface with Clear Color (CCS) for display ver. 14 render
+ * compression.
+ *
+ * The main surface is tile4 and is at plane index 0 whereas CCS is linear
+ * and at index 1. The clear color is stored at index 2, and the pitch should
+ * be ignored. The clear color structure is 256 bits. The first 128 bits
+ * represents Raw Clear Color Red, Green, Blue and Alpha color each represented
+ * by 32 bits. The raw clear color is consumed by the 3d engine and generates
+ * the converted clear color of size 64 bits. The first 32 bits store the Lower
+ * Converted Clear Color value and the next 32 bits store the Higher Converted
+ * Clear Color value when applicable. The Converted Clear Color values are
+ * consumed by the DE. The last 64 bits are used to store Color Discard Enable
+ * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line
+ * corresponds to an area of 4x1 tiles in the main surface. The main surface
+ * pitch is required to be a multiple of 4 tile widths.
+ */
+#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15)
+
+/*
* Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
*
* Macroblocks are laid in a Z-shape, and each pixel data is following the
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 46becedf5b2f..43691058d28f 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -834,6 +834,11 @@ struct drm_color_ctm {
/*
* Conversion matrix in S31.32 sign-magnitude
* (not two's complement!) format.
+ *
+ * out matrix in
+ * |R| |0 1 2| |R|
+ * |G| = |3 4 5| x |G|
+ * |B| |6 7 8| |B|
*/
__u64 matrix[9];
};
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index d9ef1b151d04..e6436f3e8ea6 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -787,18 +787,28 @@ enum hl_server_type {
* The address which accessing it caused the razwi.
* Razwi initiator.
* Razwi cause, was it a page fault or MMU access error.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation
* HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot.
* HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications.
* HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
* HL_INFO_GET_EVENTS - Retrieve the last occurred events
* HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic.
* HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event.
* HL_INFO_FW_GENERIC_REQ - Send generic request to FW.
* HL_INFO_HW_ERR_EVENT - Retrieve information on the reported HW error.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
* HL_INFO_FW_ERR_EVENT - Retrieve information on the reported FW error.
+ * May return 0 even though no new data is available, in that case
+ * timestamp will be 0.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..7000e5910a1d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
-#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+/*
+ * Top 4 bits of every non-engine counter are GT id.
+ */
+#define __I915_PMU_GT_SHIFT (60)
+
+#define ___I915_PMU_OTHER(gt, x) \
+ (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
+ ((__u64)(gt) << __I915_PMU_GT_SHIFT))
+
+#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
+#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
+#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
+#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
+#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
+#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4)
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -659,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
- * * 1 if HuC firmware is authenticated and running.
+ * * 1 if HuC firmware is loaded and fully authenticated,
+ * * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
@@ -771,6 +787,25 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
+/*
+ * Query the status of PXP support in i915.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ * -ENODEV = PXP support is not available on the GPU device or in the
+ * kernel due to missing component drivers or kernel configs.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of
+ * the following values:
+ * 1 = PXP feature is supported and is ready for use.
+ * 2 = PXP feature is supported but should be ready soon (pending
+ * initialization of non-i915 system dependencies).
+ *
+ * NOTE: When param is supported (positive return values), user space should
+ * still refer to the GEM PXP context-creation UAPI header specs to be
+ * aware of possible failure due to system state machine at the time.
+ */
+#define I915_PARAM_PXP_STATUS 58
+
/* Must be kept compact -- no holes and well documented */
/**
@@ -2096,6 +2131,21 @@ struct drm_i915_gem_context_param {
*
* -ENODEV: feature not available
* -EPERM: trying to mark a recoverable or not bannable context as protected
+ * -ENXIO: A dependency such as a component driver or firmware is not yet
+ * loaded so user space may need to attempt again. Depending on the
+ * device, this error may be reported if protected context creation is
+ * attempted very early after kernel start because the internal timeout
+ * waiting for such dependencies is not guaranteed to be larger than
+ * required (numbers differ depending on system and kernel config):
+ * - ADL/RPL: dependencies may take up to 3 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * - MTL: dependencies may take up to 8 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * NOTE: such dependencies happen once, so a subsequent call to create a
+ * protected context after a prior successful call will not experience
+ * such timeouts and will not return -ENXIO (unless the driver is reloaded,
+ * or, depending on the device, resumes from a suspended state).
+ * -EIO: The firmware did not succeed in creating the protected context.
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
/* Must be kept compact -- no holes and well documented */
@@ -3630,9 +3680,13 @@ struct drm_i915_gem_create_ext {
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
+ *
+ * For I915_GEM_CREATE_EXT_SET_PAT usage see
+ * struct drm_i915_gem_create_ext_set_pat.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions;
};
@@ -3747,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
};
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ * .pat_index = 0,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = PAGE_SIZE,
+ * .extensions = (uintptr_t)&set_pat_ext,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+ /**
+ * @pat_index: PAT index to be set
+ * PAT index is a bit field in Page Table Entry to control caching
+ * behaviors for GPU accesses. The definition of PAT index is
+ * platform dependent and can be found in hardware specifications,
+ */
+ __u32 pat_index;
+ /** @rsvd: reserved for future use */
+ __u32 rsvd;
+};
+
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 2da5c3ad71bd..eeb2fdcbdcb7 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -38,9 +38,11 @@
* - 1.10 - Add SMI profiler event log
* - 1.11 - Add unified memory for ctx save/restore area
* - 1.12 - Add DMA buf export ioctl
+ * - 1.13 - Add debugger API
+ * - 1.14 - Update kfd_event_data
*/
#define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 12
+#define KFD_IOCTL_MINOR_VERSION 14
struct kfd_ioctl_get_version_args {
__u32 major_version; /* from KFD */
@@ -110,6 +112,32 @@ struct kfd_ioctl_get_available_memory_args {
__u32 pad;
};
+struct kfd_dbg_device_info_entry {
+ __u64 exception_status;
+ __u64 lds_base;
+ __u64 lds_limit;
+ __u64 scratch_base;
+ __u64 scratch_limit;
+ __u64 gpuvm_base;
+ __u64 gpuvm_limit;
+ __u32 gpu_id;
+ __u32 location_id;
+ __u32 vendor_id;
+ __u32 device_id;
+ __u32 revision_id;
+ __u32 subsystem_vendor_id;
+ __u32 subsystem_device_id;
+ __u32 fw_version;
+ __u32 gfx_target_version;
+ __u32 simd_count;
+ __u32 max_waves_per_simd;
+ __u32 array_count;
+ __u32 simd_arrays_per_engine;
+ __u32 num_xcc;
+ __u32 capability;
+ __u32 debug_prop;
+};
+
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -293,12 +321,20 @@ struct kfd_hsa_hw_exception_data {
__u32 gpu_id;
};
+/* hsa signal event data */
+struct kfd_hsa_signal_event_data {
+ __u64 last_event_age; /* to and from KFD */
+};
+
/* Event data */
struct kfd_event_data {
union {
+ /* From KFD */
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
- }; /* From KFD */
+ /* To and From KFD */
+ struct kfd_hsa_signal_event_data signal_event_data;
+ };
__u64 kfd_event_data_ext; /* pointer to an extension structure
for future exception types */
__u32 event_id; /* to KFD */
@@ -773,6 +809,640 @@ struct kfd_ioctl_set_xnack_mode_args {
__s32 xnack_enabled;
};
+/* Wave launch override modes */
+enum kfd_dbg_trap_override_mode {
+ KFD_DBG_TRAP_OVERRIDE_OR = 0,
+ KFD_DBG_TRAP_OVERRIDE_REPLACE = 1
+};
+
+/* Wave launch overrides */
+enum kfd_dbg_trap_mask {
+ KFD_DBG_TRAP_MASK_FP_INVALID = 1,
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2,
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4,
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8,
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16,
+ KFD_DBG_TRAP_MASK_FP_INEXACT = 32,
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64,
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH = 128,
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256,
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START = (1 << 30),
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END = (1 << 31)
+};
+
+/* Wave launch modes */
+enum kfd_dbg_trap_wave_launch_mode {
+ KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL = 0,
+ KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT = 1,
+ KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG = 3
+};
+
+/* Address watch modes */
+enum kfd_dbg_trap_address_watch_mode {
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ = 0,
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD = 1,
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC = 2,
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL = 3
+};
+
+/* Additional wave settings */
+enum kfd_dbg_trap_flags {
+ KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1,
+};
+
+/* Trap exceptions */
+enum kfd_dbg_trap_exception_code {
+ EC_NONE = 0,
+ /* per queue */
+ EC_QUEUE_WAVE_ABORT = 1,
+ EC_QUEUE_WAVE_TRAP = 2,
+ EC_QUEUE_WAVE_MATH_ERROR = 3,
+ EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION = 4,
+ EC_QUEUE_WAVE_MEMORY_VIOLATION = 5,
+ EC_QUEUE_WAVE_APERTURE_VIOLATION = 6,
+ EC_QUEUE_PACKET_DISPATCH_DIM_INVALID = 16,
+ EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17,
+ EC_QUEUE_PACKET_DISPATCH_CODE_INVALID = 18,
+ EC_QUEUE_PACKET_RESERVED = 19,
+ EC_QUEUE_PACKET_UNSUPPORTED = 20,
+ EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID = 21,
+ EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID = 22,
+ EC_QUEUE_PACKET_VENDOR_UNSUPPORTED = 23,
+ EC_QUEUE_PREEMPTION_ERROR = 30,
+ EC_QUEUE_NEW = 31,
+ /* per device */
+ EC_DEVICE_QUEUE_DELETE = 32,
+ EC_DEVICE_MEMORY_VIOLATION = 33,
+ EC_DEVICE_RAS_ERROR = 34,
+ EC_DEVICE_FATAL_HALT = 35,
+ EC_DEVICE_NEW = 36,
+ /* per process */
+ EC_PROCESS_RUNTIME = 48,
+ EC_PROCESS_DEVICE_REMOVE = 49,
+ EC_MAX
+};
+
+/* Mask generated by ecode in kfd_dbg_trap_exception_code */
+#define KFD_EC_MASK(ecode) (1ULL << (ecode - 1))
+
+/* Masks for exception code type checks below */
+#define KFD_EC_MASK_QUEUE (KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_MATH_ERROR) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_MEMORY_VIOLATION) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_APERTURE_VIOLATION) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PREEMPTION_ERROR) | \
+ KFD_EC_MASK(EC_QUEUE_NEW))
+#define KFD_EC_MASK_DEVICE (KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE) | \
+ KFD_EC_MASK(EC_DEVICE_RAS_ERROR) | \
+ KFD_EC_MASK(EC_DEVICE_FATAL_HALT) | \
+ KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION) | \
+ KFD_EC_MASK(EC_DEVICE_NEW))
+#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \
+ KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
+
+/* Checks for exception code types for KFD search */
+#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \
+ (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
+#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \
+ (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
+#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
+ (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+
+
+/* Runtime enable states */
+enum kfd_dbg_runtime_state {
+ DEBUG_RUNTIME_STATE_DISABLED = 0,
+ DEBUG_RUNTIME_STATE_ENABLED = 1,
+ DEBUG_RUNTIME_STATE_ENABLED_BUSY = 2,
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR = 3
+};
+
+/* Runtime enable status */
+struct kfd_runtime_info {
+ __u64 r_debug;
+ __u32 runtime_state;
+ __u32 ttmp_setup;
+};
+
+/* Enable modes for runtime enable */
+#define KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK 1
+#define KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK 2
+
+/**
+ * kfd_ioctl_runtime_enable_args - Arguments for runtime enable
+ *
+ * Coordinates debug exception signalling and debug device enablement with runtime.
+ *
+ * @r_debug - pointer to user struct for sharing information between ROCr and the debuggger
+ * @mode_mask - mask to set mode
+ * KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK - enable runtime for debugging, otherwise disable
+ * KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK - enable trap temporary setup (ignore on disable)
+ * @capabilities_mask - mask to notify runtime on what KFD supports
+ *
+ * Return - 0 on SUCCESS.
+ * - EBUSY if runtime enable call already pending.
+ * - EEXIST if user queues already active prior to call.
+ * If process is debug enabled, runtime enable will enable debug devices and
+ * wait for debugger process to send runtime exception EC_PROCESS_RUNTIME
+ * to unblock - see kfd_ioctl_dbg_trap_args.
+ *
+ */
+struct kfd_ioctl_runtime_enable_args {
+ __u64 r_debug;
+ __u32 mode_mask;
+ __u32 capabilities_mask;
+};
+
+/* Queue information */
+struct kfd_queue_snapshot_entry {
+ __u64 exception_status;
+ __u64 ring_base_address;
+ __u64 write_pointer_address;
+ __u64 read_pointer_address;
+ __u64 ctx_save_restore_address;
+ __u32 queue_id;
+ __u32 gpu_id;
+ __u32 ring_size;
+ __u32 queue_type;
+ __u32 ctx_save_restore_area_size;
+ __u32 reserved;
+};
+
+/* Queue status return for suspend/resume */
+#define KFD_DBG_QUEUE_ERROR_BIT 30
+#define KFD_DBG_QUEUE_INVALID_BIT 31
+#define KFD_DBG_QUEUE_ERROR_MASK (1 << KFD_DBG_QUEUE_ERROR_BIT)
+#define KFD_DBG_QUEUE_INVALID_MASK (1 << KFD_DBG_QUEUE_INVALID_BIT)
+
+/* Context save area header information */
+struct kfd_context_save_area_header {
+ struct {
+ __u32 control_stack_offset;
+ __u32 control_stack_size;
+ __u32 wave_state_offset;
+ __u32 wave_state_size;
+ } wave_state;
+ __u32 debug_offset;
+ __u32 debug_size;
+ __u64 err_payload_addr;
+ __u32 err_event_id;
+ __u32 reserved1;
+};
+
+/*
+ * Debug operations
+ *
+ * For specifics on usage and return values, see documentation per operation
+ * below. Otherwise, generic error returns apply:
+ * - ESRCH if the process to debug does not exist.
+ *
+ * - EINVAL (with KFD_IOC_DBG_TRAP_ENABLE exempt) if operation
+ * KFD_IOC_DBG_TRAP_ENABLE has not succeeded prior.
+ * Also returns this error if GPU hardware scheduling is not supported.
+ *
+ * - EPERM (with KFD_IOC_DBG_TRAP_DISABLE exempt) if target process is not
+ * PTRACE_ATTACHED. KFD_IOC_DBG_TRAP_DISABLE is exempt to allow
+ * clean up of debug mode as long as process is debug enabled.
+ *
+ * - EACCES if any DBG_HW_OP (debug hardware operation) is requested when
+ * AMDKFD_IOC_RUNTIME_ENABLE has not succeeded prior.
+ *
+ * - ENODEV if any GPU does not support debugging on a DBG_HW_OP call.
+ *
+ * - Other errors may be returned when a DBG_HW_OP occurs while the GPU
+ * is in a fatal state.
+ *
+ */
+enum kfd_dbg_trap_operations {
+ KFD_IOC_DBG_TRAP_ENABLE = 0,
+ KFD_IOC_DBG_TRAP_DISABLE = 1,
+ KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT = 2,
+ KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED = 3,
+ KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE = 4, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE = 5, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SUSPEND_QUEUES = 6, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_RESUME_QUEUES = 7, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH = 8, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH = 9, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SET_FLAGS = 10,
+ KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT = 11,
+ KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO = 12,
+ KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT = 13,
+ KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT = 14
+};
+
+/**
+ * kfd_ioctl_dbg_trap_enable_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_ENABLE.
+ *
+ * Enables debug session for target process. Call @op KFD_IOC_DBG_TRAP_DISABLE in
+ * kfd_ioctl_dbg_trap_args to disable debug session.
+ *
+ * @exception_mask (IN) - exceptions to raise to the debugger
+ * @rinfo_ptr (IN) - pointer to runtime info buffer (see kfd_runtime_info)
+ * @rinfo_size (IN/OUT) - size of runtime info buffer in bytes
+ * @dbg_fd (IN) - fd the KFD will nofify the debugger with of raised
+ * exceptions set in exception_mask.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Copies KFD saved kfd_runtime_info to @rinfo_ptr on enable.
+ * Size of kfd_runtime saved by the KFD returned to @rinfo_size.
+ * - EBADF if KFD cannot get a reference to dbg_fd.
+ * - EFAULT if KFD cannot copy runtime info to rinfo_ptr.
+ * - EINVAL if target process is already debug enabled.
+ *
+ */
+struct kfd_ioctl_dbg_trap_enable_args {
+ __u64 exception_mask;
+ __u64 rinfo_ptr;
+ __u32 rinfo_size;
+ __u32 dbg_fd;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_send_runtime_event_args
+ *
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT.
+ * Raises exceptions to runtime.
+ *
+ * @exception_mask (IN) - exceptions to raise to runtime
+ * @gpu_id (IN) - target device id
+ * @queue_id (IN) - target queue id
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * - ENODEV if gpu_id not found.
+ * If exception_mask contains EC_PROCESS_RUNTIME, unblocks pending
+ * AMDKFD_IOC_RUNTIME_ENABLE call - see kfd_ioctl_runtime_enable_args.
+ * All other exceptions are raised to runtime through err_payload_addr.
+ * See kfd_context_save_area_header.
+ */
+struct kfd_ioctl_dbg_trap_send_runtime_event_args {
+ __u64 exception_mask;
+ __u32 gpu_id;
+ __u32 queue_id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_exceptions_enabled_args
+ *
+ * Arguments for KFD_IOC_SET_EXCEPTIONS_ENABLED
+ * Set new exceptions to be raised to the debugger.
+ *
+ * @exception_mask (IN) - new exceptions to raise the debugger
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ */
+struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args {
+ __u64 exception_mask;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_wave_launch_override_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE
+ * Enable HW exceptions to raise trap.
+ *
+ * @override_mode (IN) - see kfd_dbg_trap_override_mode
+ * @enable_mask (IN/OUT) - reference kfd_dbg_trap_mask.
+ * IN is the override modes requested to be enabled.
+ * OUT is referenced in Return below.
+ * @support_request_mask (IN/OUT) - reference kfd_dbg_trap_mask.
+ * IN is the override modes requested for support check.
+ * OUT is referenced in Return below.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Previous enablement is returned in @enable_mask.
+ * Actual override support is returned in @support_request_mask.
+ * - EINVAL if override mode is not supported.
+ * - EACCES if trap support requested is not actually supported.
+ * i.e. enable_mask (IN) is not a subset of support_request_mask (OUT).
+ * Otherwise it is considered a generic error (see kfd_dbg_trap_operations).
+ */
+struct kfd_ioctl_dbg_trap_set_wave_launch_override_args {
+ __u32 override_mode;
+ __u32 enable_mask;
+ __u32 support_request_mask;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_wave_launch_mode_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE
+ * Set wave launch mode.
+ *
+ * @mode (IN) - see kfd_dbg_trap_wave_launch_mode
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ */
+struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args {
+ __u32 launch_mode;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_suspend_queues_ags
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SUSPEND_QUEUES
+ * Suspend queues.
+ *
+ * @exception_mask (IN) - raised exceptions to clear
+ * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id)
+ * to suspend
+ * @num_queues (IN) - number of queues to suspend in @queue_array_ptr
+ * @grace_period (IN) - wave time allowance before preemption
+ * per 1K GPU clock cycle unit
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Destruction of a suspended queue is blocked until the queue is
+ * resumed. This allows the debugger to access queue information and
+ * the its context save area without running into a race condition on
+ * queue destruction.
+ * Automatically copies per queue context save area header information
+ * into the save area base
+ * (see kfd_queue_snapshot_entry and kfd_context_save_area_header).
+ *
+ * Return - Number of queues suspended on SUCCESS.
+ * . KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK masked
+ * for each queue id in @queue_array_ptr array reports unsuccessful
+ * suspend reason.
+ * KFD_DBG_QUEUE_ERROR_MASK = HW failure.
+ * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist, is new or
+ * is being destroyed.
+ */
+struct kfd_ioctl_dbg_trap_suspend_queues_args {
+ __u64 exception_mask;
+ __u64 queue_array_ptr;
+ __u32 num_queues;
+ __u32 grace_period;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_resume_queues_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_RESUME_QUEUES
+ * Resume queues.
+ *
+ * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id)
+ * to resume
+ * @num_queues (IN) - number of queues to resume in @queue_array_ptr
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - Number of queues resumed on SUCCESS.
+ * KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK mask
+ * for each queue id in @queue_array_ptr array reports unsuccessful
+ * resume reason.
+ * KFD_DBG_QUEUE_ERROR_MASK = HW failure.
+ * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist.
+ */
+struct kfd_ioctl_dbg_trap_resume_queues_args {
+ __u64 queue_array_ptr;
+ __u32 num_queues;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_node_address_watch_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH
+ * Sets address watch for device.
+ *
+ * @address (IN) - watch address to set
+ * @mode (IN) - see kfd_dbg_trap_address_watch_mode
+ * @mask (IN) - watch address mask
+ * @gpu_id (IN) - target gpu to set watch point
+ * @id (OUT) - watch id allocated
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Allocated watch ID returned to @id.
+ * - ENODEV if gpu_id not found.
+ * - ENOMEM if watch IDs can be allocated
+ */
+struct kfd_ioctl_dbg_trap_set_node_address_watch_args {
+ __u64 address;
+ __u32 mode;
+ __u32 mask;
+ __u32 gpu_id;
+ __u32 id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_clear_node_address_watch_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH
+ * Clear address watch for device.
+ *
+ * @gpu_id (IN) - target device to clear watch point
+ * @id (IN) - allocated watch id to clear
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * - ENODEV if gpu_id not found.
+ * - EINVAL if watch ID has not been allocated.
+ */
+struct kfd_ioctl_dbg_trap_clear_node_address_watch_args {
+ __u32 gpu_id;
+ __u32 id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_flags_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_FLAGS
+ * Sets flags for wave behaviour.
+ *
+ * @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * - EACCESS if any debug device does not allow flag options.
+ */
+struct kfd_ioctl_dbg_trap_set_flags_args {
+ __u32 flags;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_query_debug_event_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT
+ *
+ * Find one or more raised exceptions. This function can return multiple
+ * exceptions from a single queue or a single device with one call. To find
+ * all raised exceptions, this function must be called repeatedly until it
+ * returns -EAGAIN. Returned exceptions can optionally be cleared by
+ * setting the corresponding bit in the @exception_mask input parameter.
+ * However, clearing an exception prevents retrieving further information
+ * about it with KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO.
+ *
+ * @exception_mask (IN/OUT) - exception to clear (IN) and raised (OUT)
+ * @gpu_id (OUT) - gpu id of exceptions raised
+ * @queue_id (OUT) - queue id of exceptions raised
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on raised exception found
+ * Raised exceptions found are returned in @exception mask
+ * with reported source id returned in @gpu_id or @queue_id.
+ * - EAGAIN if no raised exception has been found
+ */
+struct kfd_ioctl_dbg_trap_query_debug_event_args {
+ __u64 exception_mask;
+ __u32 gpu_id;
+ __u32 queue_id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_query_exception_info_args
+ *
+ * Arguments KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO
+ * Get additional info on raised exception.
+ *
+ * @info_ptr (IN) - pointer to exception info buffer to copy to
+ * @info_size (IN/OUT) - exception info buffer size (bytes)
+ * @source_id (IN) - target gpu or queue id
+ * @exception_code (IN) - target exception
+ * @clear_exception (IN) - clear raised @exception_code exception
+ * (0 = false, 1 = true)
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * If @exception_code is EC_DEVICE_MEMORY_VIOLATION, copy @info_size(OUT)
+ * bytes of memory exception data to @info_ptr.
+ * If @exception_code is EC_PROCESS_RUNTIME, copy saved
+ * kfd_runtime_info to @info_ptr.
+ * Actual required @info_ptr size (bytes) is returned in @info_size.
+ */
+struct kfd_ioctl_dbg_trap_query_exception_info_args {
+ __u64 info_ptr;
+ __u32 info_size;
+ __u32 source_id;
+ __u32 exception_code;
+ __u32 clear_exception;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_get_queue_snapshot_args
+ *
+ * Arguments KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT
+ * Get queue information.
+ *
+ * @exception_mask (IN) - exceptions raised to clear
+ * @snapshot_buf_ptr (IN) - queue snapshot entry buffer (see kfd_queue_snapshot_entry)
+ * @num_queues (IN/OUT) - number of queue snapshot entries
+ * The debugger specifies the size of the array allocated in @num_queues.
+ * KFD returns the number of queues that actually existed. If this is
+ * larger than the size specified by the debugger, KFD will not overflow
+ * the array allocated by the debugger.
+ *
+ * @entry_size (IN/OUT) - size per entry in bytes
+ * The debugger specifies sizeof(struct kfd_queue_snapshot_entry) in
+ * @entry_size. KFD returns the number of bytes actually populated per
+ * entry. The debugger should use the KFD_IOCTL_MINOR_VERSION to determine,
+ * which fields in struct kfd_queue_snapshot_entry are valid. This allows
+ * growing the ABI in a backwards compatible manner.
+ * Note that entry_size(IN) should still be used to stride the snapshot buffer in the
+ * event that it's larger than actual kfd_queue_snapshot_entry.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Copies @num_queues(IN) queue snapshot entries of size @entry_size(IN)
+ * into @snapshot_buf_ptr if @num_queues(IN) > 0.
+ * Otherwise return @num_queues(OUT) queue snapshot entries that exist.
+ */
+struct kfd_ioctl_dbg_trap_queue_snapshot_args {
+ __u64 exception_mask;
+ __u64 snapshot_buf_ptr;
+ __u32 num_queues;
+ __u32 entry_size;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_get_device_snapshot_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT
+ * Get device information.
+ *
+ * @exception_mask (IN) - exceptions raised to clear
+ * @snapshot_buf_ptr (IN) - pointer to snapshot buffer (see kfd_dbg_device_info_entry)
+ * @num_devices (IN/OUT) - number of debug devices to snapshot
+ * The debugger specifies the size of the array allocated in @num_devices.
+ * KFD returns the number of devices that actually existed. If this is
+ * larger than the size specified by the debugger, KFD will not overflow
+ * the array allocated by the debugger.
+ *
+ * @entry_size (IN/OUT) - size per entry in bytes
+ * The debugger specifies sizeof(struct kfd_dbg_device_info_entry) in
+ * @entry_size. KFD returns the number of bytes actually populated. The
+ * debugger should use KFD_IOCTL_MINOR_VERSION to determine, which fields
+ * in struct kfd_dbg_device_info_entry are valid. This allows growing the
+ * ABI in a backwards compatible manner.
+ * Note that entry_size(IN) should still be used to stride the snapshot buffer in the
+ * event that it's larger than actual kfd_dbg_device_info_entry.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Copies @num_devices(IN) device snapshot entries of size @entry_size(IN)
+ * into @snapshot_buf_ptr if @num_devices(IN) > 0.
+ * Otherwise return @num_devices(OUT) queue snapshot entries that exist.
+ */
+struct kfd_ioctl_dbg_trap_device_snapshot_args {
+ __u64 exception_mask;
+ __u64 snapshot_buf_ptr;
+ __u32 num_devices;
+ __u32 entry_size;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_args
+ *
+ * Arguments to debug target process.
+ *
+ * @pid - target process to debug
+ * @op - debug operation (see kfd_dbg_trap_operations)
+ *
+ * @op determines which union struct args to use.
+ * Refer to kern docs for each kfd_ioctl_dbg_trap_*_args struct.
+ */
+struct kfd_ioctl_dbg_trap_args {
+ __u32 pid;
+ __u32 op;
+
+ union {
+ struct kfd_ioctl_dbg_trap_enable_args enable;
+ struct kfd_ioctl_dbg_trap_send_runtime_event_args send_runtime_event;
+ struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args set_exceptions_enabled;
+ struct kfd_ioctl_dbg_trap_set_wave_launch_override_args launch_override;
+ struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args launch_mode;
+ struct kfd_ioctl_dbg_trap_suspend_queues_args suspend_queues;
+ struct kfd_ioctl_dbg_trap_resume_queues_args resume_queues;
+ struct kfd_ioctl_dbg_trap_set_node_address_watch_args set_node_address_watch;
+ struct kfd_ioctl_dbg_trap_clear_node_address_watch_args clear_node_address_watch;
+ struct kfd_ioctl_dbg_trap_set_flags_args set_flags;
+ struct kfd_ioctl_dbg_trap_query_debug_event_args query_debug_event;
+ struct kfd_ioctl_dbg_trap_query_exception_info_args query_exception_info;
+ struct kfd_ioctl_dbg_trap_queue_snapshot_args queue_snapshot;
+ struct kfd_ioctl_dbg_trap_device_snapshot_args device_snapshot;
+ };
+};
+
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -887,7 +1557,13 @@ struct kfd_ioctl_set_xnack_mode_args {
#define AMDKFD_IOC_EXPORT_DMABUF \
AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args)
+#define AMDKFD_IOC_RUNTIME_ENABLE \
+ AMDKFD_IOWR(0x25, struct kfd_ioctl_runtime_enable_args)
+
+#define AMDKFD_IOC_DBG_TRAP \
+ AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x25
+#define AMDKFD_COMMAND_END 0x27
#endif
diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
index 3e330f368917..a51b7331e0b4 100644
--- a/include/uapi/linux/kfd_sysfs.h
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -43,6 +43,11 @@
#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
+#define HSA_CAP_TRAP_DEBUG_SUPPORT 0x00008000
+#define HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED 0x00010000
+#define HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED 0x00020000
+#define HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED 0x00040000
+
/* Old buggy user mode depends on this being 0 */
#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000
@@ -53,8 +58,18 @@
#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000
+#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000
#define HSA_CAP_RESERVED 0xe00f8000
+/* debug_prop bits in node properties */
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_SHIFT 0
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_MASK 0x000003f0
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT 4
+#define HSA_DBG_DISPATCH_INFO_ALWAYS_VALID 0x00000400
+#define HSA_DBG_WATCHPOINTS_EXCLUSIVE 0x00000800
+#define HSA_DBG_RESERVED 0xfffffffffffff000ull
+
/* Heap types in memory properties */
#define HSA_MEM_HEAP_TYPE_SYSTEM 0
#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1