summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2022-05-10 19:47:45 +0300
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 19:37:00 +0300
commit12976e6a5ab8fc3766c0304d72f7eec81a109b55 (patch)
tree2443653672cec2d8f1f011d8bc1e149e1ce661ed
parentb17bd5dbf64677682a3bca249c64521d5eabcb38 (diff)
downloadlinux-12976e6a5ab8fc3766c0304d72f7eec81a109b55.tar.xz
drm/amdkfd: add debug device snapshot operation
Similar to queue snapshot, return an array of device information using an entry_size check and return. Unlike queue snapshots, the debugger needs to pass to correct number of devices that exist. If it fails to do so, the KFD will return the number of actual devices so that the debugger can make a subsequent successful call. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c73
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h5
3 files changed, 83 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 24066756e478..f54ff5c3387d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -3060,8 +3060,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
&args->queue_snapshot.entry_size);
break;
case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
- pr_warn("Debug op %i not supported yet\n", args->op);
- r = -EACCES;
+ r = kfd_dbg_trap_device_snapshot(target,
+ args->device_snapshot.exception_mask,
+ (void __user *)args->device_snapshot.snapshot_buf_ptr,
+ &args->device_snapshot.num_devices,
+ &args->device_snapshot.entry_size);
break;
default:
pr_err("Invalid option: %i\n", args->op);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 24e2b285448a..125274445f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -22,6 +22,7 @@
#include "kfd_debug.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_topology.h"
#include <linux/file.h>
#include <uapi/linux/kfd_ioctl.h>
@@ -1010,6 +1011,78 @@ out:
return r;
}
+int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
+ uint64_t exception_clear_mask,
+ void __user *user_info,
+ uint32_t *number_of_device_infos,
+ uint32_t *entry_size)
+{
+ struct kfd_dbg_device_info_entry device_info;
+ uint32_t tmp_entry_size = *entry_size, tmp_num_devices;
+ int i, r = 0;
+
+ if (!(target && user_info && number_of_device_infos && entry_size))
+ return -EINVAL;
+
+ tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
+ *number_of_device_infos = target->n_pdds;
+ *entry_size = min_t(size_t, *entry_size, sizeof(device_info));
+
+ if (!tmp_num_devices)
+ return 0;
+
+ memset(&device_info, 0, sizeof(device_info));
+
+ mutex_lock(&target->event_mutex);
+
+ /* Run over all pdd of the process */
+ for (i = 0; i < tmp_num_devices; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+ struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+
+ device_info.gpu_id = pdd->dev->id;
+ device_info.exception_status = pdd->exception_status;
+ device_info.lds_base = pdd->lds_base;
+ device_info.lds_limit = pdd->lds_limit;
+ device_info.scratch_base = pdd->scratch_base;
+ device_info.scratch_limit = pdd->scratch_limit;
+ device_info.gpuvm_base = pdd->gpuvm_base;
+ device_info.gpuvm_limit = pdd->gpuvm_limit;
+ device_info.location_id = topo_dev->node_props.location_id;
+ device_info.vendor_id = topo_dev->node_props.vendor_id;
+ device_info.device_id = topo_dev->node_props.device_id;
+ device_info.revision_id = pdd->dev->adev->pdev->revision;
+ device_info.subsystem_vendor_id = pdd->dev->adev->pdev->subsystem_vendor;
+ device_info.subsystem_device_id = pdd->dev->adev->pdev->subsystem_device;
+ device_info.fw_version = pdd->dev->kfd->mec_fw_version;
+ device_info.gfx_target_version =
+ topo_dev->node_props.gfx_target_version;
+ device_info.simd_count = topo_dev->node_props.simd_count;
+ device_info.max_waves_per_simd =
+ topo_dev->node_props.max_waves_per_simd;
+ device_info.array_count = topo_dev->node_props.array_count;
+ device_info.simd_arrays_per_engine =
+ topo_dev->node_props.simd_arrays_per_engine;
+ device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask);
+ device_info.capability = topo_dev->node_props.capability;
+ device_info.debug_prop = topo_dev->node_props.debug_prop;
+
+ if (exception_clear_mask)
+ pdd->exception_status &= ~exception_clear_mask;
+
+ if (copy_to_user(user_info, &device_info, *entry_size)) {
+ r = -EFAULT;
+ break;
+ }
+
+ user_info += tmp_entry_size;
+ }
+
+ mutex_unlock(&target->event_mutex);
+
+ return r;
+}
+
void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
uint64_t exception_set_mask)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 234e2ccafa87..a289e59ceb79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -81,6 +81,11 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
}
void debug_event_write_work_handler(struct work_struct *work);
+int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
+ uint64_t exception_clear_mask,
+ void __user *user_info,
+ uint32_t *number_of_device_infos,
+ uint32_t *entry_size);
void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
uint64_t exception_set_mask);