summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c148
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c10
4 files changed, 190 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ee086a0a46df..826a99acb6fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1488,6 +1488,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
goto out_unlock;
}
+ if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+ retval = -EBUSY;
+ goto out_unlock;
+ }
+
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
mutex_unlock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 898cc1fe3d13..73b07b5f17f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -21,13 +21,78 @@
*/
#include "kfd_debug.h"
+#include "kfd_device_queue_manager.h"
#include <linux/file.h>
+static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
+{
+ uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
+ uint32_t flags = pdd->process->dbg_flags;
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+ return 0;
+
+ return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
+ pdd->watch_points, flags);
+}
+
+/* kfd_dbg_trap_deactivate:
+ * target: target process
+ * unwind: If this is unwinding a failed kfd_dbg_trap_enable()
+ * unwind_count:
+ * If unwind == true, how far down the pdd list we need
+ * to unwind
+ * else: ignored
+ */
+static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
+{
+ int i;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ /* If this is an unwind, and we have unwound the required
+ * enable calls on the pdd list, we need to stop now
+ * otherwise we may mess up another debugger session.
+ */
+ if (unwind && i == unwind_count)
+ break;
+
+ /* GFX off is already disabled by debug activate if not RLC restore supported. */
+ if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->spi_dbg_override =
+ pdd->dev->kfd2kgd->disable_debug_trap(
+ pdd->dev->adev,
+ target->runtime_info.ttmp_setup,
+ pdd->dev->vm_info.last_vmid_kfd);
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
+ release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
+ pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ debug_refresh_runlist(pdd->dev->dqm);
+ else
+ kfd_dbg_set_mes_debug_mode(pdd);
+ }
+}
+
int kfd_dbg_trap_disable(struct kfd_process *target)
{
if (!target->debug_trap_enabled)
return 0;
+ /*
+ * Defer deactivation to runtime if runtime not enabled otherwise reset
+ * attached running target runtime state to enable for re-attach.
+ */
+ if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+ kfd_dbg_trap_deactivate(target, false, 0);
+ else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+ target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+
fput(target->dbg_ev_file);
target->dbg_ev_file = NULL;
@@ -42,16 +107,89 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
return 0;
}
+static int kfd_dbg_trap_activate(struct kfd_process *target)
+{
+ int i, r = 0;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+ r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
+
+ if (r) {
+ target->runtime_info.runtime_state = (r == -EBUSY) ?
+ DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+ goto unwind_err;
+ }
+ }
+
+ /* Disable GFX OFF to prevent garbage read/writes to debug registers.
+ * If RLC restore of debug registers is not supported and runtime enable
+ * hasn't done so already on ttmp setup request, restore the trap config registers.
+ *
+ * If RLC restore of debug registers is not supported, keep gfx off disabled for
+ * the debug session.
+ */
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
+ target->runtime_info.ttmp_setup))
+ pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+ pdd->dev->adev,
+ false,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+ if (!pdd->dev->kfd->shared_resources.enable_mes)
+ r = debug_refresh_runlist(pdd->dev->dqm);
+ else
+ r = kfd_dbg_set_mes_debug_mode(pdd);
+
+ if (r) {
+ target->runtime_info.runtime_state =
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+ goto unwind_err;
+ }
+ }
+
+ return 0;
+
+unwind_err:
+ /* Enabling debug failed, we need to disable on
+ * all GPUs so the enable is all or nothing.
+ */
+ kfd_dbg_trap_deactivate(target, true, i);
+ return r;
+}
+
int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
void __user *runtime_info, uint32_t *runtime_size)
{
struct file *f;
uint32_t copy_size;
- int r = 0;
+ int i, r = 0;
if (target->debug_trap_enabled)
return -EALREADY;
+ /* Enable pre-checks */
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ if (!KFD_IS_SOC15(pdd->dev))
+ return -ENODEV;
+
+ if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
+ return -EBUSY;
+ }
+
copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
f = fget(fd);
@@ -62,6 +200,10 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
target->dbg_ev_file = f;
+ /* defer activation to runtime if not runtime enabled */
+ if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+ kfd_dbg_trap_activate(target);
+
/* We already hold the process reference but hold another one for the
* debug session.
*/
@@ -71,8 +213,10 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
if (target->debugger_process)
atomic_inc(&target->debugger_process->debugged_process_count);
- if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size))
+ if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
+ kfd_dbg_trap_deactivate(target, false, 0);
r = -EFAULT;
+ }
*runtime_size = sizeof(target->runtime_info);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index db6d72e7930f..17481f824647 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -34,4 +34,33 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2);
}
+/*
+ * If GFX off is enabled, chips that do not support RLC restore for the debug
+ * registers will disable GFX off temporarily for the entire debug session.
+ * See disable_on_trap_action_entry and enable_on_trap_action_exit for details.
+ */
+static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
+{
+ return !(KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 10) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
+}
+
+static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
+{
+ if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
+ && dev->kfd->mec2_fw_version < 0x81b6) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(9, 1, 0)
+ && KFD_GC_VERSION(dev) <= IP_VERSION(9, 2, 2)
+ && dev->kfd->mec2_fw_version < 0x1b6) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0)
+ && dev->kfd->mec2_fw_version < 0x1b6) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1)
+ && dev->kfd->mec2_fw_version < 0x30) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0)))
+ return false;
+
+ /* Assume debugging and cooperative launch supported otherwise. */
+ return true;
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 725d936b2cc7..e77cadadb09b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1165,9 +1165,19 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
static void kfd_process_notifier_release_internal(struct kfd_process *p)
{
+ int i;
+
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ /* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
+ if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+ }
+
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
kfd_dbg_trap_disable(p);