summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2023-06-12 18:31:07 +0300
committerAlex Deucher <alexander.deucher@amd.com>2023-07-27 21:59:29 +0300
commitfc7f1d9697bcd3f7a4c64fb00a0e9bb050eaaa2a (patch)
treec10fe751e6eb17987cf16b843e280881780cf6a1
parentf9acfafc3458653d306a45c71e052df50af1c81d (diff)
downloadlinux-fc7f1d9697bcd3f7a4c64fb00a0e9bb050eaaa2a.tar.xz
drm/amdkfd: fix and enable ttmp setup for gfx11
The MES cached process context must be cleared on adding any queue for the first time. For proper debug support, the MES will clear it's cached process context on the first call to SET_SHADER_DEBUGGER. This allows TTMPs to be pesistently enabled in a safe manner. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Eric Huang <jinhuieric@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c12
6 files changed, 39 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index 77ca5cbfb601..d67d003bada2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
{
uint32_t data = 0;
- data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 35b36cbe5aa2..aef8e12df61f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2759,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
if (pdd->qpd.queue_count)
return -EEXIST;
+
+ /*
+ * Setup TTMPs by default.
+ * Note that this call must remain here for MES ADD QUEUE to
+ * skip_process_ctx_clear unconditionally as the first call to
+ * SET_SHADER_DEBUGGER clears any stale process context data
+ * saved in MES.
+ */
+ if (pdd->dev->kfd->shared_resources.enable_mes)
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
@@ -2852,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p)
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd,
+ !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 1f82caea59ba..9ec750666382 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -344,11 +344,10 @@ unwind:
return r;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
- bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
@@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
kfd_dbg_clear_dev_watch_id(pdd, watch_id);
@@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
if (r)
@@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->dbg_flags = prev_flags;
@@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, true);
}
}
@@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
kfd_dbg_set_workaround(target, false);
@@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->runtime_info.runtime_state =
@@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
@@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 586d7f886712..fd0ff64d4184 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
return true;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
+
+static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
+{
+ return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
+ (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 71b7f16c0173..ccaf85fc12c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
+ queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
+ kfd_dbg_has_ttmps_always_setup(q->device);
queue_type = convert_to_mes_queue_type(q->properties.type);
if (queue_type < 0) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index eeedc3ddffeb..3b0749390388 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -38,6 +38,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_debug.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"
@@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
+ if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
+ dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
dev->node_props.debug_prop |=
@@ -1941,10 +1945,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2))
- dev->node_props.debug_prop |=
- HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
-
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
@@ -1952,9 +1952,7 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
- dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- else
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
}