author    Daniel Vetter <daniel.vetter@ffwll.ch>  2023-08-04 12:10:18 +0300
committer Daniel Vetter <daniel.vetter@ffwll.ch>  2023-08-04 12:10:18 +0300
commit    3d00c59d147724e536b415e389445ece6fcda42f
tree      b7abf11faad68372dfd889eb644a825f4cdb19a2 /drivers/gpu/drm/amd/amdkfd
parent    52920704df878050123dfeb469aa6ab8022547c1
parent    7ea1db28119e237d634c6f74ba52056939c009ad
Merge tag 'amd-drm-next-6.6-2023-07-28' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.6-2023-07-28:

amdgpu:
- Lots of checkpatch cleanups
- GFX 9.4.3 updates
- Add USB PD and IFWI flashing documentation
- GPUVM updates
- RAS fixes
- DRR fixes
- FAMS fixes
- Virtual display fixes
- Soft IH fixes
- SMU13 fixes
- Rework PSP firmware loading for other IPs
- Kernel doc fixes
- DCN 3.0.1 fixes
- LTTPR fixes
- DP MST fixes
- DCN 3.1.6 fixes
- SubVP fixes
- Display bandwidth calculation fixes
- VCN4 secure submission fixes
- Allow building DC on RISC-V
- Add visible FB info to bo_print_info
- HBR3 fixes
- Add PSP 14.0 support
- GFX9 MCBP fix
- GMC10 vmhub index fix
- GMC11 vmhub index fix
- Create a new doorbell manager
- SR-IOV fixes

amdkfd:
- Cleanup CRIU dma-buf handling
- Use KIQ to unmap HIQ
- GFX 9.4.3 debugger updates
- GFX 9.4.2 debugger fixes
- Enable cooperative groups for gfx11
- SVM fixes

radeon:
- Lots of checkpatch cleanups

Merge conflicts:
- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
  The switch to drm exec helpers in 8a206685d36f ("drm/amdgpu: use drm_exec
  for GEM and CSA handling v2") clashed with the cosmetic cleanups from
  30953c4d000b ("drm/amdgpu: Fix style issues in amdgpu_gem.c"). I kept the
  former since the cleaned-up code is gone.
- drivers/gpu/drm/amd/amdgpu/atom.c
  adf64e214280 ("drm/amd: Avoid reading the VBIOS part number twice") removed
  code that 992b8fe106ab ("drm/radeon: Replace all non-returning strlcpy
  with strscpy") polished.

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230728214228.8102-1-alexander.deucher@amd.com
[sima: some merge conflict wrangling as noted]
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c                35
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_debug.c                  34
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_debug.h                  22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c                  6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   38
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c        22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c        22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c         36
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c      3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c                    12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c               21
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h               3
13 files changed, 200 insertions, 66 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6a27b000a246..aef8e12df61f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1487,7 +1487,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
goto out_unlock;
}
- if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+ if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
+ kfd_dbg_has_cwsr_workaround(dev))) {
retval = -EBUSY;
goto out_unlock;
}
@@ -1845,22 +1846,21 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
idr_for_each_entry(&pdd->alloc_idr, mem, id) {
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
- if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+ if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
num_of_bos++;
}
}
return num_of_bos;
}
-static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
u32 *shared_fd)
{
struct dma_buf *dmabuf;
int ret;
- dmabuf = amdgpu_gem_prime_export(gobj, flags);
- if (IS_ERR(dmabuf)) {
- ret = PTR_ERR(dmabuf);
+ ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+ if (ret) {
pr_err("dmabuf export failed for the BO\n");
return ret;
}
@@ -1918,7 +1918,11 @@ static int criu_checkpoint_bos(struct kfd_process *p,
kgd_mem = (struct kgd_mem *)mem;
dumper_bo = kgd_mem->bo;
- if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
+ /* Skip checkpointing BOs that are used for Trap handler
+ * code and state. Currently, these BOs have a VA that
+ * is less than the GPUVM base
+ */
+ if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
continue;
bo_bucket = &bo_buckets[bo_index];
@@ -1940,7 +1944,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
}
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
- ret = criu_get_prime_handle(&dumper_bo->tbo.base,
+ ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
&bo_bucket->dmabuf_fd);
@@ -2402,7 +2406,7 @@ static int criu_restore_bo(struct kfd_process *p,
/* create the dmabuf object and export the bo */
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
- ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
+ ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
&bo_bucket->dmabuf_fd);
if (ret)
return ret;
@@ -2755,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
if (pdd->qpd.queue_count)
return -EEXIST;
+
+ /*
+ * Setup TTMPs by default.
+ * Note that this call must remain here for MES ADD QUEUE to
+ * skip_process_ctx_clear unconditionally as the first call to
+ * SET_SHADER_DEBUGGER clears any stale process context data
+ * saved in MES.
+ */
+ if (pdd->dev->kfd->shared_resources.enable_mes)
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
@@ -2848,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p)
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd,
+ !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
}
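
The change above routes CRIU dma-buf export through amdgpu_amdkfd_gpuvm_export_dmabuf() on the kgd_mem object instead of calling amdgpu_gem_prime_export() on the raw GEM object. The hunk ends before the fd installation step; a minimal sketch of how such a helper typically finishes, assuming only the standard dma-buf API (dma_buf_fd(), dma_buf_put()) and with the error-handling details as assumptions:

	/* sketch: install a shareable fd for the exported dma-buf */
	ret = dma_buf_fd(dmabuf, flags);
	if (ret < 0) {
		dma_buf_put(dmabuf);	/* drop the export reference on failure */
		return ret;
	}
	*shared_fd = ret;
	return 0;
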
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index fff3ccc04fa9..9ec750666382 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
if (!q)
return 0;
- if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
- KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
+ if (!kfd_dbg_has_cwsr_workaround(q->device))
return 0;
if (enable && q->properties.is_user_cu_masked)
@@ -345,11 +344,10 @@ unwind:
return r;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
- bool sq_trap_en = !!spi_dbg_cntl;
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
@@ -433,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
kfd_dbg_clear_dev_watch_id(pdd, watch_id);
@@ -446,7 +444,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
uint32_t *watch_id,
uint32_t watch_mode)
{
- int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+ int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+ uint32_t xcc_mask = pdd->dev->xcc_mask;
if (r)
return r;
@@ -460,19 +459,21 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
}
amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
- pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
+ for_each_inst(xcc_id, xcc_mask)
+ pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
pdd->dev->adev,
watch_address,
watch_address_mask,
*watch_id,
watch_mode,
- pdd->dev->vm_info.last_vmid_kfd);
+ pdd->dev->vm_info.last_vmid_kfd,
+ xcc_id);
amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
/* HWS is broken so no point in HW rollback but release the watchpoint anyway */
if (r)
@@ -514,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->dbg_flags = prev_flags;
@@ -537,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, true);
}
}
@@ -599,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
kfd_dbg_set_workaround(target, false);
@@ -715,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->runtime_info.runtime_state =
@@ -751,7 +752,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
if (!KFD_IS_SOC15(pdd->dev))
return -ENODEV;
- if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
+ if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
+ kfd_dbg_has_cwsr_workaround(pdd->dev)))
return -EBUSY;
}
@@ -848,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
@@ -880,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
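
With the watch-point change above, kfd_dbg_trap_set_dev_address_watch() now programs the watch registers once per XCC instance in xcc_mask rather than once per device. A standalone model of that per-instance walk (userspace C for illustration; the lowest-bit-first visiting order of the kernel's for_each_inst() is an assumption of this sketch):

#include <stdint.h>
#include <stdio.h>

static void set_watch_on_each_xcc(uint32_t xcc_mask)
{
	/* visit every set bit in xcc_mask, lowest first */
	for (int xcc_id = 0; xcc_mask; xcc_id++) {
		if (!(xcc_mask & (1u << xcc_id)))
			continue;
		xcc_mask &= ~(1u << xcc_id);
		printf("set_address_watch() on XCC %d\n", xcc_id);
	}
}

int main(void)
{
	set_watch_on_each_xcc(0x5);	/* XCC 0 and XCC 2 */
	return 0;
}
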
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index a289e59ceb79..fd0ff64d4184 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
{
- return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
- KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
+ return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
void debug_event_write_work_handler(struct work_struct *work);
@@ -100,6 +101,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
}
+static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
+{
+ return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
+}
+
static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
{
if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
@@ -119,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
return true;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
+
+static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
+{
+ return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
+ (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
+}
#endif
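
The two new predicates are easiest to read with the version packing spelled out. A standalone model (userspace C; IP_VERSION() uses the kernel's ((a) << 16 | (b) << 8 | (c)) packing, and taking AMDGPU_MES_VERSION_MASK to be the low 12 bits of mes.sched_version is an assumption of this sketch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))
#define MES_VERSION_MASK 0x00000fffu	/* assumed scheduler fw revision field */

static bool has_cwsr_workaround(uint32_t gc)
{
	/* GFX 11.0.0 through 11.0.3 need the debugger CWSR workaround */
	return gc >= IP_VERSION(11, 0, 0) && gc <= IP_VERSION(11, 0, 3);
}

static bool ttmps_always_setup(uint32_t gc, uint32_t sched_version)
{
	/* pre-GFX11 except 9.4.2: always; GFX11: only with MES fw >= 70 */
	return (gc < IP_VERSION(11, 0, 0) && gc != IP_VERSION(9, 4, 2)) ||
	       (gc >= IP_VERSION(11, 0, 0) && gc < IP_VERSION(12, 0, 0) &&
		(sched_version & MES_VERSION_MASK) >= 70);
}

int main(void)
{
	/* GFX 9.4.3: no CWSR workaround, TTMPs always set up */
	printf("9.4.3:  wa=%d ttmps=%d\n",
	       has_cwsr_workaround(IP_VERSION(9, 4, 3)),
	       ttmps_always_setup(IP_VERSION(9, 4, 3), 0));
	/* GFX 11.0.2 with MES scheduler fw revision 70 */
	printf("11.0.2: wa=%d ttmps=%d\n",
	       has_cwsr_workaround(IP_VERSION(11, 0, 2)),
	       ttmps_always_setup(IP_VERSION(11, 0, 2), 70));
	return 0;
}
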
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0b3dc754e06b..ebc9674d3ce1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -508,6 +508,7 @@ static int kfd_gws_init(struct kfd_node *node)
{
int ret = 0;
struct kfd_dev *kfd = node->kfd;
+ uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return 0;
@@ -524,7 +525,10 @@ static int kfd_gws_init(struct kfd_node *node)
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
- && kfd->mec2_fw_version >= 0x6b))))
+ && kfd->mec2_fw_version >= 0x6b) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
+ && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
+ && mes_rev >= 68))))
ret = amdgpu_amdkfd_alloc_gws(node->adev,
node->adev->gds.gws_size, &node->gws);
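
The added clause extends GWS allocation to GFX 11.x once the MES scheduler firmware is new enough. Just the new condition, modeled standalone (names are local to this sketch; the unchanged pre-GFX11 clauses are omitted):

#include <stdbool.h>

#define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

/* sketch: GWS on GFX 11.x requires MES scheduler fw revision >= 68 */
static bool gfx11_gws_supported(unsigned int gc, unsigned int mes_rev)
{
	return gc >= IP_VERSION(11, 0, 0) && gc < IP_VERSION(12, 0, 0) &&
	       mes_rev >= 68;
}
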
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f515cb8f30ca..ccaf85fc12c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -226,9 +226,10 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
- queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
- KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3);
+ queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
- queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
+ queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
+ kfd_dbg_has_ttmps_always_setup(q->device);
queue_type = convert_to_mes_queue_type(q->properties.type);
if (queue_type < 0) {
@@ -238,10 +239,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
}
queue_input.queue_type = (uint32_t)queue_type;
- if (q->gws) {
- queue_input.gws_base = 0;
- queue_input.gws_size = qpd->num_gws;
- }
+ queue_input.exclusively_scheduled = q->properties.is_gws;
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
@@ -251,7 +249,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
q->properties.doorbell_off);
pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
kfd_hws_hang(dqm);
-}
+ }
return r;
}
@@ -1621,7 +1619,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
if (dqm->dev->kfd2kgd->get_iq_wait_times)
dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times);
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
return 0;
}
@@ -1663,6 +1662,26 @@ static int start_cpsch(struct device_queue_manager *dqm)
if (!dqm->dev->kfd->shared_resources.enable_mes)
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+
+ /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+ if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
+ uint32_t reg_offset = 0;
+ uint32_t grace_period = 1;
+
+ retval = pm_update_grace_period(&dqm->packet_mgr,
+ grace_period);
+ if (retval)
+ pr_err("Setting grace timeout failed\n");
+ else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
+ /* Update dqm->wait_times maintained in software */
+ dqm->dev->kfd2kgd->build_grace_period_packet_info(
+ dqm->dev->adev, dqm->wait_times,
+ grace_period, &reg_offset,
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
+ }
+
dqm_unlock(dqm);
return 0;
@@ -1806,8 +1825,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
*/
q->properties.is_evicted = !!qpd->evicted;
q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
- KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
- KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
+ kfd_dbg_has_cwsr_workaround(q->device);
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 94c0fc2e57b7..83699392c808 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -318,6 +318,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v10_compute_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -460,7 +480,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->mqd_stride = kfd_mqd_stride;
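
destroy_hiq_mqd(), added in near-identical form for the v9, v10 and v11 MQD managers, stops tearing the HIQ down through hqd_destroy; it instead recovers the queue's doorbell offset from the MQD and lets the KIQ unmap it via amdgpu_amdkfd_unmap_hiq(). A standalone model of the offset recovery (placing DOORBELL_OFFSET at bit 2 of CP_HQD_PQ_DOORBELL_CONTROL is an assumption of this sketch, mirroring the plain right shift in the code above):

#include <stdint.h>
#include <stdio.h>

#define DOORBELL_OFFSET_SHIFT 2u	/* assumed field position */

int main(void)
{
	uint32_t doorbell_control = 0x1a8u << DOORBELL_OFFSET_SHIFT;
	uint32_t doorbell_off = doorbell_control >> DOORBELL_OFFSET_SHIFT;

	printf("doorbell offset: 0x%x\n", doorbell_off);	/* prints 0x1a8 */
	return 0;
}
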
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 31fec5e70d13..2319467d2d95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -335,6 +335,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v11_compute_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -449,7 +469,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 601bb9f68048..e23d32f35607 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -405,6 +405,25 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v9_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -548,16 +567,19 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
{
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
- void *xcc_mqd;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+ struct v9_mqd *m;
+ u32 doorbell_off;
for_each_inst(xcc_id, xcc_mask) {
- xcc_mqd = mqd + hiq_mqd_size * inst;
- err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
- type, timeout, pipe_id,
- queue_id, xcc_id);
+ m = get_mqd(mqd + hiq_mqd_size * inst);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
if (err) {
- pr_debug("Destroy MQD failed for xcc: %d\n", inst);
+ pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
break;
}
++inst;
@@ -846,7 +868,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
} else {
mqd->init_mqd = init_mqd_hiq;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
- mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->destroy_mqd = destroy_hiq_mqd;
}
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 29a2d0499b67..8fda16e6fee6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -298,7 +298,8 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
pm->dqm->wait_times,
grace_period,
&reg_offset,
- &reg_data);
+ &reg_data,
+ 0);
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
reg_data = pm->dqm->wait_times;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index ba9d69054119..60e6b37b43ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem);
@@ -136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
} else {
/*
* Intentionally set GWS to a non-NULL value
- * for GFX 9.4.3.
+ * for devices that do not use GWS for global wave
+ * synchronization but require the formality
+ * of setting GWS for cooperative groups.
*/
pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
}
@@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q && pqn->q->gws &&
- KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ !pqn->q->device->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
kfd_procfs_del_queue(pqn->q);
@@ -455,7 +458,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
}
if (pqn->q->gws) {
- if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ !dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info,
pqn->q->gws);
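
On MES devices (and GFX 9.4.3), pqm_set_gws() no longer attaches a real GWS BO to the process; it stores a non-NULL sentinel so later "does this queue use GWS" checks still fire without a BO to dereference. A standalone illustration of the ERR_PTR sentinel trick (the -4095 bound matches the kernel's MAX_ERRNO):

#include <stdint.h>
#include <stdio.h>

#define ENOMEM 12

static inline void *ERR_PTR(long error) { return (void *)error; }

static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-4095;	/* kernel MAX_ERRNO */
}

int main(void)
{
	void *gws = ERR_PTR(-ENOMEM);	/* sentinel: "uses GWS", no real BO */

	printf("non-NULL=%d IS_ERR=%d\n", gws != NULL, IS_ERR(gws));
	return 0;
}
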
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index aa32b06eea77..01c7de2d6e19 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -48,6 +48,8 @@
* page table is updated.
*/
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+#define dynamic_svm_range_dump(svms) \
+ _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
/* Giant svm range split into smaller ranges based on this, it is decided using
* minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to
@@ -1521,6 +1523,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
struct kfd_process_device *pdd;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd)
+ return NULL;
return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
@@ -1595,12 +1599,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
- if (!prange->mapped_to_gpu) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ if (!prange->mapped_to_gpu ||
+ bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
r = 0;
goto free_ctx;
}
-
- bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
}
if (prange->actual_loc && !prange->ttm_res) {
@@ -3560,7 +3564,7 @@ out_unlock_range:
break;
}
- svm_range_debug_dump(svms);
+ dynamic_svm_range_dump(svms);
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
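
The new dynamic_svm_range_dump() macro turns the formerly unconditional (and expensive) svm_range_debug_dump() into a dynamic-debug call site named "svm_range_dump", so the dump only runs when that site is switched on at runtime (with CONFIG_DYNAMIC_DEBUG, typically through the debugfs dynamic_debug/control interface). A simplified sketch of the guarded-call pattern; the kernel's actual _dynamic_func_call_no_desc() expansion differs in its descriptor plumbing:

/* simplified: a per-callsite enable flag gates the expensive call */
#define dynamic_func_call(site_enabled, func, ...)	\
	do {						\
		if (site_enabled)			\
			func(__VA_ARGS__);		\
	} while (0)

/* usage mirroring the sketch (dump_enabled is hypothetical): */
/* dynamic_func_call(dump_enabled, svm_range_debug_dump, svms); */
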
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 61fc62f3e003..3b0749390388 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -38,6 +38,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_debug.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"
@@ -1931,23 +1932,27 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
- dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
- HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+ if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
+ dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2))
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
+ if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
dev->node_props.debug_prop |=
- HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+ HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
+ HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
else
+ dev->node_props.debug_prop |=
+ HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
+ HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
- dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- else
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index cba2cd5ed9d1..dea32a9e5506 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -32,9 +32,12 @@
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 6
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7
#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 7
#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT \
(29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \
+ (30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
struct kfd_node_properties {
uint64_t hive_id;
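
Reading the LO/HI constants above as the smallest and largest supported address-watch mask widths (an interpretation, not stated by the patch itself), GFX 9.4.3 moves from GFX9's 2^6..2^29-byte watch windows to 2^7..2^30 bytes. A quick standalone check:

#include <stdio.h>

int main(void)
{
	/* GFX9:     LO bit 6, HI bit 29 ->  64 B .. 512 MiB windows */
	printf("gfx9:    %u B .. %u MiB\n", 1u << 6, (1u << 29) >> 20);
	/* GFX9.4.3: LO bit 7, HI bit 30 -> 128 B ..   1 GiB windows */
	printf("gfx943:  %u B .. %u MiB\n", 1u << 7, (1u << 30) >> 20);
	return 0;
}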